"""Scrape proxy addresses from samair.ru and save them to proxies.txt.

Visits the numbered proxy-list pages in order, keeps every table cell whose
text looks like a ``host:port`` entry, and writes the collected entries to
``proxies.txt`` (one per line).
"""
from splinter.browser import Browser

# Driver used for every browser instance, including restarts, so the whole
# run goes through the same backend.
DRIVER_NAME = 'zope.testbrowser'

# Upper bound on the page number; any big number will do because the loop
# stops on its own once it reaches the last page.
MAX_PAGES = 100

# A page with fewer table cells than this is treated as the last page.
FULL_PAGE_CELLS = 50

# The browser is restarted after this many pages because its memory usage
# seems to build up over time.
RESTART_EVERY = 5

# CSS selector for the table cells that hold the proxies.
CELL_SELECTOR = "#main_content .proxylist .box_text .tablelist tr td"


def _is_proxy_address(text):
    """Return True if *text* looks like a proxy entry.

    Proxies are sometimes mixed in with other cell content; a real entry
    contains both ":" and "." and no letter "o". Cells whose text is None
    or empty are rejected outright -- testing ``":" in None`` is what
    raised the TypeError recorded at the bottom of this file.
    """
    if not text:
        return False
    return ":" in text and "." in text and "o" not in text


realProxies = []
browser = Browser(DRIVER_NAME)

try:
    for page in range(1, MAX_PAGES):
        # Page numbers below 10 are zero-padded in the URL (proxy-01.htm).
        url = 'http://www.samair.ru/proxy/proxy-%02d.htm' % page
        browser.visit(url)

        # Grab the table cells which the proxies are in.
        cells = browser.find_by_css(CELL_SELECTOR)
        for cell in cells:
            text = cell.text  # read once; may be None for some cells
            if _is_proxy_address(text):
                realProxies.append(text)

        # A short page means this was the last one, so stop here.
        if len(cells) < FULL_PAGE_CELLS:
            break

        # Periodically restart the browser (with the same driver) to keep
        # its memory usage down.
        if page % RESTART_EVERY == 0:
            browser.quit()
            browser = Browser(DRIVER_NAME)
finally:
    # Always release the browser, even if a page visit failed.
    browser.quit()

# The context manager guarantees the file is flushed and closed.
with open('proxies.txt', 'w') as out_file:
    out_file.write('\n'.join(realProxies))

print(str(len(realProxies)) + ' Added to proxies.txt')

# --------------------------
# Failure reported against the original version of this script; the None
# check in _is_proxy_address addresses it:
#
# Traceback (most recent call last):
#   File "/Users/Jake/Desktop/python/01/NewPythonProject/src/test.py", line 25, in
#     if not ":" in proxy.text or not "." in proxy.text or "o" in proxy.text: continue
# TypeError: argument of type 'NoneType' is not iterable
# None