import urllib2
import re
from os.path import basename
from urlparse import urlsplit
# Thread URL on tieba.baidu.com.
# NOTE(review): presumably the default target handed to downLoad(); the call
# site is not visible in this part of the file -- confirm.
url = "http://tieba.baidu.com/p/683991021"
def getPage(url):
    """Return the integer parsed from the page at ``url + "?see_lz=1"``.

    The page is fetched, the ``page`` pattern is run over its body, and the
    first capture is converted with ``int``. downLoad() reports this value as
    the number of pages.

    Args:
        url: Address of the page; ``"?see_lz=1"`` is appended verbatim.

    Returns:
        The first regex capture converted to ``int``.

    Raises:
        IndexError: If the pattern yields no match at all.
        ValueError: If the first capture is not an integer literal.
        urllib2.URLError: If the page cannot be fetched.
    """
    response = urllib2.urlopen(url + "?see_lz=1")
    try:
        urlContent = response.read()
    finally:
        # Release the connection even when read() fails.
        response.close()

    # NOTE(review): as written this pattern matches the empty string at every
    # position, so the first capture is always '' and int('') raises
    # ValueError. It looks like the surrounding HTML markup of the pattern was
    # lost -- confirm the intended expression before relying on this function.
    page = '(.*?)'
    thePage = re.findall(page, urlContent)
    return int(thePage[0])
def _readUrl(url):
    """Return the full response body of *url*, always closing the connection."""
    response = urllib2.urlopen(url)
    try:
        return response.read()
    finally:
        response.close()


def downImg(url):
    """Save every image referenced by the page at *url* to the working directory.

    The page body is fetched, the fragments captured by ``spans`` are joined
    with commas, and every ``img ... src="..."`` address found in that text is
    downloaded. Each image is written under the base name of its URL path,
    overwriting any existing file with that name.

    Downloading is best-effort per image: an error while fetching or writing
    one image prints ``Er..`` and processing continues with the next one.
    KeyboardInterrupt and SystemExit are not swallowed.

    Args:
        url: Address of the page to scan for images.

    Raises:
        urllib2.URLError: If the page itself cannot be fetched.
    """
    urlContent = _readUrl(url)

    # NOTE(review): as written this pattern matches the empty string at every
    # position, so ``ss`` holds only empty strings and no image is ever found.
    # It looks like the surrounding HTML markup of the pattern was lost --
    # confirm the intended expression.
    spans = '(.*?)'
    ss = re.findall(spans, urlContent)
    obImgs = ','.join(ss)
    imgUrls = re.findall('img .*?src="(.*?)"', obImgs)

    for imgUrl in imgUrls:
        try:
            imgData = _readUrl(imgUrl)
            # Index 2 of the split result is the URL path component.
            fileName = basename(urlsplit(imgUrl)[2])
            # The context manager closes the file even if write() fails.
            with open(fileName, 'wb') as output:
                output.write(imgData)
        except Exception:
            # Skip this image but keep going; Ctrl-C still propagates.
            print("Er..")
def downLoad(url):
numb=getPage(url)
cont=0
print "There are "+str(numb)+" pages."
while cont