spacepaste

  1.  
import re
import urllib2
from contextlib import closing
from os.path import basename
from urlparse import urlsplit
  6. url = "http://tieba.baidu.com/p/683991021"
  7. def getPage(url):
  8. url=url+"?see_lz=1"
  9. urlContent = urllib2.urlopen(url).read()
  10. page='<span class="red">(.*?)</span>'
  11. thePage=re.findall(page,urlContent)
  12. return int(thePage[0])
  13. def downImg(url):
  14. urlContent = urllib2.urlopen(url).read()
  15. spans='<cc>(.*?)</cc>'
  16. ss=re.findall(spans,urlContent)
  17. obImgs=','.join(ss)
  18. imgUrls = re.findall('img .*?src="(.*?)"', obImgs)
  19. for imgUrl in imgUrls:
  20. try:
  21. imgData = urllib2.urlopen(imgUrl).read()
  22. fileName = basename(urlsplit(imgUrl)[2])
  23. output = open(fileName,'wb')
  24. output.write(imgData)
  25. output.close()
  26. except:
  27. print "Er.."
  28. def downLoad(url):
  29. numb=getPage(url)
  30. cont=0
  31. print "There are "+str(numb)+" pages."
  32. while cont<numb:
  33. cont+=1
  34. print "Downloading "+url+"?see_lz=1&pn="+str(cont)+"..."
  35. downImg(url+"?see_lz=1&pn="+str(cont))
  36. print 'Completed!'
  37. downLoad(url)
  38.