# Source: pythontr.com
#!/usr/bin/env python
# -*- coding: utf8 -*-
###############################################################################
# 19.03.2013
# Huseyin OZDEMIR
# husonet
# Via Pinterest
###############################################################################
"""Print the user names found on a Pinterest user's follower/following pages.

The script downloads the list page for one user, reads the follow count from
it, prints the user names matched on that page and then requests the remaining
pages using the ``marker`` query parameter.
"""

import re

try:
    import urllib2  # Python 2
except ImportError:
    # Python 3: urllib.request provides the same Request/urlopen names.
    import urllib.request as urllib2

# Base URL.  The original literal was wrapped in "[url]...[/url]" forum markup,
# which ended up inside every request address built by getUrl().
URL = "http://pinterest.com/"

# Pagination step: the marker requested for page i is i * SAYFALAMA.
SAYFALAMA = 24

# Pattern for one listed user: group 1 is the profile path segment,
# group 2 is the link text.
PATTERN = re.compile('<h4><a href="/(.*?)/">(.*?)</a></h4>', re.DOTALL)

# Pattern for the "following" count (text of the <strong> element).
PATTERN_FOLLOWING = re.compile(
    '<a href="/.*?/following/".*?>.*?<strong>(.*?)</strong>' + '.*?</a>',
    re.DOTALL)

# Pattern for the "followers" count (text of the <strong> element).
PATTERN_FOLLOWERS = re.compile(
    '<a href="/.*?/followers/".*?>.*?<strong>(.*?)</strong>' + '.*?</a>',
    re.DOTALL)

# Debug switch: 1 for a test environment (errors are re-raised),
# 0 for production (errors are printed and None is returned).
DEBUG = 1


# -----------------------------------------------------------------------------
# Pinterest client class.
class Pinterest(object):
    """Small client that fetches and parses Pinterest follow-list pages."""

    # User name
    username = ""
    # Version number
    version = "1"
    # Body of the last fetched page
    response = ""
    # Follow count as parsed from the page
    follow_count = 0

    # -------------------------------------------------------------------------
    # Runs when the object is created; initial values are set if given.
    def __init__(self, username=None, version=None):
        """Store *username* and *version* when they are provided."""
        if username is not None:
            self.username = username
        if version is not None:
            self.version = version

    # -------------------------------------------------------------------------
    # Build the URL of a followers/following list page.
    def getUrl(self, sUrl, sUserName, sFollow='followers', sSayfa=1):
        """Return ``sUrl + sUserName/sFollow/?marker=sSayfa``.

        The original ignored ``sUrl`` and always used the module-level URL;
        the argument is honoured now (the script itself passes URL).
        """
        return sUrl + sUserName + '/' + sFollow + '/?marker=' + str(sSayfa)

    # -------------------------------------------------------------------------
    # Open URL
    def openUrl(self, sAdres):
        """Fetch *sAdres* and return the response body as ``str``.

        On failure the exception is re-raised when DEBUG is set; otherwise
        the error is printed and None is returned.
        """
        try:
            req = urllib2.Request(sAdres)
            req.add_header('User-agent', 'Mozilla/5.0')
            res = urllib2.urlopen(req)
            try:
                result = res.read()
            finally:
                # Always release the connection, even if read() fails.
                res.close()
            # On Python 3 read() yields bytes, which the str patterns above
            # cannot search; on Python 2 the body already is a str.
            if not isinstance(result, str):
                result = result.decode('utf-8', 'replace')
        except Exception as err:
            if DEBUG:
                raise
            print(str(err))
            result = None
        return result

    # -------------------------------------------------------------------------
    # Parse Body
    def parseResponse(self, sResponse):
        """Print group 1 of every PATTERN match found in *sResponse*.

        Returns an empty string on success.  On failure the exception is
        re-raised when DEBUG is set; otherwise it is printed and None is
        returned.
        """
        try:
            result = ""
            for match in PATTERN.findall(sResponse):
                print(match[0])
        except Exception as err:
            if DEBUG:
                raise
            print(str(err))
            result = None
        return result

    # -------------------------------------------------------------------------
    # Parse Follow Count
    def parseFollowCount(self, sResponse, sFollowType='following'):
        """Return the follow count text found in *sResponse*.

        PATTERN_FOLLOWING is used when *sFollowType* is ``'following'``,
        PATTERN_FOLLOWERS for any other value.  None is returned when the
        pattern does not match (the original failed on ``None.group``).
        Other errors are re-raised when DEBUG is set, otherwise printed and
        answered with None.
        """
        try:
            if sFollowType == 'following':
                found = PATTERN_FOLLOWING.search(sResponse)
            else:
                found = PATTERN_FOLLOWERS.search(sResponse)
            result = found.group(1) if found is not None else None
        except Exception as err:
            if DEBUG:
                raise
            print(str(err))
            result = None
        return result


# -----------------------------------------------------------------------------
def main():
    """Crawl every list page of one hard-coded user and print the names."""
    follow = 'followers'
    p = Pinterest()
    p.username = "habermonitor"

    p.response = p.openUrl(p.getUrl(URL, p.username, follow))
    if p.response is None:
        return

    # Read the count that belongs to the list being crawled; the original
    # relied on the default and therefore read the "following" count.
    p.follow_count = p.parseFollowCount(p.response, follow)
    print(p.follow_count)
    p.parseResponse(p.response)

    if p.follow_count is None:
        # No count on the page, so the number of pages is unknown.
        return

    # Scan the remaining pages.
    total = int(p.follow_count.replace(',', ''))
    count_page = total // SAYFALAMA + 1  # floor division on Python 2 and 3
    for page in range(2, count_page + 1):
        p.response = p.openUrl(
            p.getUrl(URL, p.username, follow, page * SAYFALAMA))
        if p.response is not None:
            p.parseResponse(p.response)


if __name__ == '__main__':
    main()
# Yorumlar (Comments)