Created
September 7, 2016 09:25
-
-
Save alexmorozov/3da79f886314b85985dde87211db933d to your computer and use it in GitHub Desktop.
Revisions
-
alexmorozov created this gist
Sep 7, 2016. There are no files selected for viewing.
# -*- coding: utf-8 -*-
"""Collect italki language-partner profiles and list the promising ones.

``parse_page`` downloads pages of the italki partner search and stores every
profile it has not seen before in a local TinyDB file (``partners.json``).
``find_decent`` then filters the stored profiles and yields the remaining ones
as ``User`` objects.  Running the module as a script prints one tab-separated
line per remaining profile.
"""
from datetime import datetime
import logging
import sys
import time

import requests
from tinydb import TinyDB, where

log = logging.getLogger(__name__)
logging.basicConfig(level=logging.INFO)

db = TinyDB('partners.json')

# NOTE(review): the query string carries a hard-coded ``token`` (and ``_r``)
# value.  It looks like it was copied from a browser session, so it will
# presumably expire and should not live in source control -- confirm and move
# it to configuration.
_PARTNER_API_URL = 'https://www.italki.com/api/partner?_r=1462440964199&country=&gender=&hl=en-us&is_native=1&learn=russian&page={page}&speak=english&token=TWprMU5EWTRPQT09fDE0NjI0Mzg3MTN8Y2MyNDk4ZGJhMGMyNDY2ZmUxNTA3ZDA5NDJlM2QzNTBjN2YxOTNjNw%3D%3D'  # NOQA

# Seconds to wait for the server before giving up on a request.
_REQUEST_TIMEOUT = 30

# Seconds to pause between two consecutive page requests.
_PAGE_DELAY = 5


def _parse_login_date(timestamp):
    """Return the leading ``YYYY-MM-DD`` part of *timestamp* as a datetime."""
    return datetime.strptime(timestamp[:10], '%Y-%m-%d')


class User(object):
    """Attribute-style wrapper around one stored profile record.

    Every key of the record becomes an instance attribute, so the record must
    not contain keys named ``url`` or ``last_logon`` (those are read-only
    properties here).
    """

    def __init__(self, data):
        for key, value in data.items():
            setattr(self, key, value)

    @property
    def url(self):
        """Profile page address built from the record's ``id``."""
        return 'https://www.italki.com/user/{o.id}'.format(o=self)

    @property
    def last_logon(self):
        """Date part of ``last_login_time`` as a naive ``datetime``."""
        return _parse_login_date(self.last_login_time)

    def __unicode__(self):
        return u'{o.url}\t{o.nickname}\t{o.living_country_id}\t{o.last_logon}'.format(o=self)

    def __str__(self):
        text = self.__unicode__()
        if sys.version_info[0] < 3:
            # Python 2 requires ``__str__`` to return bytes; returning unicode
            # triggers an implicit ASCII encode that fails on non-ASCII
            # nicknames.
            return text.encode('utf-8')
        return text


def parse_page(page, max=None):
    """Download partner listings starting at *page* and store new profiles.

    Pages are fetched one after another for as long as the response reports
    ``meta.has_next``.  Profiles whose ``id`` is already in ``db`` are skipped.

    Args:
        page: Number of the first page to request.
        max: Optional budget of follow-up pages.  It is decremented before
            each next page and the crawl stops once it reaches zero; ``None``
            means no limit.  (The name shadows the builtin but is kept because
            it is part of the public signature.)

    Raises:
        requests.RequestException: On network failure, timeout, or an HTTP
            error status.
    """
    # Iterate instead of recursing once per page so that a long, uncapped
    # crawl cannot exhaust the interpreter's recursion limit.
    while max is None or max:
        response = requests.get(
            _PARTNER_API_URL.format(page=page),
            timeout=_REQUEST_TIMEOUT,
        )
        # Fail loudly on 4xx/5xx instead of trying to parse an error body.
        response.raise_for_status()
        data = response.json()

        for user in data['data']:
            if db.get(where('id') == user['id']):
                log.info('Skipping %s (%d) as it exists...',
                         user['nickname'], user['id'])
            else:
                log.info('Adding %s (%d)...', user['nickname'], user['id'])
                db.insert(user)

        if not data['meta']['has_next']:
            return

        time.sleep(_PAGE_DELAY)
        log.info('Parsing next page...')
        if max is not None:
            max -= 1
        page += 1


def find_decent():
    """Yield a ``User`` for every stored profile that passes all filters.

    A profile is dropped when any of the following holds:

    * ``is_pro`` or ``is_tutor`` is truthy;
    * ``living_country_id`` is ``'US'`` or ``'CA'``;
    * no entry of ``language_obj_s`` has ``language == 'russian'`` with
      ``level >= 3``;
    * the date in ``last_login_time`` is 90 or more days before now.
    """
    today = datetime.now()
    for user in db.all():
        if user['is_pro'] or user['is_tutor']:
            continue
        if user['living_country_id'] in ('US', 'CA'):
            continue

        knows_russian = any(
            lang['language'] == 'russian' and lang['level'] >= 3
            for lang in user['language_obj_s']
        )
        if not knows_russian:
            continue

        last_logon = _parse_login_date(user['last_login_time'])
        if (today - last_logon).days >= 90:
            continue

        yield User(user)


if __name__ == '__main__':
    # Uncomment to refresh the local database before filtering:
    # parse_page(1, 50)
    for user in find_decent():
        # str() yields UTF-8 bytes on Python 2 and text on Python 3, so this
        # single-argument print works under both interpreters.
        print(str(user))