#--coding: utf8-- from datetime import datetime import logging log = logging.getLogger(__name__) logging.basicConfig(level=logging.INFO) import time import requests from tinydb import TinyDB, where db = TinyDB('partners.json') class User(object): def __init__(self, data): for k, v in data.items(): setattr(self, k, v) @property def url(self): return 'https://www.italki.com/user/{o.id}'.format(o=self) @property def last_logon(self): return datetime.strptime(self.last_login_time[:10], '%Y-%m-%d') def __unicode__(self): return u'{o.url}\t{o.nickname}\t{o.living_country_id}\t{o.last_logon}'.format(o=self) def __str__(self): return self.__unicode__() def parse_page(page, max=None): if max is not None and not max: return url = 'https://www.italki.com/api/partner?_r=1462440964199&country=&gender=&hl=en-us&is_native=1&learn=russian&page={page}&speak=english&token=TWprMU5EWTRPQT09fDE0NjI0Mzg3MTN8Y2MyNDk4ZGJhMGMyNDY2ZmUxNTA3ZDA5NDJlM2QzNTBjN2YxOTNjNw%3D%3D' # NOQA response = requests.get(url.format(page=page)) data = response.json() for user in data['data']: if not db.get(where('id') == user['id']): log.info('Adding %s (%d)...', user['nickname'], user['id']) db.insert(user) else: log.info('Skipping %s (%d) as it exists...', user['nickname'], user['id']) if data['meta']['has_next']: time.sleep(5) log.info('Parsing next page...') if max is not None: max -= 1 parse_page(page + 1, max) def find_decent(): today = datetime.now() for user in db.all(): if user['is_pro'] or user['is_tutor']: continue if user['living_country_id'] in ('US', 'CA'): continue for lang in user['language_obj_s']: if lang['language'] == 'russian' and lang['level'] >= 3: good = True break else: good = False if not good: continue last_logon = datetime.strptime(user['last_login_time'][:10], '%Y-%m-%d') if (today - last_logon).days >= 90: continue yield User(user) if __name__ == '__main__': #parse_page(1, 50) for user in find_decent(): print unicode(user).encode('utf-8')