Skip to content

Instantly share code, notes, and snippets.

@alexmorozov
Created September 7, 2016 09:25
Show Gist options
  • Save alexmorozov/3da79f886314b85985dde87211db933d to your computer and use it in GitHub Desktop.
Save alexmorozov/3da79f886314b85985dde87211db933d to your computer and use it in GitHub Desktop.

Revisions

  1. alexmorozov created this gist Sep 7, 2016.
    84 changes: 84 additions & 0 deletions parse_italki.py
    Original file line number Diff line number Diff line change
    @@ -0,0 +1,84 @@
    #--coding: utf8--

    # Standard library.
    from datetime import datetime
    import logging
    import time

    # Third-party packages.
    import requests
    from tinydb import TinyDB, where

    # Root logging at INFO so the per-user progress messages are emitted,
    # plus the logger used throughout this module.
    logging.basicConfig(level=logging.INFO)
    log = logging.getLogger(__name__)

    # TinyDB database backed by ``partners.json``.
    db = TinyDB('partners.json')


    class User(object):
        """Attribute-style wrapper around one partner record.

        Every key of the ``data`` mapping becomes an instance attribute, so
        ``data['nickname']`` is reachable as ``user.nickname``.  The properties
        and string conversions read ``id``, ``nickname``,
        ``living_country_id`` and ``last_login_time``; a record lacking one of
        them raises ``AttributeError`` when that value is first needed.
        """

        def __init__(self, data):
            """Copy each key/value pair of ``data`` onto the instance."""
            for key, value in data.items():
                setattr(self, key, value)

        @property
        def url(self):
            """Profile page URL built from the record's ``id``."""
            return 'https://www.italki.com/user/{o.id}'.format(o=self)

        @property
        def last_logon(self):
            """Last login date as a ``datetime`` at midnight.

            Only the first ten characters (``YYYY-MM-DD``) of
            ``last_login_time`` are parsed; anything after them is ignored.
            """
            return datetime.strptime(self.last_login_time[:10], '%Y-%m-%d')

        def __unicode__(self):
            """Tab-separated summary: URL, nickname, country, last logon."""
            return u'{o.url}\t{o.nickname}\t{o.living_country_id}\t{o.last_logon}'.format(o=self)

        def __str__(self):
            """Return the summary as the interpreter's native ``str`` type.

            When the summary text is not already a ``str`` (Python 2, where it
            is a ``unicode`` object) it is encoded as UTF-8.  Returning the
            ``unicode`` object directly would make ``str(user)`` fail with
            ``UnicodeEncodeError`` for any non-ASCII nickname.
            """
            text = self.__unicode__()
            if isinstance(text, str):
                return text
            return text.encode('utf-8')


    def parse_page(page, max=None):
        """Download partner search results and store unseen users in ``db``.

        Starting at ``page``, each results page is fetched from the italki
        partner API.  Users whose ``id`` is not yet present in ``db`` are
        inserted; known ones are skipped.  Paging continues while the response
        reports ``meta.has_next``, pausing five seconds between requests.

        :param page: number of the first results page to fetch.
        :param max: upper bound on the number of pages to fetch; ``None``
            (the default) means no bound and ``0`` fetches nothing.
        :raises requests.HTTPError: if the API answers with an error status.
        """
        # NOTE(review): the query string embeds fixed ``token`` and ``_r``
        # values -- presumably captured from a browser session; confirm they
        # are still accepted and consider moving them out of the source.
        url = 'https://www.italki.com/api/partner?_r=1462440964199&country=&gender=&hl=en-us&is_native=1&learn=russian&page={page}&speak=english&token=TWprMU5EWTRPQT09fDE0NjI0Mzg3MTN8Y2MyNDk4ZGJhMGMyNDY2ZmUxNTA3ZDA5NDJlM2QzNTBjN2YxOTNjNw%3D%3D' # NOQA

        # Iterate instead of recursing once per page, so a long crawl cannot
        # exhaust the interpreter's recursion limit.
        while max is None or max:
            # A timeout keeps a stalled connection from hanging the crawl.
            response = requests.get(url.format(page=page), timeout=30)
            # Fail loudly on HTTP errors instead of parsing an error body.
            response.raise_for_status()
            data = response.json()

            for user in data['data']:
                if not db.get(where('id') == user['id']):
                    log.info('Adding %s (%d)...', user['nickname'], user['id'])
                    db.insert(user)
                else:
                    log.info('Skipping %s (%d) as it exists...',
                             user['nickname'], user['id'])

            if not data['meta']['has_next']:
                break
            if max is not None:
                max -= 1
                if not max:
                    # Page budget used up: stop without a pointless delay.
                    break
            time.sleep(5)
            log.info('Parsing next page...')
            page += 1


    def find_decent():
        """Yield a ``User`` for every stored record that passes all filters.

        A record is rejected when it is flagged ``is_pro`` or ``is_tutor``,
        when its ``living_country_id`` is ``US`` or ``CA``, when none of its
        ``language_obj_s`` entries is ``russian`` at level 3 or above, or when
        its last login date lies 90 or more days before the current time.
        """
        now = datetime.now()

        for record in db.all():
            if (record['is_pro'] or record['is_tutor']
                    or record['living_country_id'] in ('US', 'CA')):
                continue

            # At least one language entry must be Russian at level >= 3.
            knows_russian = any(
                entry['language'] == 'russian' and entry['level'] >= 3
                for entry in record['language_obj_s']
            )
            if not knows_russian:
                continue

            # Only the date part (first ten characters) of the timestamp
            # is parsed.
            login_day = datetime.strptime(record['last_login_time'][:10],
                                          '%Y-%m-%d')
            idle_days = (now - login_day).days
            if idle_days < 90:
                yield User(record)


    if __name__ == '__main__':
        # Crawling step, currently disabled:
        # parse_page(1, 50)

        # Print one summary line per matching user.  ``unicode`` is the
        # Python 2 builtin; the text is encoded to UTF-8 bytes explicitly
        # before printing.
        for user in find_decent():
            line = unicode(user)
            print(line.encode('utf-8'))