Skip to content

Instantly share code, notes, and snippets.

@alexmorozov
Created September 7, 2016 09:25
Show Gist options
  • Save alexmorozov/3da79f886314b85985dde87211db933d to your computer and use it in GitHub Desktop.
Save alexmorozov/3da79f886314b85985dde87211db933d to your computer and use it in GitHub Desktop.
# -*- coding: utf8 -*-
from datetime import datetime
import logging
# Module-level logger named after this module.
log = logging.getLogger(__name__)
# Configure the root logger so that INFO-level messages are emitted.
logging.basicConfig(level=logging.INFO)
import time
import requests
from tinydb import TinyDB, where
# TinyDB database stored in 'partners.json'; shared by the functions in
# this module that insert and read partner records.
db = TinyDB('partners.json')
class User(object):
    """Attribute-style wrapper around one partner record.

    Every key of the ``data`` mapping passed to the constructor becomes an
    instance attribute. The properties and string conversions read the
    ``id``, ``nickname``, ``living_country_id`` and ``last_login_time``
    attributes, so ``data`` must provide those keys for them to work.
    """

    def __init__(self, data):
        """Copy each key/value pair of ``data`` onto the instance."""
        for k, v in data.items():
            setattr(self, k, v)

    @property
    def url(self):
        """Profile URL built from the user's ``id``."""
        return 'https://www.italki.com/user/{o.id}'.format(o=self)

    @property
    def last_logon(self):
        """Date of the last login as a ``datetime`` at midnight.

        Only the first ten characters (``YYYY-MM-DD``) of
        ``last_login_time`` are parsed; anything after them is ignored.
        """
        return datetime.strptime(self.last_login_time[:10], '%Y-%m-%d')

    def __unicode__(self):
        """Tab-separated text line: url, nickname, country, last logon."""
        return u'{o.url}\t{o.nickname}\t{o.living_country_id}\t{o.last_logon}'.format(o=self)

    def __str__(self):
        """Return the same line as ``__unicode__`` in the native ``str`` type.

        On Python 2 ``str`` is a byte string, and returning ``unicode`` from
        ``__str__`` triggers an implicit ASCII encode that fails for
        non-ASCII nicknames, so the text is encoded to UTF-8 explicitly.
        On Python 3 the text is returned unchanged.
        """
        text = self.__unicode__()
        if str is bytes:  # true only on Python 2
            return text.encode('utf-8')
        return text
def parse_page(page, max=None):
    """Fetch partner listing pages and store previously unseen users in ``db``.

    Starting at ``page``, each page of the italki partner API is downloaded
    and every user whose ``id`` is not yet in the database is inserted;
    users already present are skipped. Paging continues while the response
    reports ``meta.has_next``, sleeping five seconds between requests.

    :param page: number of the first page to fetch.
    :param max: maximum number of pages to fetch. ``None`` means no limit
        and ``0`` fetches nothing.
    :raises requests.RequestException: if a request fails, times out or
        returns an HTTP error status.
    """
    # NOTE(review): the ``_r`` and ``token`` query values are hard-coded;
    # presumably they were captured from a browser session and may expire.
    url = 'https://www.italki.com/api/partner?_r=1462440964199&country=&gender=&hl=en-us&is_native=1&learn=russian&page={page}&speak=english&token=TWprMU5EWTRPQT09fDE0NjI0Mzg3MTN8Y2MyNDk4ZGJhMGMyNDY2ZmUxNTA3ZDA5NDJlM2QzNTBjN2YxOTNjNw%3D%3D'  # NOQA
    remaining = max
    # Loop instead of recursing once per page, so an unlimited crawl cannot
    # exhaust the interpreter's recursion limit.
    while remaining is None or remaining:
        # A timeout keeps a stalled connection from blocking forever.
        response = requests.get(url.format(page=page), timeout=30)
        # Fail on HTTP errors here rather than on a confusing JSON/key error.
        response.raise_for_status()
        data = response.json()
        for user in data['data']:
            if not db.get(where('id') == user['id']):
                log.info('Adding %s (%d)...', user['nickname'], user['id'])
                db.insert(user)
            else:
                log.info('Skipping %s (%d) as it exists...',
                         user['nickname'], user['id'])
        if not data['meta']['has_next']:
            break
        time.sleep(5)
        log.info('Parsing next page...')
        if remaining is not None:
            remaining -= 1
        page += 1
def find_decent():
    """Yield a ``User`` for every stored record that passes all filters.

    A record is skipped when any of the following holds:

    * its ``is_pro`` or ``is_tutor`` flag is truthy;
    * its ``living_country_id`` is ``'US'`` or ``'CA'``;
    * none of its ``language_obj_s`` entries is ``'russian'`` with a
      ``level`` of at least 3;
    * its ``last_login_time`` date is 90 or more days before now.
    """
    now = datetime.now()
    for record in db.all():
        if record['is_pro'] or record['is_tutor']:
            continue
        if record['living_country_id'] in ('US', 'CA'):
            continue
        # True as soon as one language entry matches; False when none does
        # (including an empty language list).
        knows_russian = any(
            entry['language'] == 'russian' and entry['level'] >= 3
            for entry in record['language_obj_s']
        )
        if not knows_russian:
            continue
        # Only the leading YYYY-MM-DD part of the timestamp is parsed.
        login_day = datetime.strptime(record['last_login_time'][:10],
                                      '%Y-%m-%d')
        if (now - login_day).days < 90:
            yield User(record)
if __name__ == '__main__':
    # Uncomment to fetch up to 50 listing pages into the database first:
    # parse_page(1, 50)
    for user in find_decent():
        line = user.__unicode__()
        # Python 2's ``str`` is a byte string, so encode explicitly to avoid
        # an implicit ASCII encode on output; Python 3 prints text directly.
        if str is bytes:
            line = line.encode('utf-8')
        # Parenthesised single-argument form works as a print statement on
        # Python 2 and as a function call on Python 3.
        print(line)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment