Created
          September 7, 2016 09:25 
        
      - 
      
- 
        Save alexmorozov/3da79f886314b85985dde87211db933d to your computer and use it in GitHub Desktop. 
  
    
      This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
      Learn more about bidirectional Unicode characters
    
  
  
    
  | #--coding: utf8-- | |
| from datetime import datetime | |
| import logging | |
| log = logging.getLogger(__name__) | |
| logging.basicConfig(level=logging.INFO) | |
| import time | |
| import requests | |
| from tinydb import TinyDB, where | |
| db = TinyDB('partners.json') | |
class User(object):
    """Attribute-style wrapper around one partner record.

    Every key/value pair of the mapping given to the constructor becomes an
    instance attribute.  The properties and string conversions below read
    the ``id``, ``nickname``, ``living_country_id`` and ``last_login_time``
    attributes, so the mapping is expected to provide them.
    """

    def __init__(self, data):
        """Copy each key/value pair of ``data`` onto the instance.

        A key that matches one of the read-only properties (``url``,
        ``last_logon``) makes ``setattr`` raise ``AttributeError``.
        """
        for k, v in data.items():
            setattr(self, k, v)

    @property
    def url(self):
        """Profile URL built from the record's ``id``."""
        return 'https://www.italki.com/user/{o.id}'.format(o=self)

    @property
    def last_logon(self):
        """Last login as a ``datetime`` with the time of day dropped.

        Only the first ten characters of ``last_login_time`` are parsed,
        using the ``YYYY-MM-DD`` format.
        """
        return datetime.strptime(self.last_login_time[:10], '%Y-%m-%d')

    def __unicode__(self):
        """Tab-separated text line: URL, nickname, country id, last logon."""
        return u'{o.url}\t{o.nickname}\t{o.living_country_id}\t{o.last_logon}'.format(o=self)

    def __str__(self):
        """Return the same line as ``__unicode__`` in the native ``str`` type.

        Where ``str`` is the byte string type (Python 2) the text is encoded
        as UTF-8; returning the unicode object there would trigger an
        implicit ASCII encode and fail on non-ASCII nicknames.
        """
        text = self.__unicode__()
        if str is bytes:
            return text.encode('utf-8')
        return text
def parse_page(page, max=None):
    """Fetch partner-search pages and store users not yet present in ``db``.

    Starting at ``page``, each page is requested from the partner API.
    Every entry of the response's ``data`` list whose ``id`` is not found
    in ``db`` is inserted; known entries are only logged.  While the
    response reports ``meta.has_next``, the function sleeps five seconds
    and moves on to the next page.

    Pages are walked in a loop rather than by recursion, so the number of
    pages is not bounded by the interpreter's recursion limit.

    :param page: number of the first page to request.
    :param max: page budget.  ``None`` means unlimited; otherwise it is
        decremented after each page that has a successor, and fetching
        stops once it reaches ``0`` (a budget of ``0`` fetches nothing).
    :raises requests.RequestException: on timeout, connection failure or
        an HTTP error status.
    """
    # NOTE(review): the `_r` and `token` query values are hard-coded;
    # presumably they are session-specific -- confirm they are still accepted.
    url = 'https://www.italki.com/api/partner?_r=1462440964199&country=&gender=&hl=en-us&is_native=1&learn=russian&page={page}&speak=english&token=TWprMU5EWTRPQT09fDE0NjI0Mzg3MTN8Y2MyNDk4ZGJhMGMyNDY2ZmUxNTA3ZDA5NDJlM2QzNTBjN2YxOTNjNw%3D%3D'  # NOQA

    while max is None or max:
        # Without a timeout a stalled connection would block forever.
        response = requests.get(url.format(page=page), timeout=30)
        # Fail with a clear HTTP error instead of a confusing JSON/KeyError.
        response.raise_for_status()
        data = response.json()

        for user in data['data']:
            if not db.get(where('id') == user['id']):
                log.info('Adding %s (%d)...', user['nickname'], user['id'])
                db.insert(user)
            else:
                log.info('Skipping %s (%d) as it exists...',
                         user['nickname'], user['id'])

        if not data['meta']['has_next']:
            return

        # Pause between requests before asking for the next page.
        time.sleep(5)
        log.info('Parsing next page...')
        if max is not None:
            max -= 1
        page += 1
def find_decent(language='russian', min_level=3, max_idle_days=90,
                excluded_countries=('US', 'CA')):
    """Yield a ``User`` for every record in ``db`` that passes all filters.

    A record is skipped when any of the following holds:

    * ``is_pro`` or ``is_tutor`` is truthy;
    * ``living_country_id`` is one of ``excluded_countries``;
    * no entry of ``language_obj_s`` has ``language == language`` with
      ``level >= min_level``;
    * the date in ``last_login_time`` (first ten characters, parsed as
      ``YYYY-MM-DD``) is ``max_idle_days`` or more days before now.

    The defaults reproduce the criteria that were previously hard-coded.

    :param language: value matched against each ``lang['language']``.
    :param min_level: minimum ``lang['level']`` for that language.
    :param max_idle_days: records whose last login is this many days old,
        or older, are dropped.
    :param excluded_countries: container of ``living_country_id`` values
        to drop.
    """
    today = datetime.now()
    for user in db.all():
        if user['is_pro'] or user['is_tutor']:
            continue
        if user['living_country_id'] in excluded_countries:
            continue

        # any() is False for an empty language list, so no flag variable can
        # leak from one record to the next.
        has_language = any(
            lang['language'] == language and lang['level'] >= min_level
            for lang in user['language_obj_s']
        )
        if not has_language:
            continue

        last_logon = datetime.strptime(user['last_login_time'][:10],
                                       '%Y-%m-%d')
        if (today - last_logon).days >= max_idle_days:
            continue

        yield User(user)
if __name__ == '__main__':
    import sys

    # Uncomment to crawl the API (page 1, budget of 50) before filtering:
    # parse_page(1, 50)

    # Write UTF-8 bytes directly: the byte stream is `sys.stdout.buffer`
    # where it exists (Python 3) and `sys.stdout` itself otherwise
    # (Python 2).  The former `print unicode(...)` statement only parsed
    # on Python 2.
    out = getattr(sys.stdout, 'buffer', sys.stdout)
    for user in find_decent():
        out.write(user.__unicode__().encode('utf-8') + b'\n')
  
    Sign up for free
    to join this conversation on GitHub.
    Already have an account?
    Sign in to comment