Created
April 3, 2019 05:03
-
-
Save sandgate-dev/07ef8221be45378e931f42f923ad5b17 to your computer and use it in GitHub Desktop.
Revisions
-
sandgate-dev created this gist
Apr 3, 2019. There are no files selected for viewing
# Download articles for every news source and store each result page as a
# JSON file, after dropping articles that have no content.
for source in news_sources:
    # news_sources is a list of source ids similar to
    # ['the-verge', 'the-wall-street-journal', 'the-washington-post',
    #  'the-washington-times', 'time', 'usa-today', 'vice-news', 'wired']
    #
    # TODO: set the range to the maximum possible number of pages, based on
    # the total results divided by page_size, instead of hard-coding two
    # pages. Sketch: issue one get_everything() call without `page`, then use
    # articles['totalResults'] // 100 + 1 as the upper bound.
    for page in range(1, 3):
        # NOTE(review): this call sits outside the try block, so an error
        # raised by the API client stops the whole script — confirm intended.
        articles = newsapi.get_everything(
            sources=source,
            from_param=str(date),
            sort_by='relevancy',
            page_size=100,
            page=page,
        )
        try:
            # Drop articles whose content is None. Slice assignment keeps the
            # same list object, matching the original in-place deletion.
            articles['articles'][:] = [
                article
                for article in articles['articles']
                if article['content'] is not None
            ]

            date_str = date.strftime("%Y%m%d")
            filename = '_'.join([source, date_str])
            # presumably builds the output path for this source/date/page;
            # get_json_file is defined elsewhere — verify against its source.
            json_file_name = get_json_file(filename, page, json_file_path)
            # The with-statement closes the file; no explicit close() needed.
            with open(json_file_name, 'w') as f:
                f.write(ujson.dumps(articles) + '\n')
        except Exception as e:
            # Best effort: report the problem and continue with the next page.
            print(e)