import ujson

for source in news_sources:
    # news_sources is a list of NewsAPI source ids similar to
    # ['the-verge', 'the-wall-street-journal', 'the-washington-post',
    #  'the-washington-times', 'time', 'usa-today', 'vice-news', 'wired']
    #
    # Ideally the range below would be derived from the total results
    # divided by page_size, e.g.:
    # articles = newsapi.get_everything(sources=source, from_param=str(date),
    #                                   sort_by='relevancy', page_size=100)
    # articles['totalResults'] // 100 + 1
    for page in range(1, 3):
        articles = newsapi.get_everything(sources=source, from_param=str(date),
                                          sort_by='relevancy', page_size=100,
                                          page=page)
        try:
            # Drop articles whose 'content' field is missing.
            articles['articles'] = [a for a in articles['articles']
                                    if a['content'] is not None]
            # Write one JSON file per source, date, and page.
            date_str = date.strftime("%Y%m%d")
            filename = '_'.join([source, date_str])
            json_file_name = get_json_file(filename, page, json_file_path)
            with open(json_file_name, 'w') as f:
                f.write(ujson.dumps(articles) + '\n')
        except Exception as e:
            print(e)
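
# A minimal sketch (not used above) of the dynamic paging the comment
# describes: derive the page count from 'totalResults' instead of
# hard-coding range(1, 3). It assumes the same newsapi client and call
# parameters; fetch_all_pages is a hypothetical helper, not part of the
# original script. Ceiling division avoids requesting one page too many
# when totalResults is an exact multiple of page_size.
def fetch_all_pages(source, date, page_size=100):
    first = newsapi.get_everything(sources=source, from_param=str(date),
                                   sort_by='relevancy', page_size=page_size,
                                   page=1)
    # Round up: e.g. 250 results at page_size 100 -> 3 pages.
    total_pages = -(-first['totalResults'] // page_size)
    pages = [first]
    for page in range(2, total_pages + 1):
        pages.append(newsapi.get_everything(sources=source,
                                            from_param=str(date),
                                            sort_by='relevancy',
                                            page_size=page_size, page=page))
    return pages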