# Format of the ``fetched`` request parameter shared by both handlers.
_FETCHED_DATE_FORMAT = "%Y-%m-%d"

# Number of Page entities pulled from the datastore per fetch_page() call.
_PAGE_BATCH_SIZE = 50


def _parse_fetched_date(value):
    """Parse a ``fetched`` request parameter.

    Returns the ``datetime`` for *value*, or ``None`` when *value* is empty
    or does not match ``_FETCHED_DATE_FORMAT``.
    """
    try:
        return datetime.strptime(value, _FETCHED_DATE_FORMAT)
    except (TypeError, ValueError):
        return None


def _format_csv_row(page):
    """Build one ``;``-separated export line for *page*.

    The line holds the fetch timestamp, the percent-quoted URL and the
    zlib-compressed (level 9), base64-encoded UTF-8 HTML, ending in a
    newline.
    """
    # NOTE(review): urllib.quote() can fail on non-ASCII unicode input in
    # Python 2 -- confirm that Page.url is always ASCII.
    fields = (
        page.fetched.strftime("%Y-%m-%d %H:%M:%S"),
        urllib.quote(page.url),
        base64.b64encode(zlib.compress(page.html.encode('utf-8'), 9)),
    )
    return ";".join(fields) + "\n"


class InitCSVHandler(webapp2.RequestHandler):
    """Enqueue a CSV export task for the day named by ``fetched``."""

    def get(self):
        """Validate the ``fetched`` parameter and queue ``/create_csv``.

        Writes a short plain message to the response in every case; a task
        is only added when ``fetched`` is a valid ``YYYY-MM-DD`` date.
        """
        fetched = self.request.get('fetched')
        if not fetched:
            self.response.write("no fetched date")
            return

        # Reject malformed dates here: once enqueued, the worker could never
        # process them. The raw value is deliberately not echoed back.
        if _parse_fetched_date(fetched) is None:
            self.response.write("invalid fetched date, expected YYYY-MM-DD")
            return

        taskqueue.add(url='/create_csv',
                      queue_name='csv',
                      params={'fetched': fetched})
        self.response.write("creating " + str(fetched))


class CreateCSVHandler(webapp2.RequestHandler):
    """Task handler that exports one day of Page entities to a GCS file."""

    def post(self):
        """Write every Page fetched on the requested day to a ``.csv`` file.

        Reads the ``fetched`` request parameter (``YYYY-MM-DD``), queries
        pages whose ``fetched`` timestamp lies within that day, and writes
        one line per page to ``/csv123/<fetched>.csv``.
        """
        raw_fetched = self.request.get('fetched')
        day_start = _parse_fetched_date(raw_fetched)
        if day_start is None:
            # Returning normally (instead of raising) lets the task complete,
            # so a request that can never succeed is not retried.
            logging.error("dropping CSV task, invalid fetched date: %r",
                          raw_fetched)
            return
        day_end = day_start + timedelta(days=1)

        # Keep Page entities out of the ndb caches while exporting.
        ctx = ndb.get_context()
        ctx.set_cache_policy(lambda key: key.kind() != 'Page')

        pages_query = Page.query()
        pages_query = pages_query.filter(Page.fetched >= day_start)
        pages_query = pages_query.filter(Page.fetched < day_end)

        filename = "/csv123/" + raw_fetched + ".csv"
        logging.info("filename: %s", filename)

        # NOTE(review): close() is only reached on success; presumably an
        # unfinished upload is discarded on failure -- confirm against the
        # GCS client library before switching to try/finally or ``with``.
        gcs_file = gcs.open(filename, 'w', content_type="text/plain")

        cursor = None
        more = True
        while more:
            pages, cursor, more = pages_query.fetch_page(
                _PAGE_BATCH_SIZE, start_cursor=cursor)
            for page in pages:
                gcs_file.write(_format_csv_row(page))

        gcs_file.close()
        gc.collect()
        ctx.clear_cache()