"""Enqueue BNMP (Banco Nacional de Mandados de Prisao) scraping jobs, one per
result page per Brazilian state, onto a Redis/RQ queue consumed by
``download_state_page`` workers."""

import requests
import json       # NOTE(review): unused here; kept in case other tooling relies on it
import time
import threading  # NOTE(review): unused here; kept in case other tooling relies on it

from redis import Redis
from rq import Queue

from bnmp_scraping_court_orders import download_state_page

# Browser-like headers required by the CNJ portal (it rejects default
# python-requests user agents).
headers = {
    'Host': 'www.cnj.jus.br',
    'Connection': 'keep-alive',
    'Accept': 'application/json, text/plain, */*',
    'Origin': 'http://www.cnj.jus.br',
    'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/63.0.3239.132 Safari/537.36',
    'Content-Type': 'application/json;charset=UTF-8',
    'Referer': 'http://www.cnj.jus.br/bnmp/',
    'Accept-Encoding': 'gzip, deflate',
    'Accept-Language': 'pt-BR,pt;q=0.9,en-US;q=0.8,en;q=0.7,es;q=0.6'
}

# BNMP search endpoint.
search = 'http://www.cnj.jus.br/bnmp/rest/pesquisar'


def request(url, payload, headers):
    """POST ``payload`` as JSON to ``url`` and return the decoded JSON response.

    Retries forever with a 10-second back-off on network/HTTP failures or a
    non-JSON response body, since the CNJ endpoint is flaky under load.
    """
    while True:
        try:
            post = requests.post(url, json=payload, headers=headers)
            return post.json()
        # BUG FIX: original code had ``except e:``, which references an
        # undefined name and would itself raise NameError on the first
        # failure instead of retrying. Catch the request-layer errors and
        # JSON-decode failures explicitly.
        except (requests.exceptions.RequestException, ValueError):
            print('Something wrong happened... sleeping 10 seconds')
            time.sleep(10)


def run(UF):
    """Query the BNMP search API for state ``UF`` (two-letter code) and enqueue
    one ``download_state_page`` RQ job per result page."""
    # NOTE(review): the string values 'null'/'true'/'false' below are sent
    # verbatim instead of JSON null/booleans — presumably copied from a
    # browser payload. Kept as-is to preserve the exact request body;
    # confirm against the API before changing.
    payload_json = {
        "criterio": {
            "orgaoJulgador": {"uf": UF, "municipio": "", "descricao": ""},
            "orgaoJTR": {},
            "parte": {"documentos": [{"identificacao": 'null'}]},
        },
        "paginador": {"paginaAtual": 1},
        "fonetica": "true",
        "ordenacao": {"porNome": 'false', "porData": 'true'},
    }

    print('Downloading %s state' % UF)
    response = request(search, payload_json, headers)
    # Total page count drives how many per-page jobs we enqueue.
    pagination_number = response['paginador']['totalPaginas']
    print('== %s pages' % pagination_number)

    # Queues
    queue = Queue('court_orders', connection=Redis())

    # Fan out: one queued job per result page (workers do the downloading).
    for i in range(1, pagination_number + 1):
        queue.enqueue(download_state_page, UF, i)


# All 26 Brazilian state codes + DF targeted by the scrape.
states = '''AC AL AP AM BA CE DF ES GO MA MT MS MG PA PB PR PE PI RJ RN
RS RO RR SC SE TO'''.split()

# Guarded so importing this module (e.g. from an RQ worker) does not
# trigger a full scrape; the original ran the loop at import time.
if __name__ == "__main__":
    for state in states:
        run(state)