Skip to content

Instantly share code, notes, and snippets.

View gfelitti's full-sized avatar

Guilherme Felitti gfelitti

  • São Paulo
View GitHub Profile
import os
import pandas as pd
from google_auth_oauthlib.flow import InstalledAppFlow
from google.auth.transport.requests import Request
from googleapiclient.discovery import build
import google.oauth2.credentials
import requests
from bs4 import BeautifulSoup
from tqdm import tqdm
@gfelitti
gfelitti / youtube_geo.py
Last active February 26, 2024 18:22
youtube_geo
# !pip install randomheaders pandas requests
import requests
from pandas import json_normalize
from datetime import datetime, timedelta
import randomheaders
h = randomheaders.LoadHeader()
class YouTubeVideoFetcher:
import requests
import pandas as pd
headers = {
'Connection': 'keep-alive',
'sec-ch-ua': '"Chromium";v="88", "Google Chrome";v="88", ";Not A Brand";v="99"',
'Range-unit': 'items',
'Prefer': 'count=none',
'sec-ch-ua-mobile': '?0',
'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_14_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/88.0.4324.96 Safari/537.36',
import pandas as pd
# Notebook fragment: configure pandas display and load a shapefile with geopandas.
# Passing None for 'display.max_rows' removes the row limit when printing DataFrames.
pd.set_option('display.max_rows', None)
import zipfile
import geopandas as gpd
import matplotlib.pyplot as plt
# IPython/Jupyter magic (not plain Python): render matplotlib figures inline in the notebook.
%matplotlib inline
# Open the shapefile
# NOTE(review): the path below is a placeholder; presumably it should point at the
# Brazilian states (UFs) shapefile on disk — confirm before running.
shape=gpd.read_file('path/shapfile/UFs_Brasil')
@gfelitti
gfelitti / IBGE2GoogleCalendar.py
Last active July 24, 2020 12:22
Script em Python para raspar o calendário do IBGE e gravar automaticamente no Google Calendar as pesquisas que serão divulgadas até o fim de 2020. A conexão com o Google Calendar usou como base este tutorial -> https://gist.github.com/nikhilkumarsingh/8a88be71243afe8d69390749d16c8322 — Todo: FGV-Ibre https://portalibre.fgv.br/calendario-de-divulgac…
# Make sure the Google API client module is installed in your environment
import sys
# IPython/Jupyter shell escape (not plain Python): runs pip through the interpreter
# at sys.executable, i.e. the same Python that is executing this notebook.
!{sys.executable} -m pip install google-api-python-client
import pandas as pd
import requests
import randomheaders
from bs4 import BeautifulSoup
@gfelitti
gfelitti / corona.py
Last active March 3, 2022 03:02
Como raspar os jsons do https://covid.saude.gov.br/ - se começar a dar problema, tente rotacionar os headers
import requests
import pandas as pd
from pandas.io.json import json_normalize
headers = {
'User-Agent': 'Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:74.0) Gecko/20100101 Firefox/74.0',
'Accept': 'application/json, text/plain, */*',
'Accept-Language': 'pt-BR,pt;q=0.8,en-US;q=0.5,en;q=0.3',
'X-Parse-Application-Id': 'unAFkcaNDeXajurGB7LChj8SgQYS2ptm',
'Origin': 'https://covid.saude.gov.br',
# List the unique values in a DataFrame column (returned in order of first
# appearance). pd.unique accepts the Series directly, so no .ravel() is needed.
pd.unique(df.column_name)

# Convert a column to a numeric dtype. Values that cannot be parsed as numbers
# become NaN; they are NOT dropped, so chain .dropna() if those rows must go.
# Series.convert_objects() was removed in pandas 1.0; pd.to_numeric with
# errors='coerce' is the supported replacement.
df['col'] = pd.to_numeric(df['col'].astype(str), errors='coerce')

# Keep only the DataFrame rows whose column value is in a given list
valuelist = ['value1', 'value2', 'value3']
df = df[df.column.isin(valuelist)]