Skip to content

Instantly share code, notes, and snippets.

@crashish
Created January 22, 2018 07:51
Show Gist options
  • Select an option

  • Save crashish/d64d415d5912c883aa6089154d6ac3de to your computer and use it in GitHub Desktop.

Select an option

Save crashish/d64d415d5912c883aa6089154d6ac3de to your computer and use it in GitHub Desktop.

Revisions

  1. crashish created this gist Jan 22, 2018.
    56 changes: 56 additions & 0 deletions golden_globes.py
    Original file line number Diff line number Diff line change
    @@ -0,0 +1,56 @@
    # twitter + insomnia + curiosity = crappy python code
    # https://twitter.com/criznash/status/950268556544942080

    import requests
    import bs4

    # Wikipedia pages to scrape; `director` is fetched further down the script.
    comedy = "https://en.wikipedia.org/wiki/Golden_Globe_Award_for_Best_Motion_Picture_%E2%80%93_Musical_or_Comedy"
    director = "https://en.wikipedia.org/wiki/Golden_Globe_Award_for_Best_Director"

    # Fetch the Musical-or-Comedy page. The timeout keeps the script from
    # hanging forever, and raise_for_status() stops an HTTP error page from
    # being parsed into a silently empty result.
    d = requests.get(comedy, timeout=30)
    d.raise_for_status()
    soup = bs4.BeautifulSoup(d.text, "html.parser")
    tables = soup.select(".wikitable")

    # pt maps the text of a row's year link to a list of
    # (first link text, second link text) tuples taken from the rows that
    # follow it. The entry taken from the year row itself gets a "*" suffix
    # (presumably marking the winner -- confirm against the page layout).
    pt = {}
    for table in tables:
        year = ""
        # Skip the first <tr> of each table (presumably the header row).
        for row in table.find_all('tr')[1:]:
            # Only anchors that carry a title attribute are considered.
            fields = row.find_all('a', title=True)
            if len(fields) < 2:
                # Too few links to build a tuple. This also covers rows with
                # no titled links at all, which used to raise IndexError.
                continue
            first = fields[0]
            # A row starts a new year when its first link's title mentions
            # the ceremony, or when its text is one of these hard-coded years
            # (presumably years whose link title lacks that phrase).
            starts_year = ('Golden Globe Awards' in first.attrs['title']
                           or first.text in ['1951', '1954', '1955'])
            if starts_year:
                year = first.text
                pt[year] = []
                # The year row needs two more links for its own entry;
                # without them, still open the year so later rows group
                # under it.
                if len(fields) >= 3:
                    pt[year].append((fields[1].text + "*", fields[2].text))
            elif year in pt:
                # Rows seen before any year row have nothing to attach to
                # and are ignored instead of raising KeyError.
                pt[year].append((fields[0].text, fields[1].text))



    # Fetch the Best Director page. The timeout keeps the script from hanging
    # forever, and raise_for_status() stops an HTTP error page from being
    # parsed into a silently empty result.
    d2 = requests.get(director, timeout=30)
    d2.raise_for_status()
    soup2 = bs4.BeautifulSoup(d2.text, "html.parser")
    tables2 = soup2.select(".wikitable")

    # pt2 maps the text of a row's year link to a list of tuples built from
    # that row's links. The link order is swapped when storing: the LATER
    # link's text goes first in the tuple (presumably so the film title ends
    # up in position 0 -- confirm against the page layout).
    pt2 = {}
    for table in tables2:
        year = ""
        # Skip the first <tr> of each table (presumably the header row).
        for row in table.find_all('tr')[1:]:
            # Only anchors that carry a title attribute are considered.
            fields = row.find_all('a', title=True)
            if len(fields) < 2:
                # Too few links to build a tuple. This also covers rows with
                # no titled links at all, which used to raise IndexError.
                continue
            first = fields[0]
            # A row starts a new year when its first link's title mentions
            # the ceremony, or when its text is one of these hard-coded years
            # (presumably years whose link title lacks that phrase).
            starts_year = ('Golden Globe Awards' in first.attrs['title']
                           or first.text in ['1951', '1954', '1955'])
            if starts_year:
                year = first.text
                pt2[year] = []
                # The year row needs two more links for its own entry;
                # without them, still open the year so later rows group
                # under it.
                if len(fields) >= 3:
                    pt2[year].append((fields[2].text, fields[1].text))
            elif year in pt2:
                # Rows seen before any year row have nothing to attach to
                # and are ignored instead of raising KeyError.
                pt2[year].append((fields[1].text, fields[0].text))
    import pprint

    # res keeps, per year, every pt entry whose first element contains the
    # first element of at least one pt2 entry for the same year. The test is
    # a substring match, so the "*" suffix on some pt entries does not
    # prevent a hit.
    res = {}
    for year in pt:
        # A year present in pt but absent from pt2 simply has no matches
        # (indexing pt2[year] directly used to raise KeyError).
        candidates = pt2.get(year, [])
        for tup in pt[year]:
            # any() records each pt entry at most once, even when several
            # pt2 entries match it. Empty pt2 texts are skipped because
            # "" is a substring of every string and would match everything.
            if any(tup2[0] and tup2[0] in tup[0] for tup2 in candidates):
                res.setdefault(year, []).append(tup)

    pprint.pprint(res)
    # Years with at least one match / total years collected in pt.
    # The parenthesised form works on both Python 2 and Python 3.
    print("{}/{}".format(len(res), len(pt)))