Skip to content

Instantly share code, notes, and snippets.

@jaklinger
Created July 5, 2018 09:02
Show Gist options
  • Select an option

  • Save jaklinger/d3c4940248c4a3dc52ce4d3d55708009 to your computer and use it in GitHub Desktop.

Select an option

Save jaklinger/d3c4940248c4a3dc52ce4d3d55708009 to your computer and use it in GitHub Desktop.

Revisions

  1. jaklinger created this gist Jul 5, 2018.
    35 changes: 35 additions & 0 deletions papers_by_fos.py
    Original file line number Diff line number Diff line change
    @@ -0,0 +1,35 @@
    import pandas as pd
    from sqlalchemy import create_engine
    from sqlalchemy.sql import text as sql_text
    from collections import defaultdict

    # Load the connection settings. The config file is read as one
    # colon-separated record: host:port:database:user:password.
    with open('/Users/jklinger/Nesta-AWS/AWS-RDS-config/open-academic-graph.config') as f:
        # Drop the trailing line break so it cannot end up inside the password,
        # and split at most four times so a password containing ':' stays whole.
        host, port, database, user, password = f.read().rstrip('\r\n').split(':', 4)
    # NOTE(review): `port` and `database` are read but not used below -- the URI
    # omits the port and hard-codes the database name. Confirm this is intended.
    database_uri = 'postgresql://{}:{}@{}/{}'.format(user, password, host, "microsoft_academic_graph")
    con = create_engine(database_uri)


    # Select up to 3000 `paper` JSON documents whose 'fos' value contains the
    # bound :fos JSON array, whose 'lang' is 'en', and which have both an
    # 'abstract' and a 'keywords' key.
    # ('fos' presumably stands for "field of study" -- confirm against the schema.)
    query = ''' select paper from microsoft_academic_graph
    where ((paper -> 'fos'::text)) @> :fos
    and ((paper ->> 'lang'::text)) = 'en'
    and paper::jsonb ? 'abstract'
    and paper::jsonb ? 'keywords'
    limit 3000'''

    # Values substituted for :fos, one query per entry.
    fosses = ['Biology', 'Medicine','Geology','Chemistry',
              'Psychology','Philosophy','Sociology','Engineering',
              'Economics','Computer Science','Art','Physics',
              'History','Political Science','Materials Science',
              'Mathematics','Geography','Business'] #,'Environmental Science']

    ids = []                      # every returned paper id, duplicates included
    papers = defaultdict(list)    # fos value -> list of paper documents
    # Run all queries on one explicit connection. Connection.execute() with a
    # parameter dict works on SQLAlchemy 1.x and 2.x (Engine.execute() was
    # removed in 2.0), and the `with` block releases the connection afterwards.
    with con.connect() as connection:
        for fos in fosses:
            print(fos)
            # :fos is bound as a one-element JSON array literal, e.g. '["Biology"]'.
            exec_result = connection.execute(sql_text(query), {'fos': '["{}"]'.format(fos)})
            # Each row has a single column, hence the one-element unpacking.
            for paper, in exec_result.fetchall():
                papers[fos].append(paper)
                ids.append(paper['id'])

    # (unique ids, total ids): the two differ when a paper was returned for more
    # than one entry of `fosses`. Displayed only in an interactive session; as a
    # plain script this expression has no visible effect.
    len(set(ids)), len(ids)