Skip to content

Instantly share code, notes, and snippets.

@LucWeber
Forked from neubig/get_citations.py
Created May 29, 2024 12:35
Show Gist options
  • Select an option

  • Save LucWeber/764dfebcc832af31043b009f228b8b31 to your computer and use it in GitHub Desktop.

Select an option

Save LucWeber/764dfebcc832af31043b009f228b8b31 to your computer and use it in GitHub Desktop.

Revisions

  1. @neubig neubig created this gist Oct 21, 2021.
    40 changes: 40 additions & 0 deletions get_citations.py
    Original file line number Diff line number Diff line change
    @@ -0,0 +1,40 @@
    import requests
    import sys
    import time

    # Delay, in milliseconds, slept before every request. It is doubled each
    # time the server answers HTTP 429 and is never reset afterwards.
    sleep_time = 20


    def query_api(url, session, max_retries=10):
        """Fetch ``url`` through ``session`` and return the decoded JSON body.

        Sleeps ``sleep_time`` milliseconds before the first request. While the
        response status is 429, the module-level ``sleep_time`` is doubled, a
        warning is written to stderr, and the request is retried after the new
        delay.

        Args:
            url: Address to request.
            session: Object exposing ``get(url)`` whose result has a
                ``status_code`` attribute and a ``json()`` method
                (e.g. a ``requests.Session``).
            max_retries: Maximum number of retries after a 429 response.
                ``None`` retries indefinitely (the historical behaviour).

        Returns:
            The value of ``response.json()`` on HTTP 200, otherwise ``None``
            (after printing a warning to stderr). A request that is still
            rate-limited once ``max_retries`` is exhausted also yields ``None``.
        """
        global sleep_time
        time.sleep(sleep_time / 1000.0)
        r = session.get(url)
        retries = 0
        # Bounded exponential backoff: without the retry cap a persistently
        # rate-limited endpoint would keep this loop spinning forever.
        while r.status_code == 429 and (max_retries is None or retries < max_retries):
            sleep_time *= 2
            print(
                f'WARNING: Hit rate limit. Increasing sleep to {sleep_time} ms',
                file=sys.stderr,
            )
            time.sleep(sleep_time / 1000.0)
            r = session.get(url)
            retries += 1
        if r.status_code != 200:
            print(f'WARNING: Could not access url {url}', file=sys.stderr)
            return None
        return r.json()

    # Read the API key from the first line of s2key.txt and register it as the
    # x-api-key header on the session object used for the requests below.
    with open('s2key.txt') as key_file:
        s2_key = next(key_file).strip()
    session = requests.Session()
    session.headers['x-api-key'] = s2_key

    def print_all_citations(conf, num_papers):
        """Print one ``<ACL id>\\t<citation count>`` line per paper to stdout.

        Paper ids are built as ``{conf}.{n}`` for ``n`` from 1 to
        ``num_papers`` inclusive, and each is looked up through ``query_api``
        using the module-level ``session``. The count is the length of the
        ``'citations'`` entry of the returned data.

        Args:
            conf: Identifier prefix, e.g. ``'2020.findings-emnlp'``.
            num_papers: Highest paper number to query.

        Papers for which ``query_api`` returns ``None`` are skipped.
        """
        # NOTE(review): this targets the v1 paper endpoint; confirm it is
        # still served before relying on the output.
        for pid in range(1, num_papers + 1):
            aclid = f'{conf}.{pid}'
            s2url = f'https://api.semanticscholar.org/v1/paper/ACL:{aclid}'
            paper_data = query_api(s2url, session)
            if paper_data is None:
                # Lookup failed; nothing to report for this paper.
                continue
            citations = len(paper_data['citations'])
            print(f'{aclid}\t{citations}')

    # EMNLP Papers
    # Only one track is queried per run: the main-conference call is currently
    # disabled, so this run prints counts for the Findings papers only.
    # print_all_citations('2020.emnlp-main', 752)
    print_all_citations('2020.findings-emnlp', 447)
    53 changes: 53 additions & 0 deletions make_lot.py
    Original file line number Diff line number Diff line change
    @@ -0,0 +1,53 @@
    import matplotlib
    import numpy as np
    import os
    from matplotlib import pyplot as plt
    import sys

    # Hex fill colours for the bar series; series i is drawn with entry i.
    bar_colors = [
        "#7293CB",
        "#E1974C",
        "#84BA5B",
        "#D35E60",
        "#808585",
        "#9067A7",
        "#AB6857",
        "#CCC210",
    ]

    def make_bar_chart(datas,
                       output_directory, output_fig_file, bar_names, output_fig_format='png',
                       errs=None, title=None, xlabel=None, xticklabels=None, ylabel=None):
        """Draw a grouped bar chart and save it to disk.

        Args:
            datas: Sequence of data series, one bar group member per series.
                The number of x positions is taken from ``len(datas[0])``.
            output_directory: Directory the figure is written to; created if
                it does not exist.
            output_fig_file: File name without extension.
            bar_names: Legend labels, one per series.
            output_fig_format: File extension / format passed to ``savefig``.
            errs: Optional sequence of per-series error values (``yerr``).
            title: Optional plot title.
            xlabel: Optional x-axis label.
            xticklabels: Optional tick labels; when omitted the x axis is
                hidden.
            ylabel: Optional y-axis label.

        Returns:
            None. The figure is written to
            ``<output_directory>/<output_fig_file>.<output_fig_format>``.
        """
        fig, ax = plt.subplots()
        num_series = len(datas)
        ind = np.arange(len(datas[0]))
        width = 0.7 / num_series
        bars = []
        for i, data in enumerate(datas):
            # Identity test: `errs != None` is evaluated element-wise when
            # errs is a NumPy array and then fails in a boolean context.
            err = errs[i] if errs is not None else None
            # Cycle through the palette so more series than colours still plot.
            color = bar_colors[i % len(bar_colors)]
            bars.append(ax.bar(ind + i * width, data, width, color=color, bottom=0, yerr=err))
        # Set axis/title labels
        if title is not None:
            ax.set_title(title)
        if xlabel is not None:
            ax.set_xlabel(xlabel)
        if ylabel is not None:
            ax.set_ylabel(ylabel)
        if xticklabels is not None:
            # Bars are centred at ind + i*width, so the middle of each group is
            # offset by half the span between the first and last bar centre
            # (this equals width / 2 for exactly two series).
            ax.set_xticks(ind + width * (num_series - 1) / 2)
            ax.set_xticklabels(xticklabels)
            plt.xticks(rotation=70)
        else:
            ax.xaxis.set_visible(False)

        ax.legend(bars, bar_names)
        ax.autoscale_view()

        # exist_ok avoids the check-then-create race; an empty directory string
        # means "current directory" and must not be passed to makedirs.
        if output_directory:
            os.makedirs(output_directory, exist_ok=True)
        out_file = os.path.join(output_directory, f'{output_fig_file}.{output_fig_format}')
        fig.savefig(out_file, format=output_fig_format, bbox_inches='tight')

    # Inclusive upper bounds of the citation-count buckets; a paper is counted
    # in the first bucket whose bound is >= its citation count.
    histogram_buckets = [0, 1, 2, 5, 10, 20, 50, 100, 200, 500, 1000, 10000]

    # conf_data[0] holds rows whose id contains 'emnlp-main'; conf_data[1]
    # holds every other row.
    conf_data = [[0 for _ in histogram_buckets] for _ in range(2)]
    for line in sys.stdin:
        fields = line.split()
        if not fields:
            # Skip blank lines instead of failing on the tuple unpacking below.
            continue
        pid, cites = fields
        cites = int(cites)
        whichconf = 0 if ('emnlp-main' in pid) else 1
        # Counts above the last bound match no bucket and are not tallied.
        for bid, bval in enumerate(histogram_buckets):
            if cites <= bval:
                conf_data[whichconf][bid] += 1
                break
    # Convert counts to per-conference ratios. A conference with no rows has
    # only zero counts, so dividing by 1 keeps it at 0.0 instead of raising
    # ZeroDivisionError.
    norm_data = [[float(x) / (sum(y) or 1) for x in y] for y in conf_data]
    make_bar_chart(norm_data,'.','cites_diff', ['EMNLP 2020', 'EMNLP 2020 Findings'],xticklabels=[f'<={x}' for x in histogram_buckets], ylabel='ratio of papers')