Skip to content

Instantly share code, notes, and snippets.

@tmtmtmtm
Created November 20, 2017 15:36
Show Gist options
  • Select an option

  • Save tmtmtmtm/285bdea45c41d578e7fdae93dff8462b to your computer and use it in GitHub Desktop.

Select an option

Save tmtmtmtm/285bdea45c41d578e7fdae93dff8462b to your computer and use it in GitHub Desktop.

Revisions

  1. tmtmtmtm created this gist Nov 20, 2017.
    99 changes: 99 additions & 0 deletions dail-32-position-statements.rb
    Original file line number Diff line number Diff line change
    @@ -0,0 +1,99 @@
    #!/usr/bin/env ruby
    # encoding: utf-8
    # frozen_string_literal: true

    require 'pry'
    require 'json'
    require 'rest-client'

    # Endpoint that wikidata_data sends its SPARQL queries to.
    WIKIDATA_SPARQL_URL = 'https://query.wikidata.org/sparql'

    # Property identifiers. P_POSITION, P_TERM, P_PARTY and P_CONSTITUENCY are
    # used as keys when the output statements are built; the same P-numbers also
    # appear literally in the SPARQL query further down.
    # NOTE(review): P_START_DATE, P_END_DATE and P_ELECTED_IN are not referenced
    # anywhere else in this script.
    P_POSITION = 'P39'
    P_START_DATE = 'P580'
    P_END_DATE = 'P582'
    P_CONSTITUENCY = 'P768'
    P_PARTY = 'P4100'
    P_ELECTED_IN = 'P2715'
    P_TERM = 'P2937'
    # Key under which the quoted source URL is emitted in each statement.
    # NOTE(review): the S prefix (rather than P) is presumably the "source"
    # form expected by whatever tool consumes the output — confirm.
    SOURCE = 'S854'

    # Thin wrapper around one binding from a SPARQL JSON result row.
    #
    # Only the :type and :value keys of the wrapped hash are read.
    class WikidataValue
      # @param data [Hash] binding hash, read through its :type and :value keys
      def initialize(data)
        @data = data
      end

      # @return [Object] whatever the binding stores under :type
      def type
        @data[:type]
      end

      # @return [Boolean] true when the binding is a 'uri' whose value starts
      #   with the Wikidata entity prefix
      def wikidata_link?
        return false unless type == 'uri'

        raw_value.start_with?('http://www.wikidata.org/entity')
      end

      # @return [Object] the final '/'-separated segment for entity links,
      #   otherwise the stored value untouched
      def value
        wikidata_link? ? raw_value.split('/').last : raw_value
      end

      private

      # The unprocessed :value entry of the binding.
      def raw_value
        @data[:value]
      end
    end

    # Runs a SPARQL query against WIKIDATA_SPARQL_URL and returns its rows.
    #
    # @param query [String] the SPARQL text, sent as the `query` parameter
    # @return [Array<Hash>] one hash per result binding, keyed by symbolized
    #   variable name, with every value wrapped in a WikidataValue
    # @raise [RuntimeError] when RestClient reports a failure; the message
    #   includes the query text and the underlying error message
    def wikidata_data(query)
      response = RestClient.get(WIKIDATA_SPARQL_URL, params: { query: query, format: 'json' })
      bindings = JSON.parse(response, symbolize_names: true)[:results][:bindings]
      bindings.map do |row|
        row.each_with_object({}) do |(name, binding), wrapped|
          wrapped[name] = WikidataValue.new(binding)
        end
      end
    rescue RestClient::Exception => e
      raise "Wikidata query #{query} failed: #{e.message}"
    end

    # Fetches every row of a morph.io scraper's `data` table.
    #
    # The API key is read from the MORPH_API_KEY environment variable. A missing
    # or empty key is rejected up front rather than being sent to the API, so
    # the failure names the real cause instead of surfacing as an HTTP error.
    #
    # @param scraper [String] scraper path interpolated into the API URL
    # @return [Object] the parsed JSON response with symbolized hash keys
    # @raise [RuntimeError] when MORPH_API_KEY is unset or empty, or when
    #   RestClient reports a failure (wrapped the same way as wikidata_data)
    def morph_data(scraper)
      morph_api_key = ENV['MORPH_API_KEY'].to_s
      raise 'MORPH_API_KEY environment variable is not set' if morph_api_key.empty?

      morph_api_url = format('https://api.morph.io/%s/data.json', scraper)
      begin
        result = RestClient.get morph_api_url, params: {
          key: morph_api_key,
          query: 'SELECT * FROM data',
        }
      rescue RestClient::Exception => e
        # Only the scraper name goes into the message; the key is left out.
        raise "morph.io query for #{scraper} failed: #{e.message}"
      end
      JSON.parse(result, symbolize_names: true)
    end

    # SPARQL template passed to wikidata_data. It selects every item that has a
    # P39 statement whose value is the first %s and whose P2937 qualifier is the
    # second %s, together with that statement's optional P768 (?constituency)
    # and P4100 (?party) qualifiers. Both %s slots are filled in with String#%
    # at the point where the query is run.
    query = <<~SPARQL
    SELECT DISTINCT ?item ?itemLabel ?constituency ?party WHERE {
    ?item p:P39 ?mem .
    ?mem ps:P39 wd:%s ; pq:P2937 wd:%s .
    OPTIONAL { ?mem pq:P768 ?constituency }
    OPTIONAL { ?mem pq:P4100 ?party }
    SERVICE wikibase:label { bd:serviceParam wikibase:language "en". }
    }
    ORDER BY ?itemLabel
    SPARQL

    #-----------------------------------------------------------------------

    # Item IDs substituted, in this order, into the query's two %s slots:
    # MEMBERSHIP as the P39 value and TERM as the P2937 qualifier value.
    MEMBERSHIP = 'Q654291'
    TERM = 'Q28976095'

    # Lookup of what the query already returns, keyed by item ID. When the same
    # item appears in several rows the last row wins.
    existing = wikidata_data(format(query, MEMBERSHIP, TERM)).each_with_object({}) do |row, known|
      known[row[:item].value] = {
        party: row[:party]&.value,
        constituency: row[:constituency]&.value
      }
    end

    # Rows scraped from Wikipedia, fetched through morph.io.
    incoming = morph_data('everypolitician-scrapers/ireland-dail-members-wikipedia')

    # A row without an :id has no subject to attach a statement to; joining it
    # would emit a line starting with an empty field. Report such rows on stderr
    # (stdout stays clean for the statements) and leave them out.
    identified, unidentified = incoming.partition { |r| !r[:id].to_s.empty? }
    warn "Skipping #{unidentified.size} scraped row(s) without an id" unless unidentified.empty?

    # One tab-separated line per scraped member that the Wikidata query did not
    # already return: the item ID followed by alternating property/value fields.
    # NOTE(review): the layout looks like QuickStatements v1 input — confirm.
    statements = identified.reject { |r| existing[r[:id]] }.map do |r|
      data = {
        P_POSITION => MEMBERSHIP,
        P_TERM => TERM,
        P_PARTY => r[:party_wikidata],
        P_CONSTITUENCY => r[:constituency_wikidata],
        SOURCE => '"https://en.wikipedia.org/wiki/Members_of_the_32nd_D%C3%A1il"',
      }
      # Array#join renders nil as an empty string, which would leave a property
      # followed by a blank value; drop properties the scraper had no value for.
      present = data.reject { |_, v| v.to_s.empty? }
      # join flattens the nested [property, value] pairs with the same separator.
      [r[:id], present.to_a].join("\t")
    end

    puts statements