Created
November 20, 2017 15:36
-
-
Save tmtmtmtm/285bdea45c41d578e7fdae93dff8462b to your computer and use it in GitHub Desktop.
Revisions
-
tmtmtmtm created this gist
Nov 20, 2017. There are no files selected for viewing
#!/usr/bin/env ruby
# encoding: utf-8
# frozen_string_literal: true

# Lists the members Wikidata already records for one position/term, fetches
# member rows from a morph.io scraper, and prints one tab-separated line of
# property/value pairs for every scraped member that Wikidata does not yet have.
# NOTE(review): the output shape (item, property/value pairs, then an "S854"
# source pair) looks like QuickStatements input — confirm with the author.

require 'pry'
require 'json'
require 'rest-client'

WIKIDATA_SPARQL_URL = 'https://query.wikidata.org/sparql'

# Wikidata property IDs. Not all of them are referenced below.
P_POSITION = 'P39'
P_START_DATE = 'P580'
P_END_DATE = 'P582'
P_CONSTITUENCY = 'P768'
P_PARTY = 'P4100'
P_ELECTED_IN = 'P2715'
P_TERM = 'P2937'
SOURCE = 'S854'

# Item IDs substituted into the SPARQL query and written into every statement.
MEMBERSHIP = 'Q654291'
TERM = 'Q28976095'

# Wraps a single cell of a SPARQL JSON result binding (a hash with :type and
# :value keys, as read by the methods below).
class WikidataValue
  # @param data [Hash] one binding cell with symbolized keys
  def initialize(data)
    @data = data
  end

  # @return [String] the last path segment for Wikidata entity URIs,
  #   otherwise the raw value unchanged
  def value
    return raw_value.split('/').last if wikidata_link?

    raw_value
  end

  # @return [String] the cell's :type field
  def type
    @data[:type]
  end

  # @return [Boolean] true when the cell is a URI under the Wikidata entity prefix
  def wikidata_link?
    type == 'uri' && raw_value.start_with?('http://www.wikidata.org/entity')
  end

  private

  def raw_value
    @data[:value]
  end
end

# Runs a SPARQL query against WIKIDATA_SPARQL_URL.
#
# @param query [String] the SPARQL text
# @return [Array<Hash{Symbol => WikidataValue}>] one hash per result binding
# @raise [RuntimeError] when the HTTP request fails
def wikidata_data(query)
  response = RestClient.get(WIKIDATA_SPARQL_URL, params: { query: query, format: 'json' })
  bindings = JSON.parse(response.body, symbolize_names: true)[:results][:bindings]
  bindings.map do |binding_row|
    binding_row.map { |name, cell| [name, WikidataValue.new(cell)] }.to_h
  end
rescue RestClient::Exception => e
  raise "Wikidata query #{query} failed: #{e.message}"
end

# Fetches every row of a morph.io scraper's `data` table.
#
# @param scraper [String] scraper path, e.g. "owner/name"
# @return [Object] the parsed JSON response with symbolized keys
# @raise [RuntimeError] when MORPH_API_KEY is unset or the HTTP request fails
def morph_data(scraper)
  # Fail before the request rather than sending an empty key.
  api_key = ENV.fetch('MORPH_API_KEY') { raise 'MORPH_API_KEY environment variable is not set' }
  url = format('https://api.morph.io/%s/data.json', scraper)
  response = RestClient.get(url, params: { key: api_key, query: 'SELECT * FROM data' })
  JSON.parse(response.body, symbolize_names: true)
rescue RestClient::Exception => e
  raise "morph.io query for #{scraper} failed: #{e.message}"
end

# Builds the tab-separated statement line for one scraped row.
#
# @param row [Hash] scraped row; reads :id, :party_wikidata, :constituency_wikidata
# @return [String] the row id followed by alternating property/value columns
def statement_for(row)
  pairs = {
    P_POSITION     => MEMBERSHIP,
    P_TERM         => TERM,
    P_PARTY        => row[:party_wikidata],
    P_CONSTITUENCY => row[:constituency_wikidata],
    SOURCE         => '"https://en.wikipedia.org/wiki/Members_of_the_32nd_D%C3%A1il"',
  }
  # A property with a blank value would leave an empty column in the line,
  # so drop such pairs instead of emitting them.
  present = pairs.reject { |_property, value| value.to_s.empty? }
  [row[:id], *present.to_a.flatten].join("\t")
end

# The two %s placeholders are filled with the position item and the term item.
query = <<~SPARQL
  SELECT DISTINCT ?item ?itemLabel ?constituency ?party
  WHERE {
    ?item p:P39 ?mem .
    ?mem ps:P39 wd:%s ; pq:P2937 wd:%s .
    OPTIONAL { ?mem pq:P768 ?constituency }
    OPTIONAL { ?mem pq:P4100 ?party }
    SERVICE wikibase:label { bd:serviceParam wikibase:language "en". }
  }
  ORDER BY ?itemLabel
SPARQL

#-----------------------------------------------------------------------

# Item id => qualifiers already on Wikidata. Only the keys are consulted below.
existing = wikidata_data(query % [MEMBERSHIP, TERM]).map do |found|
  [found[:item].value, { party: found[:party]&.value, constituency: found[:constituency]&.value }]
end.to_h

incoming = morph_data('everypolitician-scrapers/ireland-dail-members-wikipedia')

# Rows without an id cannot be matched or turned into a statement.
unidentified, identified = incoming.partition { |row| row[:id].to_s.empty? }
unidentified.each { |row| warn "Skipping row without an id: #{row.inspect}" }

statements = identified.reject { |row| existing.key?(row[:id]) }.map { |row| statement_for(row) }

puts statements