-
-
Save davosian/cdda3f88b3307014e32b46d61236133d to your computer and use it in GitHub Desktop.
Revisions
-
ahue renamed this gist
Feb 14, 2021. 1 changed file with 0 additions and 0 deletions. There are no files selected for viewing.
File renamed without changes. -
ahue created this gist
Feb 14, 2021. There are no files selected for viewing.
#! /usr/bin/python3
"""Print the link titles found on a few de.wiktionary.org appendix pages.

For every entry in ``appendices`` the MediaWiki ``prop=links`` query is
followed through all of its continuation batches; titles containing a colon
are skipped and the remaining titles are printed one per line.
"""

from contextlib import closing
import json

try:
    # For Python 3.0 and later
    from urllib.request import urlopen
    from urllib.parse import quote
except ImportError:
    # Fall back to Python 2's urllib2
    from urllib2 import urlopen
    from urllib2 import quote


appendices = [
    "Verzeichnis:Deutsch/Essen_und_Trinken/Lebensmittel",
    "Verzeichnis:Deutsch/Essen_und_Trinken/Obst_und_Gem%C3%BCse",
    "Verzeichnis:Deutsch/Essen_und_Trinken/Speisen",
    "Verzeichnis:Deutsch/Essen_und_Trinken/Getr%C3%A4nke",
]

# Query template; ``{}`` receives the (already percent-encoded) page title.
base_url = "https://de.wiktionary.org/w/api.php?format=json&action=query&titles={}&prop=links&formatversion=2&pllimit=500"
# Suffix appended to the query URL to request the next batch of links.
plcont = "&plcontinue={}"


def get_jsonparsed_data(url):
    """
    Receive the content of ``url``, parse it as JSON and return the object.

    Parameters
    ----------
    url : str

    Returns
    -------
    dict
    """
    # closing() releases the connection even if read/decode fails, and works
    # with response objects that are not context managers themselves.
    with closing(urlopen(url)) as response:
        data = response.read().decode("utf-8")
    return json.loads(data)


def _link_titles(res):
    """Return the colon-free link titles contained in one API response."""
    titles = []
    # A page entry without a "links" key (or a response without pages)
    # simply contributes no titles instead of raising.
    for page in res.get("query", {}).get("pages", []):
        for link in page.get("links", []):
            title = link["title"]
            # NOTE(review): the colon filter presumably drops namespaced
            # pages (e.g. "Verzeichnis:..."); confirm against the wiki.
            if ":" not in title:
                titles.append(title)
    return titles


def collect_words(appendix, fetch=None):
    """
    Collect all colon-free link titles of one appendix page.

    Parameters
    ----------
    appendix : str
        Page title, inserted verbatim into ``base_url``.
    fetch : callable, optional
        Function mapping a URL to the parsed JSON response. Defaults to
        ``get_jsonparsed_data``.

    Returns
    -------
    list
        Titles in the order the API returned them, across all batches.
    """
    if fetch is None:
        fetch = get_jsonparsed_data

    first_url = base_url.format(appendix)
    next_url = first_url
    found = []
    while True:
        res = fetch(next_url)
        found.extend(_link_titles(res))
        if "continue" not in res:
            return found
        # Each continuation is requested relative to the first URL, not the
        # previous one, so continuation parameters never accumulate.
        token = quote(res["continue"]["plcontinue"])
        next_url = first_url + plcont.format(token)


def main():
    """Fetch every appendix and print all collected titles, one per line."""
    words = []
    for appendix in appendices:
        words += collect_words(appendix)
    print("\n".join(words))


if __name__ == "__main__":
    main()