Skip to content

Instantly share code, notes, and snippets.

@davosian
Forked from ahue/groceries_wiktionary
Created July 19, 2021 14:57
Show Gist options
  • Save davosian/cdda3f88b3307014e32b46d61236133d to your computer and use it in GitHub Desktop.
Save davosian/cdda3f88b3307014e32b46d61236133d to your computer and use it in GitHub Desktop.

Revisions

  1. @ahue ahue renamed this gist Feb 14, 2021. 1 changed file with 0 additions and 0 deletions.
    File renamed without changes.
  2. @ahue ahue created this gist Feb 14, 2021.
    53 changes: 53 additions & 0 deletions groceries
    Original file line number Diff line number Diff line change
    @@ -0,0 +1,53 @@
    #! /usr/bin/python3
    try:
    # For Python 3.0 and later
    from urllib.request import urlopen
    from urllib.parse import quote
    except ImportError:
    # Fall back to Python 2's urllib2
    from urllib2 import urlopen
    from urllib2 import quote

    import json


    def get_jsonparsed_data(url):
        """
        Fetch ``url`` and return its body parsed as JSON.

        Parameters
        ----------
        url : str
            Address to request; passed unchanged to ``urlopen``.

        Returns
        -------
        object
            Whatever ``json.loads`` produces for the response body (a ``dict``
            when the top-level JSON value is an object).

        Raises
        ------
        ValueError
            If the body is not valid UTF-8 or not valid JSON.

        Errors raised by ``urlopen`` itself are not caught here and propagate
        to the caller.
        """
        response = urlopen(url)
        try:
            # The body is always decoded as UTF-8 before parsing.
            data = response.read().decode("utf-8")
        finally:
            # Close explicitly so the connection is released even when reading
            # or decoding fails.  try/finally is used instead of ``with``
            # because the Python 2 fallback's response object is not a
            # context manager.
            response.close()
        return json.loads(data)

    # Wiktionary index pages whose outgoing links are collected.  The titles
    # are already percent-encoded, so they are inserted into the URL as-is.
    appendices = [
        "Verzeichnis:Deutsch/Essen_und_Trinken/Lebensmittel",
        "Verzeichnis:Deutsch/Essen_und_Trinken/Obst_und_Gem%C3%BCse",
        "Verzeichnis:Deutsch/Essen_und_Trinken/Speisen",
        "Verzeichnis:Deutsch/Essen_und_Trinken/Getr%C3%A4nke",
    ]
    # Query template: ``{}`` is filled with one page title per request.
    base_url = "https://de.wiktionary.org/w/api.php?format=json&action=query&titles={}&prop=links&formatversion=2&pllimit=500"
    # Suffix appended to the first-page URL to request the next batch of links.
    plcont = "&plcontinue={}"

    words = []
    for appendix in appendices:
        url = base_url.format(appendix)
        url2 = url
        while True:
            res = get_jsonparsed_data(url2)
            # Tolerate responses without "query"/"pages"/"links" (e.g. a page
            # entry that carries no links) instead of raising
            # KeyError/IndexError and losing everything collected so far.
            # Each request names a single title, so this normally visits
            # exactly one page entry.
            for page in res.get("query", {}).get("pages", []):
                # Titles containing ":" are skipped, which drops links such
                # as the "Verzeichnis:..." pages themselves.
                words += [
                    d["title"]
                    for d in page.get("links", [])
                    if ":" not in d["title"]
                ]
            # No "continue" object means this was the last batch.
            if "continue" not in res:
                break
            # Always rebuild from the first-page URL so that continuation
            # tokens do not pile up across iterations.
            url2 = url + plcont.format(quote(res["continue"]["plcontinue"]))

    print("\n".join(words))