Skip to content

Instantly share code, notes, and snippets.

@carteakey
carteakey / wikipedia-movie-summary.py
Created June 24, 2023 01:05
Get movie plot summaries from Wikipedia
import mwclient # for downloading example Wikipedia articles
import mwparserfromhell # for splitting Wikipedia articles into sections
import pandas as pd # for DataFrames to store article sections and embeddings
import re # for cutting <ref> links out of Wikipedia articles
# import openai # for generating embeddings
# import tiktoken # for counting tokens
# Hostname of the English-language Wikipedia site.
# NOTE(review): presumably the host passed to mwclient when fetching
# articles; the usage is outside this view — confirm.
WIKI_SITE = "en.wikipedia.org"
SECTIONS_TO_INCLUDE= [
"Plot",