This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| import mwclient # for downloading example Wikipedia articles | |
| import mwparserfromhell # for splitting Wikipedia articles into sections | |
| import pandas as pd # for DataFrames to store article sections and embeddings | |
| import re # for cutting <ref> links out of Wikipedia articles | |
| # import openai # for generating embeddings | |
| # import tiktoken # for counting tokens | |
| WIKI_SITE = "en.wikipedia.org" # hostname of the English-language Wikipedia; presumably the site mwclient connects to (usage not visible here, confirm) | |
| SECTIONS_TO_INCLUDE= [ | |
| "Plot", |