Skip to content

Instantly share code, notes, and snippets.

@hwchase17
Created May 11, 2023 22:39
Show Gist options
  • Save hwchase17/8bb41c048a6facb881634fc55c54a55d to your computer and use it in GitHub Desktop.
Save hwchase17/8bb41c048a6facb881634fc55c54a55d to your computer and use it in GitHub Desktop.

Revisions

  1. hwchase17 created this gist May 11, 2023.
    58 changes: 58 additions & 0 deletions langchain-youtube-selfquery.py
    Original file line number Diff line number Diff line change
    @@ -0,0 +1,58 @@
    from langchain.document_loaders import YoutubeLoader
    from langchain.indexes import VectorstoreIndexCreator
    # (video URL, webinar title) pairs. The title is what gets stored on each
    # transcript document under the "name" metadata key.
    urls = [
        ("https://www.youtube.com/watch?v=fP6vRNkNEt0", "Prompt Injection"),
        ("https://www.youtube.com/watch?v=qWv2vyOX0tk", "Low Code-No Code"),
        ("https://www.youtube.com/watch?v=k8GNCCs16F4", "Agents In Production"),
        ("https://www.youtube.com/watch?v=1gRlCjy18m4", "Agents"),
        ("https://www.youtube.com/watch?v=fLn-WqliEQU", "Output Parsing"),
        ("https://www.youtube.com/watch?v=ywT-5yKDtDg", "Document QA"),
        ("https://www.youtube.com/watch?v=GrCFyyyAxCU", "SQL"),
        ("https://www.youtube.com/watch?v=AKsfHK_4tf4", "Chat Documents with JS"),
    ]
    # Load the transcript of every video in `urls` and tag each resulting
    # document with its webinar title under metadata["name"], so the title
    # travels with the text into the vector store.
    docs = []
    for url, title in urls:
        # add_video_info=False: load the transcript only, without extra
        # video details.
        loader = YoutubeLoader.from_youtube_url(url, add_video_info=False)
        _docs = loader.load()
        for d in _docs:
            d.metadata["name"] = title
            docs.append(d)

    from langchain.schema import Document
    from langchain.embeddings import CohereEmbeddings
    from langchain.vectorstores import Chroma
    from langchain.text_splitter import RecursiveCharacterTextSplitter
    from langchain.chains import RetrievalQA
    # Embedding model used to vectorize the transcript chunks (default settings).
    embeddings = CohereEmbeddings()

    # Split the full transcripts into smaller chunks (chunk_size=500) and
    # rebind `docs` to the chunked documents.
    _splitter = RecursiveCharacterTextSplitter(chunk_size=500)
    docs = _splitter.split_documents(docs)

    # Index the chunks in a Chroma vector store.
    vectorstore = Chroma.from_documents(documents=docs, embedding=embeddings)

    from langchain.llms import OpenAI
    # temperature=0 keeps the completions as repeatable as possible.
    llm = OpenAI(temperature=0)

    # Baseline: question answering over the vector store's default retriever,
    # with no metadata filtering.
    vectorstore_chain = RetrievalQA.from_chain_type(llm, retriever=vectorstore.as_retriever())

    # Print the answer: as a bare expression statement the result of run()
    # would be discarded when this file is executed as a script.
    print(
        vectorstore_chain.run(
            "what did they say about prompt injection in the agents in production webinar?"
        )
    )

    from langchain.retrievers.self_query.base import SelfQueryRetriever
    from langchain.chains.query_constructor.base import AttributeInfo

    # Describe the metadata attribute available on each document. The
    # description embeds the list of webinar titles taken from `urls`.
    metadata_field_info = [
        AttributeInfo(
            name="name",
            description=f"The name of the video, should be one of: {[t for _, t in urls]}",
            type="string or list[string]",
        ),
    ]
    document_content_description = "excerpts from langchain webinars"

    # Self-querying retriever built from the LLM, the vector store, the
    # content description and the metadata schema above.
    retriever = SelfQueryRetriever.from_llm(
        llm,
        vectorstore,
        document_content_description,
        metadata_field_info,
        verbose=True,
    )

    chain = RetrievalQA.from_chain_type(llm, retriever=retriever)

    # Print the answer: as a bare expression statement the result of run()
    # would be discarded when this file is executed as a script.
    print(
        chain.run(
            "what did they say about prompt injection in the agents in production webinar?"
        )
    )