Created
          May 11, 2023 22:39 
        
      - 
      
- 
        Save hwchase17/8bb41c048a6facb881634fc55c54a55d to your computer and use it in GitHub Desktop. 
Revisions
- 
        # hwchase17 created this gist May 11, 2023 (diff hunk @@ -0,0 +1,58 @@).
#
# Question answering over a fixed list of YouTube videos, done two ways:
#   1. a plain vector-store retriever, and
#   2. a SelfQueryRetriever that is told about the per-video "name" metadata.
# The same question is sent to both chains and each answer is printed so the
# two can be compared.

from langchain.chains import RetrievalQA
from langchain.chains.query_constructor.base import AttributeInfo
from langchain.document_loaders import YoutubeLoader
from langchain.embeddings import CohereEmbeddings
from langchain.indexes import VectorstoreIndexCreator  # unused here; kept from the original
from langchain.llms import OpenAI
from langchain.retrievers.self_query.base import SelfQueryRetriever
from langchain.schema import Document  # unused here; kept from the original
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.vectorstores import Chroma

# (video URL, human-readable title) pairs. The title is stored on every loaded
# document under metadata["name"] and is also listed in the attribute
# description handed to the self-query retriever below.
urls = [
    ("https://www.youtube.com/watch?v=fP6vRNkNEt0", "Prompt Injection"),
    ("https://www.youtube.com/watch?v=qWv2vyOX0tk", "Low Code-No Code"),
    ("https://www.youtube.com/watch?v=k8GNCCs16F4", "Agents In Production"),
    ("https://www.youtube.com/watch?v=1gRlCjy18m4", "Agents"),
    ("https://www.youtube.com/watch?v=fLn-WqliEQU", "Output Parsing"),
    ("https://www.youtube.com/watch?v=ywT-5yKDtDg", "Document QA"),
    ("https://www.youtube.com/watch?v=GrCFyyyAxCU", "SQL"),
    ("https://www.youtube.com/watch?v=AKsfHK_4tf4", "Chat Documents with JS"),
]

# Single source of truth for the query so both chains answer the same question.
QUESTION = (
    "what did they say about prompt injection in the agents in production webinar?"
)

# Load the documents for every video and tag each one with its video title.
docs = []
for url, title in urls:
    loader = YoutubeLoader.from_youtube_url(url, add_video_info=False)
    for doc in loader.load():
        doc.metadata["name"] = title
        docs.append(doc)

# Split into chunks (chunk_size=500) and index them in a Chroma vector store
# using Cohere embeddings.
embeddings = CohereEmbeddings()
docs = RecursiveCharacterTextSplitter(chunk_size=500).split_documents(docs)
vectorstore = Chroma.from_documents(docs, embeddings)

llm = OpenAI(temperature=0)

# Approach 1: retrieval through the vector store's default retriever.
vectorstore_chain = RetrievalQA.from_chain_type(
    llm, retriever=vectorstore.as_retriever()
)
# The original discarded this return value, so a script run showed nothing.
print(vectorstore_chain.run(QUESTION))

# Approach 2: self-query retrieval. The retriever is given a description of the
# "name" metadata field, including the list of valid video titles.
metadata_field_info = [
    AttributeInfo(
        name="name",
        description=f"The name of the video, should be one of: {[t for _, t in urls]}",
        type="string or list[string]",
    ),
]
document_content_description = "excerpts from langchain webinars"
retriever = SelfQueryRetriever.from_llm(
    llm,
    vectorstore,
    document_content_description,
    metadata_field_info,
    verbose=True,
)
chain = RetrievalQA.from_chain_type(llm, retriever=retriever)
print(chain.run(QUESTION))