// RAG demo: index a local PDF with OpenAI embeddings in HNSWLib, then
// answer a question over it with a RetrievalQAChain.

// PDF loader: parses a local PDF into LangChain Document objects.
import { PDFLoader } from "langchain/document_loaders/fs/pdf";
// Text splitter: chunks long documents so each piece fits an embedding call.
import { RecursiveCharacterTextSplitter } from "langchain/text_splitter";
// OpenAI embeddings used to vectorize each chunk.
import { OpenAIEmbeddings } from "langchain/embeddings/openai";
// HNSWLib: in-process vector store for the embedded chunks.
import { HNSWLib } from "langchain/vectorstores/hnswlib";
// Chain that wires an LLM to a retriever for question answering.
import { RetrievalQAChain } from "langchain/chains";
// OpenAI LLM wrapper.
import { OpenAI } from "langchain/llms/openai";

// Fail fast with a clear message instead of an opaque auth error deep
// inside the embedding/LLM calls.
if (!process.env.OPENAI_API_KEY) {
  throw new Error("OPENAI_API_KEY environment variable is not set");
}

try {
  // 1. Load the PDF into an array of Documents.
  const loader = new PDFLoader("src/documents/budget_speech.pdf");
  const docs = await loader.load();

  // 2. Split into ~1000-char chunks; the 20-char overlap preserves
  //    context across chunk boundaries.
  const splitter = new RecursiveCharacterTextSplitter({
    chunkSize: 1000,
    chunkOverlap: 20,
  });
  const splittedDocs = await splitter.splitDocuments(docs);

  // 3. Embed the chunks and index them in an HNSWLib vector store,
  //    then expose the store as a retriever.
  const embeddings = new OpenAIEmbeddings();
  const vectorStore = await HNSWLib.fromDocuments(splittedDocs, embeddings);
  const vectorStoreRetriever = vectorStore.asRetriever();

  // 4. Wire the LLM to the retriever.
  const model = new OpenAI({
    modelName: 'gpt-3.5-turbo',
    openAIApiKey: process.env.OPENAI_API_KEY,
  });
  const chain = RetrievalQAChain.fromLLM(model, vectorStoreRetriever);

  // 5. Ask a question grounded in the indexed document and print the
  //    result (same output shape as before: { question, answer }).
  const question = 'What is the theme of G20?';
  const answer = await chain.call({ query: question });
  console.log({ question, answer });
} catch (err) {
  // Surface failures (missing file, network, quota) instead of dying
  // on an unhandled top-level-await rejection; keep the original error.
  console.error("RAG pipeline failed:", err);
  process.exitCode = 1;
}