Created
October 18, 2023 16:44
-
-
Save vivek12345/3ee73b697adf8e82f2899f74d0b65764 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// Load a PDF from disk into LangChain Document objects.
import { PDFLoader } from "langchain/document_loaders/fs/pdf";
// Split long documents into overlapping chunks suitable for embedding.
import { RecursiveCharacterTextSplitter } from "langchain/text_splitter";
// OpenAI embedding model wrapper.
import { OpenAIEmbeddings } from "langchain/embeddings/openai";
// HNSWLib: a local, in-process vector store.
import { HNSWLib } from "langchain/vectorstores/hnswlib";
// Chain that wires an LLM to a retriever for question answering.
import { RetrievalQAChain } from "langchain/chains";
// OpenAI LLM wrapper.
import { OpenAI } from "langchain/llms/openai";

// Fail fast with a clear message instead of an opaque auth error
// surfacing deep inside the embedding/LLM calls later on.
if (!process.env.OPENAI_API_KEY) {
  throw new Error("OPENAI_API_KEY environment variable is not set");
}

try {
  // Point the loader at the PDF we want to index and load its pages.
  const loader = new PDFLoader("src/documents/budget_speech.pdf");
  const docs = await loader.load();

  // ~1000-character chunks with a 20-character overlap so content
  // spanning a chunk boundary is not lost to the retriever.
  const splitter = new RecursiveCharacterTextSplitter({
    chunkSize: 1000,
    chunkOverlap: 20,
  });
  const splittedDocs = await splitter.splitDocuments(docs);

  // Embed every chunk with OpenAI embeddings and store them in HNSWLib.
  const embeddings = new OpenAIEmbeddings();
  const vectorStore = await HNSWLib.fromDocuments(splittedDocs, embeddings);

  // The retriever returns the stored chunks most similar to a query.
  const vectorStoreRetriever = vectorStore.asRetriever();

  // LLM that will synthesize an answer from the retrieved chunks.
  const model = new OpenAI({
    modelName: "gpt-3.5-turbo",
    openAIApiKey: process.env.OPENAI_API_KEY,
  });

  // Connect the LLM to the vector-store retriever.
  const chain = RetrievalQAChain.fromLLM(model, vectorStoreRetriever);

  const question = "What is the theme of G20?";
  // chain.call resolves to a result object (its `text` field holds the
  // generated answer, per the RetrievalQAChain docs).
  const answer = await chain.call({
    query: question,
  });
  console.log({
    question,
    answer,
  });
} catch (err) {
  // Surface failures (missing PDF, network/auth errors) explicitly
  // instead of dying with an unhandled promise rejection.
  console.error("PDF question-answering pipeline failed:", err);
  process.exitCode = 1;
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment.