// RAG demo: index a local PDF with OpenAI embeddings in HNSWLib, then
// answer a question over it with a RetrievalQAChain.

// PDF loader: parses a local PDF into LangChain Document objects.
import { PDFLoader } from "langchain/document_loaders/fs/pdf";
// Text splitter: chunks long documents so each piece fits an embedding call.
import { RecursiveCharacterTextSplitter } from "langchain/text_splitter";
// OpenAI embeddings used to vectorize each chunk.
import { OpenAIEmbeddings } from "langchain/embeddings/openai";
// HNSWLib: in-process vector store for the embedded chunks.
import { HNSWLib } from "langchain/vectorstores/hnswlib";
// Chain that wires an LLM to a retriever for question answering.
import { RetrievalQAChain } from "langchain/chains";
// OpenAI LLM wrapper.
import { OpenAI } from "langchain/llms/openai";

// Fail fast with a clear message instead of an opaque auth error deep
// inside the embedding/LLM calls.
if (!process.env.OPENAI_API_KEY) {
  throw new Error("OPENAI_API_KEY environment variable is not set");
}

try {
  // 1. Load the PDF into an array of Documents.
  const loader = new PDFLoader("src/documents/budget_speech.pdf");
  const docs = await loader.load();

  // 2. Split into ~1000-char chunks; the 20-char overlap preserves
  //    context across chunk boundaries.
  const splitter = new RecursiveCharacterTextSplitter({
    chunkSize: 1000,
    chunkOverlap: 20,
  });
  const splittedDocs = await splitter.splitDocuments(docs);

  // 3. Embed the chunks and index them in an HNSWLib vector store,
  //    then expose the store as a retriever.
  const embeddings = new OpenAIEmbeddings();
  const vectorStore = await HNSWLib.fromDocuments(splittedDocs, embeddings);
  const vectorStoreRetriever = vectorStore.asRetriever();

  // 4. Wire the LLM to the retriever.
  const model = new OpenAI({
    modelName: 'gpt-3.5-turbo',
    openAIApiKey: process.env.OPENAI_API_KEY,
  });
  const chain = RetrievalQAChain.fromLLM(model, vectorStoreRetriever);

  // 5. Ask a question grounded in the indexed document and print the
  //    result (same output shape as before: { question, answer }).
  const question = 'What is the theme of G20?';
  const answer = await chain.call({ query: question });
  console.log({ question, answer });
} catch (err) {
  // Surface failures (missing file, network, quota) instead of dying
  // on an unhandled top-level-await rejection; keep the original error.
  console.error("RAG pipeline failed:", err);
  process.exitCode = 1;
}