import os

import openai
import streamlit as st
from dotenv import load_dotenv
from langchain.chains.question_answering import load_qa_chain
from langchain.chat_models import ChatOpenAI
from langchain.document_loaders import UnstructuredMarkdownLoader
from langchain.document_loaders import UnstructuredPDFLoader
from langchain.embeddings.openai import OpenAIEmbeddings
from langchain.vectorstores import Chroma
from streamlit_chat import message

# Pull settings from a local .env file (if any) into the process environment
# before the API key is read below.
load_dotenv()

# The key is taken from the environment only. A literal key must never be
# committed as a fallback value: anyone with access to the source could use it.
# NOTE(review): the key previously hard-coded here should be treated as leaked
# and revoked.
openai.api_key = os.environ.get('OPENAI_API_KEY')

# Directory where the Chroma index is persisted between runs.
persist_directory = 'ai_paper1'
embeddings = OpenAIEmbeddings()

# Build the index only on the first run, i.e. when the persist directory does
# not exist yet.
# NOTE(review): within the visible code `vectordb` is bound only in this
# branch; loading an existing index presumably happens further down - confirm.
if not os.path.exists(persist_directory):
    print('embedding the document now')
    loader = UnstructuredPDFLoader('ai_paper.pdf', mode="elements")
    pages = loader.load_and_split()
    vectordb = Chroma.from_documents(
        documents=pages,
        embedding=embeddings,
        persist_directory=persist_directory,
    )
    # Write the freshly built index to disk so later runs can skip embedding.
    vectordb.persist()