from langchain.document_loaders import DirectoryLoader
from langchain.text_splitter import CharacterTextSplitter
import os
import pinecone
from langchain.vectorstores import Pinecone
from langchain.embeddings.openai import OpenAIEmbeddings
from langchain.chains import RetrievalQA
from langchain.chat_models import ChatOpenAI
import streamlit as st
from dotenv import load_dotenv

load_dotenv()

PINECONE_API_KEY = os.getenv('PINECONE_API_KEY')
PINECONE_ENV = os.getenv('PINECONE_ENV')
OPENAI_API_KEY = os.getenv('OPENAI_API_KEY')
os.environ['OPENAI_API_KEY'] = OPENAI_API_KEY


def doc_preprocessing():
    # Load every PDF under data/ and split it into ~1000-character chunks.
    loader = DirectoryLoader(
        'data/',
        glob='**/*.pdf',  # only the PDFs
        show_progress=True
    )
    docs = loader.load()
    text_splitter = CharacterTextSplitter(
        chunk_size=1000,
        chunk_overlap=0
    )
    docs_split = text_splitter.split_documents(docs)
    return docs_split


@st.cache_resource
def embedding_db():
    # We use the OpenAI embedding model.
    embeddings = OpenAIEmbeddings()

    # Initialize the Pinecone client (v2-style API, to match `import pinecone`
    # and the langchain Pinecone vector store wrapper used above).
    pinecone.init(
        api_key=PINECONE_API_KEY,
        environment=PINECONE_ENV
    )

    docs_split = doc_preprocessing()

    # Create the index if it does not exist yet.
    if 'langchain-demo-indexes' not in pinecone.list_indexes():
        pinecone.create_index(
            name='langchain-demo-indexes',
            dimension=1536,  # dimension of OpenAI's text-embedding-ada-002 vectors
            metric='euclidean'
        )

    # Embed the chunks and upsert them into the Pinecone index.
    doc_db = Pinecone.from_documents(
        docs_split,
        embeddings,
        index_name='langchain-demo-indexes'
    )
    return doc_db


doc_db = embedding_db()


def retrieval_answer(query):
    chat_model = ChatOpenAI()  # the LLM that answers over the retrieved chunks
    qa = RetrievalQA.from_chain_type(
        llm=chat_model,
        chain_type='stuff',
        retriever=doc_db.as_retriever(),
    )
    result = qa.run(query)
    return result


def main():
    st.title("Question and Answering App powered by LLM and Pinecone")

    text_input = st.text_input("Ask your query...")
    if st.button("Ask Query"):
        if len(text_input) > 0:
            st.info("Your Query: " + text_input)
            answer = retrieval_answer(text_input)
            st.success(answer)


if __name__ == "__main__":
    main()
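
# Usage notes (assumptions, not stated in the original script):
# the code above expects a .env file next to this script that defines
# PINECONE_API_KEY, PINECONE_ENV and OPENAI_API_KEY, plus a data/ folder
# containing the PDFs to index. With those in place, the app would
# typically be launched with:
#   streamlit run app.py
# (replace app.py with whatever this file is actually named).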