"""Streamlit Q&A app: indexes local PDFs into a Pinecone vector store and
answers user queries with a LangChain RetrievalQA chain."""

import os

import pinecone
import streamlit as st
from dotenv import load_dotenv
from langchain.chains import RetrievalQA
from langchain.chat_models import ChatOpenAI
from langchain.document_loaders import DirectoryLoader
from langchain.embeddings.openai import OpenAIEmbeddings
from langchain.text_splitter import CharacterTextSplitter
from langchain.vectorstores import Pinecone

load_dotenv()

PINECONE_API_KEY = os.getenv('PINECONE_API_KEY')
PINECONE_ENV = os.getenv('PINECONE_ENV')
OPENAI_API_KEY = os.getenv('OPENAI_API_KEY')

# Only export the key when it is actually set: assigning None into os.environ
# raises TypeError, which would crash the app at import time.
if OPENAI_API_KEY:
    os.environ['OPENAI_API_KEY'] = OPENAI_API_KEY


def doc_preprocessing():
    """Load every PDF under ``data/`` and split it into chunks for embedding.

    Returns:
        list: LangChain ``Document`` chunks of at most 1000 characters
        (no overlap), ready for ``from_documents``.
    """
    loader = DirectoryLoader(
        'data/',
        glob='**/*.pdf',
        show_progress=True,
    )
    docs = loader.load()
    text_splitter = CharacterTextSplitter(
        chunk_size=1000,
        chunk_overlap=0,
    )
    return text_splitter.split_documents(docs)


@st.cache_resource
def embedding_db():
    """Build (and cache via Streamlit) the Pinecone vector store.

    Returns:
        Pinecone: LangChain Pinecone vector store backed by the
        ``langchain-demo-indexes`` index.
    """
    embeddings = OpenAIEmbeddings()
    # Use the module-qualified client so it does NOT shadow the LangChain
    # `Pinecone` vector-store class imported above.  The original did
    # `from pinecone import Pinecone` inside this function, which made the
    # subsequent `Pinecone.from_documents(...)` resolve to the raw client
    # class — that class has no `from_documents`, so the call could never
    # succeed.
    pinecone.Pinecone(
        api_key=PINECONE_API_KEY,
        environment=PINECONE_ENV,  # NOTE(review): pinecone>=3 ignores `environment` — confirm installed client version
    )
    docs_split = doc_preprocessing()
    # LangChain's vector store reads the Pinecone credentials from the
    # environment (populated by load_dotenv above); `from_documents` does not
    # accept a `client=` kwarg, so the original extra argument is dropped.
    return Pinecone.from_documents(
        docs_split,
        embeddings,
        index_name='langchain-demo-indexes',
    )


def retrieval_answer(query, doc_db=None):
    """Answer ``query`` with a 'stuff' RetrievalQA chain over the vector store.

    Args:
        query (str): the user's question.
        doc_db: optional pre-built vector store.  When omitted, the cached
            store from ``embedding_db()`` is used.  (The original body read a
            global ``doc_db`` that was only ever bound as a *local* inside
            ``main()``, so every call raised NameError.)

    Returns:
        str: the chain's answer text.
    """
    if doc_db is None:
        doc_db = embedding_db()
    chat_model = ChatOpenAI()
    qa = RetrievalQA.from_chain_type(
        llm=chat_model,
        chain_type='stuff',
        retriever=doc_db.as_retriever(),
    )
    return qa.run(query)


def main():
    """Render the Streamlit UI and answer submitted queries."""
    st.title("Question and Answering App powered by LLM and Pinecone")
    text_input = st.text_input("Ask your query...")
    if st.button("Ask Query"):
        if text_input:
            st.info("Your Query: " + text_input)
            with st.spinner("Processing your query..."):
                doc_db = embedding_db()  # create/fetch the cached embedding database
                answer = retrieval_answer(text_input, doc_db)
                st.success(answer)


if __name__ == "__main__":
    main()