"""Streamlit app: question answering over an uploaded text file.

The user uploads a ``.txt`` file, which is split into chunks, embedded with
OpenAI embeddings and indexed in a Chroma vector store. Each question is
answered by a ``RetrievalQA`` chain, and every (question, answer, sources)
triple is kept in ``st.session_state`` so the history survives Streamlit
reruns within the same session.
"""

import os

import chromadb
import streamlit as st
from langchain.chains import RetrievalQA
from langchain.chat_models import ChatOpenAI
from langchain.docstore.document import Document
from langchain.embeddings import OpenAIEmbeddings
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.vectorstores import Chroma
from langchain_community.document_loaders import TextLoader

# NOTE(review): presumably this makes every Streamlit rerun start from a
# fresh in-memory Chroma client -- confirm against the chromadb docs.
chromadb.api.client.SharedSystemClient.clear_system_cache()


def _split_upload(uploaded_file):
    """Decode an uploaded text file and split it into LangChain documents.

    Args:
        uploaded_file: The object returned by ``st.file_uploader``.

    Returns:
        A list of ``Document`` chunks. The list is empty when the file is not
        valid UTF-8 or contains no text; in both cases a message is shown on
        the page instead of raising.
    """
    try:
        # getvalue() returns the whole buffer regardless of the read position.
        file_content = uploaded_file.getvalue().decode("utf-8")
    except UnicodeDecodeError:
        st.error("The uploaded file is not valid UTF-8 text.")
        return []

    # Record the file name so the "Source" lines can show where a chunk came
    # from; the splitter copies this metadata onto every chunk.
    document = [
        Document(page_content=file_content, metadata={"source": uploaded_file.name})
    ]
    text_splitter = RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=0)
    doc_splits = text_splitter.split_documents(document)
    if not doc_splits:
        st.warning("The uploaded file contains no text to index.")
    return doc_splits


def _render_sources(sources, max_chars):
    """Write each source document's origin and a leading snippet to the page.

    Args:
        sources: Iterable of LangChain documents returned by the QA chain.
        max_chars: Number of leading characters of each document to display.
    """
    for i, doc in enumerate(sources, start=1):
        st.write(f"**Source {i}:** {doc.metadata.get('source', 'Unknown Source')}")
        st.write(doc.page_content[:max_chars])


# Initialize conversation history once per browser session.
if "conversation_history" not in st.session_state:
    st.session_state.conversation_history = []

# Define the Streamlit app
st.title("Text File Question-Answering with History")
st.subheader("Upload a text file and ask questions. The app will maintain a conversation history.")

# Assigning os.getenv(...) back into os.environ raises TypeError when the
# variable is unset, so check for it explicitly and stop with a clear message.
if not os.getenv("OPENAI_API_KEY"):
    st.error("The OPENAI_API_KEY environment variable is not set.")
    st.stop()

# Initialize the embeddings and model
embd = OpenAIEmbeddings()
llm = ChatOpenAI(model_name="gpt-4o", temperature=0)

# File upload section
uploaded_file = st.file_uploader("Upload a text file", type=["txt"])

if uploaded_file:
    doc_splits = _split_upload(uploaded_file)

    # Only build the index when there is something to index; an empty chunk
    # list has already been reported to the user by _split_upload.
    if doc_splits:
        # NOTE(review): the vector store is rebuilt (and the file re-embedded)
        # on every rerun while a file is uploaded; consider caching it.
        vectorstore = Chroma.from_documents(
            documents=doc_splits,
            collection_name="conversation_history",
            embedding=embd,
            persist_directory=None,
        )
        retriever = vectorstore.as_retriever()

        # Initialize the QA chain
        qa_chain = RetrievalQA.from_chain_type(
            llm=llm,
            chain_type="stuff",
            retriever=retriever,
            return_source_documents=True,
        )

        # Question-answering section
        query = st.text_input("Ask a question:")

        # Skip blank / whitespace-only input so no LLM call is made for it.
        if query.strip():
            result = qa_chain.invoke({"query": query})
            answer = result["result"]
            sources = result["source_documents"]

            # Append to conversation history
            st.session_state.conversation_history.append((query, answer, sources))

            # Display the current answer
            st.write("**Answer:**", answer)

            # Display the sources (first 500 characters of each)
            st.subheader("Source Documents")
            _render_sources(sources, 500)

# Display conversation history
st.subheader("Conversation History")
for idx, (q, a, s) in enumerate(st.session_state.conversation_history, 1):
    st.write(f"**Q{idx}:** {q}")
    st.write(f"**A{idx}:** {a}")
    st.write(f"**Sources for Q{idx}:**")
    # Show a shorter snippet here for brevity
    _render_sources(s, 300)