import streamlit as st
from langchain.document_loaders import PyPDFLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.embeddings.openai import OpenAIEmbeddings
from langchain.vectorstores import Chroma
from langchain.chains import RetrievalQA
from langchain.prompts import PromptTemplate
from langchain.chat_models import ChatOpenAI

# Streamlit app title
st.title("Question Answering with the Constitution of Pakistan")

# Path to the PDF
pdf_path = "The Constitution of the Islamic Republic of Pakistan.pdf"

# Load the PDF only once to avoid re-reading it on every rerun
@st.cache_data
def load_pdf_data(pdf_path):
    loader = PyPDFLoader(pdf_path)
    docs = loader.load()
    return docs

docs = load_pdf_data(pdf_path)

# Split documents into overlapping chunks
@st.cache_data
def split_docs(_docs):  # leading underscore tells st.cache_data not to hash this argument
    text_splitter = RecursiveCharacterTextSplitter(chunk_size=1500, chunk_overlap=150)
    return text_splitter.split_documents(_docs)

splits = split_docs(docs)

# Load OpenAI embeddings; the API key is kept in Streamlit secrets
openai_api_key = st.secrets["openai_api_key"]
embedding = OpenAIEmbeddings(openai_api_key=openai_api_key)

# Vectorstore setup (Chroma)
persist_directory = 'docs/chroma/'
vectordb = Chroma.from_documents(
    documents=splits,
    embedding=embedding,
    persist_directory=persist_directory,
)

# Define the LLM
llm_name = "gpt-3.5-turbo"
llm = ChatOpenAI(model_name=llm_name, temperature=0, openai_api_key=openai_api_key)

# Custom PromptTemplate
template = """Use the following pieces of context to answer the question at the end.
If you don't know the answer, just say that you don't know, don't try to make up an answer.
Use three sentences maximum. Keep the answer as concise as possible.
Always say "thanks for asking!" at the end of the answer.

{context}

Question: {question}
Helpful Answer:"""
QA_CHAIN_PROMPT = PromptTemplate.from_template(template)

# Build the QA chain with the custom prompt
qa_chain = RetrievalQA.from_chain_type(
    llm,
    retriever=vectordb.as_retriever(),
    return_source_documents=True,
    chain_type_kwargs={"prompt": QA_CHAIN_PROMPT},
)

# Streamlit user input
question = st.text_input("Ask a question about the Constitution of Pakistan:")

if st.button("Get Answer"):
    if question:
        with st.spinner('Generating answer...'):
            result = qa_chain({"query": question})

        st.write(result["result"])  # Display the concise answer

        # Display source documents
        st.subheader("Source Documents:")
        for doc in result["source_documents"]:
            st.write(doc.page_content)  # Show the content of each retrieved chunk
    else:
        st.error("Please ask a question.")
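
# Usage note (a sketch of the local setup, not part of the original script; the file
# name app.py is an assumption):
#   1. Put the OpenAI key in .streamlit/secrets.toml, which is where st.secrets reads
#      from locally, e.g.  openai_api_key = "sk-..."
#   2. Place the PDF next to the script, then start the app with:  streamlit run app.py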