import streamlit as st
from langchain.document_loaders import PyPDFLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.embeddings.openai import OpenAIEmbeddings
from langchain.vectorstores import Chroma
from langchain.chains import RetrievalQA
from langchain.prompts import PromptTemplate
from langchain.chat_models import ChatOpenAI
import os
# Streamlit app title
st.title("Question Answering with the Constitution of Pakistan")
# Load the PDF
pdf_path = "The Constitution of the Islamic Republic of Pakistan.pdf"
# Cache the PDF load so it runs only once per session
@st.cache_data
def load_pdf_data(pdf_path):
    loader = PyPDFLoader(pdf_path)
    docs = loader.load()
    return docs
docs = load_pdf_data(pdf_path)
# Split documents
@st.cache_data
def split_docs(_docs):  # Leading underscore stops st.cache_data from trying to hash the documents
    text_splitter = RecursiveCharacterTextSplitter(chunk_size=1500, chunk_overlap=150)
    return text_splitter.split_documents(_docs)
splits = split_docs(docs)
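# Optional guard (a sketch, not in the original app): stop with a readable message if
# the key is missing from .streamlit/secrets.toml instead of crashing on the lookup below.
# if "openai_api_key" not in st.secrets:
#     st.error("Please add openai_api_key to .streamlit/secrets.toml.")
#     st.stop()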
# Load OpenAI embeddings
openai_api_key = st.secrets["openai_api_key"]  # Read the API key from Streamlit secrets
embedding = OpenAIEmbeddings(openai_api_key=openai_api_key)
# Vectorstore setup (Chroma)
persist_directory = 'docs/chroma/'
vectordb = Chroma.from_documents(documents=splits, embedding=embedding, persist_directory=persist_directory)
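# Optional optimization (a sketch, not part of the original app; assumes this Streamlit
# version provides st.cache_resource): the call above re-embeds every chunk on each
# rerun, which repeats OpenAI embedding calls. Caching the build avoids that, e.g.:
#
# @st.cache_resource
# def build_vectordb(_splits):
#     return Chroma.from_documents(documents=_splits, embedding=embedding,
#                                  persist_directory=persist_directory)
#
# vectordb = build_vectordb(splits)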
# Define LLM and chain
llm_name = "gpt-3.5-turbo"
llm = ChatOpenAI(model_name=llm_name, temperature=0, openai_api_key=openai_api_key)
# Custom PromptTemplate
template = """Use the following pieces of context to answer the question at the end.
If you don't know the answer, just say that you don't know, don't try to make up an answer.
Use three sentences maximum. Keep the answer as concise as possible.
Always say "thanks for asking!" at the end of the answer.
{context}
Question: {question}
Helpful Answer:"""
QA_CHAIN_PROMPT = PromptTemplate.from_template(template)
# Build the RetrievalQA chain with the custom prompt
qa_chain = RetrievalQA.from_chain_type(
    llm,
    retriever=vectordb.as_retriever(),
    return_source_documents=True,
    chain_type_kwargs={"prompt": QA_CHAIN_PROMPT},
)
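# The chain returns a dict with "result" (the answer) and "source_documents"
# (the retrieved chunks), which the UI below displays.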
# Streamlit user input
question = st.text_input("Ask a question about the Constitution of Pakistan:")
if st.button("Get Answer"):
    if question:
        with st.spinner('Generating answer...'):
            result = qa_chain({"query": question})
        st.write(result["result"])  # Display the concise answer

        # Display the retrieved source passages
        st.subheader("Source Documents:")
        for doc in result["source_documents"]:
            st.write(doc.page_content)  # Show the content of each source chunk
    else:
        st.error("Please ask a question.")