import streamlit as st
from langchain.document_loaders import PyPDFLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.embeddings.openai import OpenAIEmbeddings
from langchain.vectorstores import Chroma
from langchain.chains import RetrievalQA
from langchain.prompts import PromptTemplate
from langchain.chat_models import ChatOpenAI
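# NOTE: these imports target the legacy monolithic `langchain` package. On
# LangChain >= 0.1 the same classes live in split packages; the equivalent
# imports (assuming `langchain-community` and `langchain-openai` are installed)
# would be:
#   from langchain_community.document_loaders import PyPDFLoader
#   from langchain_community.vectorstores import Chroma
#   from langchain_openai import OpenAIEmbeddings, ChatOpenAI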
# Streamlit app title
st.title("Question Answering with the Constitution of Pakistan")
# Load the PDF
pdf_path = "The Constitution of the Islamic Republic of Pakistan.pdf"

# Load data only once to optimize; st.cache_data memoizes the result across reruns
@st.cache_data
def load_pdf_data(pdf_path):
    loader = PyPDFLoader(pdf_path)
    docs = loader.load()
    return docs

docs = load_pdf_data(pdf_path)
# Split documents into overlapping chunks for retrieval
@st.cache_data
def split_docs(_docs):  # The leading underscore tells st.cache_data not to hash this argument
    text_splitter = RecursiveCharacterTextSplitter(chunk_size=1500, chunk_overlap=150)
    return text_splitter.split_documents(_docs)

splits = split_docs(docs)
# Load OpenAI embeddings; the API key is kept out of the source via Streamlit secrets
openai_api_key = st.secrets["openai_api_key"]
embedding = OpenAIEmbeddings(openai_api_key=openai_api_key)
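# st.secrets reads from .streamlit/secrets.toml locally, or from the Space's
# secrets settings when deployed. A minimal sketch of that file, assuming the
# same key name used above:
#   openai_api_key = "sk-..."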
# Vectorstore setup (Chroma)
persist_directory = 'docs/chroma/'
vectordb = Chroma.from_documents(documents=splits, embedding=embedding, persist_directory=persist_directory)
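# NOTE: as written, the index is rebuilt (and every chunk re-embedded) on each
# Streamlit rerun. A sketch of building it once per process, using a
# hypothetical helper cached with st.cache_resource:
#   @st.cache_resource
#   def build_vectordb():
#       return Chroma.from_documents(documents=splits, embedding=embedding,
#                                    persist_directory=persist_directory)
#   vectordb = build_vectordb()
# (With chromadb >= 0.4 the collection persists automatically; older versions
# required an explicit vectordb.persist() call.)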
# Define the LLM
llm_name = "gpt-3.5-turbo"
llm = ChatOpenAI(model_name=llm_name, temperature=0, openai_api_key=openai_api_key)
# Custom PromptTemplate
template = """Use the following pieces of context to answer the question at the end.
If you don't know the answer, just say that you don't know, don't try to make up an answer.
Use three sentences maximum. Keep the answer as concise as possible.
Always say "thanks for asking!" at the end of the answer.
{context}
Question: {question}
Helpful Answer:"""
QA_CHAIN_PROMPT = PromptTemplate.from_template(template)
# Build the QA chain with the custom prompt constraining the answers
qa_chain = RetrievalQA.from_chain_type(
    llm,
    retriever=vectordb.as_retriever(),
    return_source_documents=True,
    chain_type_kwargs={"prompt": QA_CHAIN_PROMPT},
)
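# as_retriever() defaults to plain similarity search. Retrieval can be tuned,
# e.g. with maximal marginal relevance and a custom number of chunks:
#   retriever=vectordb.as_retriever(search_type="mmr", search_kwargs={"k": 4})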
# Streamlit user input
question = st.text_input("Ask a question about the Constitution of Pakistan:")

if st.button("Get Answer"):
    if question:
        with st.spinner('Generating answer...'):
            result = qa_chain({"query": question})
            st.write(result["result"])  # Display the concise answer

            # Display the retrieved source passages
            st.subheader("Source Documents:")
            for doc in result["source_documents"]:
                st.write(doc.page_content)
    else:
        st.error("Please ask a question.")
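# To run locally (assumes this file is app.py, the PDF sits next to it, and
# the API key is set in .streamlit/secrets.toml):
#   streamlit run app.py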