import streamlit as st
from langchain.document_loaders import PyPDFLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.embeddings.openai import OpenAIEmbeddings
from langchain.vectorstores import Chroma
from langchain.chains import RetrievalQA
from langchain.prompts import PromptTemplate
from langchain.chat_models import ChatOpenAI
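# NOTE: these imports target the legacy monolithic `langchain` package. On
# LangChain >= 0.1 the same classes live in split packages; the equivalent
# imports (assuming `langchain-community` and `langchain-openai` are installed)
# would be:
#   from langchain_community.document_loaders import PyPDFLoader
#   from langchain_community.vectorstores import Chroma
#   from langchain_openai import OpenAIEmbeddings, ChatOpenAI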
# Streamlit app title
st.title("Question Answering with the Constitution of Pakistan")
# Load the PDF
pdf_path = "The Constitution of the Islamic Republic of Pakistan.pdf"

# Load data only once to optimize; st.cache_data memoizes the result across reruns
@st.cache_data
def load_pdf_data(pdf_path):
    loader = PyPDFLoader(pdf_path)
    docs = loader.load()
    return docs

docs = load_pdf_data(pdf_path)
# Split documents into overlapping chunks for retrieval
@st.cache_data
def split_docs(_docs):  # The leading underscore tells st.cache_data not to hash this argument
    text_splitter = RecursiveCharacterTextSplitter(chunk_size=1500, chunk_overlap=150)
    return text_splitter.split_documents(_docs)

splits = split_docs(docs)
# Load OpenAI embeddings; the API key is kept out of the source via Streamlit secrets
openai_api_key = st.secrets["openai_api_key"]
embedding = OpenAIEmbeddings(openai_api_key=openai_api_key)
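# st.secrets reads from .streamlit/secrets.toml locally, or from the Space's
# secrets settings when deployed. A minimal sketch of that file, assuming the
# same key name used above:
#   openai_api_key = "sk-..."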
# Vectorstore setup (Chroma)
persist_directory = 'docs/chroma/'
vectordb = Chroma.from_documents(documents=splits, embedding=embedding, persist_directory=persist_directory)
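# NOTE: as written, the index is rebuilt (and every chunk re-embedded) on each
# Streamlit rerun. A sketch of building it once per process, using a
# hypothetical helper cached with st.cache_resource:
#   @st.cache_resource
#   def build_vectordb():
#       return Chroma.from_documents(documents=splits, embedding=embedding,
#                                    persist_directory=persist_directory)
#   vectordb = build_vectordb()
# (With chromadb >= 0.4 the collection persists automatically; older versions
# required an explicit vectordb.persist() call.)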
# Define the LLM
llm_name = "gpt-3.5-turbo"
llm = ChatOpenAI(model_name=llm_name, temperature=0, openai_api_key=openai_api_key)
# Custom PromptTemplate
template = """Use the following pieces of context to answer the question at the end.
If you don't know the answer, just say that you don't know, don't try to make up an answer.
Use three sentences maximum. Keep the answer as concise as possible.
Always say "thanks for asking!" at the end of the answer.
{context}
Question: {question}
Helpful Answer:"""
QA_CHAIN_PROMPT = PromptTemplate.from_template(template)
# Build the QA chain with the custom prompt constraining the answers
qa_chain = RetrievalQA.from_chain_type(
    llm,
    retriever=vectordb.as_retriever(),
    return_source_documents=True,
    chain_type_kwargs={"prompt": QA_CHAIN_PROMPT},
)
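# as_retriever() defaults to plain similarity search. Retrieval can be tuned,
# e.g. with maximal marginal relevance and a custom number of chunks:
#   retriever=vectordb.as_retriever(search_type="mmr", search_kwargs={"k": 4})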
# Streamlit user input
question = st.text_input("Ask a question about the Constitution of Pakistan:")

if st.button("Get Answer"):
    if question:
        with st.spinner('Generating answer...'):
            result = qa_chain({"query": question})
            st.write(result["result"])  # Display the concise answer

            # Display the retrieved source passages
            st.subheader("Source Documents:")
            for doc in result["source_documents"]:
                st.write(doc.page_content)
    else:
        st.error("Please ask a question.")
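# To run locally (assumes this file is app.py, the PDF sits next to it, and
# the API key is set in .streamlit/secrets.toml):
#   streamlit run app.py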