Create app.py
app.py
ADDED
import streamlit as st
from langchain.document_loaders import PyPDFLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.embeddings.openai import OpenAIEmbeddings
from langchain.vectorstores import Chroma
from langchain.chains import RetrievalQA
from langchain.prompts import PromptTemplate
from langchain.chat_models import ChatOpenAI
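These import paths come from the pre-0.1 LangChain package layout, so the Space needs a matching requirements.txt. A minimal sketch; the pins below are assumptions rather than values from the repo, and any release that still exposes these paths should work:

# requirements.txt (assumed pins, not from the repo)
streamlit
langchain==0.0.300   # assumption: any pre-0.1 release with these import paths
openai==0.28.1       # assumption: the pre-1.0 client these wrappers were written against
chromadb             # backs the Chroma vectorstore
pypdf                # required by PyPDFLoader
tiktoken             # used by OpenAIEmbeddings for token counting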
# Streamlit app title
st.title("Question Answering with the Constitution of Pakistan")

# Load the PDF
pdf_path = "The Constitution of the Islamic Republic of Pakistan.pdf"

# Load data only once; st.cache_data memoizes the result across reruns
@st.cache_data
def load_pdf_data(pdf_path):
    loader = PyPDFLoader(pdf_path)
    docs = loader.load()
    return docs

docs = load_pdf_data(pdf_path)
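PyPDFLoader yields one Document per PDF page and keeps the page number in the metadata, which is what makes the per-source display at the bottom of the app possible. A quick way to sanity-check the load, e.g. in a REPL (a hypothetical probe, not part of app.py):

# Hypothetical sanity check: one Document per page, page number in metadata.
docs = load_pdf_data(pdf_path)
print(len(docs))                   # number of pages
print(docs[0].metadata)            # e.g. {'source': '...pdf', 'page': 0}
print(docs[0].page_content[:200])  # first 200 characters of page 1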
# Split documents
@st.cache_data
def split_docs(docs):
    text_splitter = RecursiveCharacterTextSplitter(chunk_size=1500, chunk_overlap=150)
    return text_splitter.split_documents(docs)

splits = split_docs(docs)
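RecursiveCharacterTextSplitter tries paragraph, line, and word boundaries in turn until each chunk fits under chunk_size characters, and repeats chunk_overlap characters between neighbours so sentences that straddle a boundary remain retrievable. A standalone sketch of the effect, with toy sizes for visibility:

# Toy sizes for illustration; the app uses chunk_size=1500, chunk_overlap=150.
from langchain.text_splitter import RecursiveCharacterTextSplitter

splitter = RecursiveCharacterTextSplitter(chunk_size=40, chunk_overlap=10)
chunks = splitter.split_text(
    "Article 1. Pakistan shall be a Federal Republic. "
    "Article 2. Islam shall be the State religion."
)
print(chunks)  # chunks of at most 40 characters, neighbours sharing up to 10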
# Load OpenAI embeddings
openai_api_key = st.secrets["openai_api_key"]  # keep the API key out of the code via Streamlit secrets
embedding = OpenAIEmbeddings(openai_api_key=openai_api_key)
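st.secrets resolves the key from .streamlit/secrets.toml during local development; the name openai_api_key is simply what this app chose to call it. A minimal local secrets file (on a deployed Space, the key has to be supplied in whatever form st.secrets can read there):

# .streamlit/secrets.toml — local development only; never commit this file
openai_api_key = "sk-..."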
# Vectorstore setup (Chroma); st.cache_resource stops the corpus being
# re-embedded (and re-added to the collection) on every Streamlit rerun
@st.cache_resource
def build_vectordb():
    persist_directory = 'docs/chroma/'
    return Chroma.from_documents(documents=splits, embedding=embedding, persist_directory=persist_directory)

vectordb = build_vectordb()
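With the index built, it can be probed directly before any chain is attached; similarity_search is the same call the retriever below wraps. A hypothetical probe, not part of app.py:

# Hypothetical probe of the Chroma index.
hits = vectordb.similarity_search("fundamental rights", k=2)
for hit in hits:
    print(hit.metadata.get("page"), hit.page_content[:100])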
# Define the LLM; temperature=0 keeps answers deterministic
llm_name = "gpt-3.5-turbo"
llm = ChatOpenAI(model_name=llm_name, temperature=0, openai_api_key=openai_api_key)
# Custom PromptTemplate
template = """Use the following pieces of context to answer the question at the end.
If you don't know the answer, just say that you don't know, don't try to make up an answer.
Use three sentences maximum. Keep the answer as concise as possible.
Always say "thanks for asking!" at the end of the answer.

{context}

Question: {question}
Helpful Answer:"""
QA_CHAIN_PROMPT = PromptTemplate.from_template(template)
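PromptTemplate.from_template infers the {context} and {question} input variables from the string itself; at query time the chain fills {context} with the retrieved chunks and {question} with the user's text. A standalone check (hypothetical, not part of app.py):

# The inferred variables and a filled-in example.
print(QA_CHAIN_PROMPT.input_variables)  # ['context', 'question']
print(QA_CHAIN_PROMPT.format(context="<retrieved chunks go here>",
                             question="What is Article 1?"))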
# Build the QA chain with the custom prompt and source passthrough
qa_chain = RetrievalQA.from_chain_type(
    llm,
    retriever=vectordb.as_retriever(),
    return_source_documents=True,
    chain_type_kwargs={"prompt": QA_CHAIN_PROMPT}
)
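from_chain_type defaults to the "stuff" chain type, which pastes every retrieved chunk into {context} of the prompt in a single call, and as_retriever() returns 4 chunks by default. Both can be set explicitly; a hypothetical variant with a smaller k:

# Hypothetical variant: make the defaults explicit and retrieve 3 chunks instead of 4.
qa_chain = RetrievalQA.from_chain_type(
    llm,
    chain_type="stuff",                                       # the implicit default
    retriever=vectordb.as_retriever(search_kwargs={"k": 3}),  # default k is 4
    return_source_documents=True,
    chain_type_kwargs={"prompt": QA_CHAIN_PROMPT}
)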
# Streamlit user input
question = st.text_input("Ask a question about the Constitution of Pakistan:")

if st.button("Get Answer"):
    if question:
        with st.spinner('Generating answer...'):
            result = qa_chain({"query": question})
        st.write(result["result"])  # Display the concise answer

        # Display source documents
        st.subheader("Source Documents:")
        for doc in result["source_documents"]:
            st.write(doc.page_content)  # Show the content of each retrieved source
    else:
        st.error("Please ask a question.")
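With the requirements installed and the secrets file in place, streamlit run app.py starts the app locally; on a Space created with the Streamlit SDK, the same app.py is picked up automatically.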