Amelia-James commited on
Commit
a0efd80
·
verified ·
1 Parent(s): 9ae948c

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +80 -0
app.py ADDED
@@ -0,0 +1,80 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ from langchain.document_loaders import PyPDFLoader
3
+ from langchain.text_splitter import RecursiveCharacterTextSplitter
4
+ from langchain.embeddings.openai import OpenAIEmbeddings
5
+ from langchain.vectorstores import Chroma
6
+ from langchain.chains import RetrievalQA
7
+ from langchain.prompts import PromptTemplate
8
+ from langchain.chat_models import ChatOpenAI
9
+ import os
10
+
11
# --- Page header -------------------------------------------------------------
st.title("Question Answering with the Constitution of Pakistan")

# Path to the source document that the QA chain answers questions about.
pdf_path = "The Constitution of the Islamic Republic of Pakistan.pdf"
17
@st.cache_data  # parse the PDF once per session instead of on every rerun
def load_pdf_data(pdf_path):
    """Read every page of the PDF at *pdf_path* into LangChain documents."""
    return PyPDFLoader(pdf_path).load()


docs = load_pdf_data(pdf_path)
26
@st.cache_data  # chunking is deterministic, so cache it alongside the load
def split_docs(docs):
    """Chunk *docs* into ~1500-character pieces with 150-character overlap."""
    splitter = RecursiveCharacterTextSplitter(chunk_size=1500, chunk_overlap=150)
    return splitter.split_documents(docs)


splits = split_docs(docs)
34
# Load OpenAI embeddings; the key lives in Streamlit secrets, never in code.
openai_api_key = st.secrets["openai_api_key"]
embedding = OpenAIEmbeddings(openai_api_key=openai_api_key)

# Vectorstore setup (Chroma).
# NOTE: building the index makes one OpenAI embedding call per chunk, and
# Streamlit re-executes this script on every widget interaction.  Without
# caching, the whole document would be re-embedded on every rerun, so the
# store is built once per session via st.cache_resource.
persist_directory = 'docs/chroma/'


@st.cache_resource
def _build_vectordb(persist_directory):
    """Embed the split documents and persist them in a Chroma store.

    The splits are read from the enclosing module scope (they are themselves
    cached with st.cache_data), so the only cache key is the persist path.
    """
    return Chroma.from_documents(
        documents=splits,
        embedding=embedding,
        persist_directory=persist_directory,
    )


vectordb = _build_vectordb(persist_directory)

# Define LLM and chain; temperature=0 keeps answers deterministic.
llm_name = "gpt-3.5-turbo"
llm = ChatOpenAI(model_name=llm_name, temperature=0, openai_api_key=openai_api_key)
46
# Prompt that constrains the model: answer only from the retrieved context,
# stay concise, and admit ignorance rather than guess.
qa_template = """Use the following pieces of context to answer the question at the end.
If you don't know the answer, just say that you don't know, don't try to make up an answer.
Use three sentences maximum. Keep the answer as concise as possible.
Always say "thanks for asking!" at the end of the answer.

{context}

Question: {question}
Helpful Answer:"""

QA_CHAIN_PROMPT = PromptTemplate.from_template(qa_template)
58
# Assemble the retrieval-augmented QA chain around the custom prompt.
# Source documents are returned so the UI can show where answers came from.
qa_chain = RetrievalQA.from_chain_type(
    llm=llm,
    retriever=vectordb.as_retriever(),
    chain_type_kwargs={"prompt": QA_CHAIN_PROMPT},
    return_source_documents=True,
)
66
# Streamlit user input
question = st.text_input("Ask a question about the Constitution of Pakistan:")

if st.button("Get Answer"):
    # .strip() rejects whitespace-only input the same way as an empty box;
    # the bare truthiness check would have sent "   " to the LLM.
    if question.strip():
        with st.spinner('Generating answer...'):
            result = qa_chain({"query": question})
            st.write(result["result"])  # Display the concise answer

        # Display the retrieved passages so users can verify the answer.
        st.subheader("Source Document:")
        for doc in result["source_documents"]:
            st.write(doc.page_content)  # Show the content of the source document
    else:
        st.error("Please ask a question.")