SoumyaJ committed (verified)
Commit b379775 · Parent: 473ac4d

Upload 2 files

Files changed (2):
  1. app.py +156 -0
  2. requirements.txt +11 -0
app.py ADDED
@@ -0,0 +1,156 @@
import streamlit as st
import os
from langchain_groq import ChatGroq
from langchain_huggingface import HuggingFaceEmbeddings
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain.chains.combine_documents import create_stuff_documents_chain
from langchain_core.prompts import ChatPromptTemplate
from langchain.chains import create_retrieval_chain
from langchain_community.vectorstores import FAISS
from langchain_community.document_loaders import PyPDFLoader  # imported but unused; PDFs are read with PyPDF2 below
from dotenv import load_dotenv
from PyPDF2 import PdfReader
import time

load_dotenv()

## CSS for the background and sidebar styling
st.markdown(
    """
    <style>
    .stApp {
        background-image: url('https://www.transparenttextures.com/patterns/white-leather.png');
        background-size: cover;
    }
    .sidebar .sidebar-content {
        padding: 20px;
        background-image: url('https://www.transparenttextures.com/patterns/asfalt-light.png');
        background-size: cover;
        border-radius: 10px;
        box-shadow: 0px 0px 10px rgba(0, 0, 0, 0.1);
    }
    .sidebar .bottom-button {
        position: fixed;
        bottom: 20px;
        left: 20px;
        width: calc(100% - 40px);
    }
    </style>
    """,
    unsafe_allow_html=True
)

# API keys come from the .env file; only export HF_TOKEN if it is actually set
hf_token = os.getenv("HF_TOKEN")
if hf_token:
    os.environ["HF_TOKEN"] = hf_token
groq_api_key = os.getenv("GROQ_API_KEY")

# Pipeline: document loader -> text splitter -> embeddings -> vector store -> retrieval chain
embeddings = HuggingFaceEmbeddings(model_name="all-MiniLM-L6-v2")

llm = ChatGroq(model="Llama3-8b-8192", api_key=groq_api_key)

prompt_template = ChatPromptTemplate.from_template("""
Answer the following question from the provided context only.
Please provide the most accurate response based on the question.
<context>
{context}
</context>
Question : {input}
""")

def get_pdf_text(pdf_docs):
    """Concatenate the extracted text of every page of every uploaded PDF."""
    text = ""
    for pdf in pdf_docs:
        pdf_reader = PdfReader(pdf)
        for page in pdf_reader.pages:
            # extract_text() can return None for pages without a text layer
            text += page.extract_text() or ""
    return text

def create_vector_embeddings(pdf_docs):
    """Build the FAISS vector store once per session from the uploaded PDFs."""
    if "vectors" not in st.session_state:
        st.session_state.docs = get_pdf_text(pdf_docs)
        st.session_state.splitter = RecursiveCharacterTextSplitter(chunk_size=1200, chunk_overlap=400)
        st.session_state.final_docs = st.session_state.splitter.split_text(st.session_state.docs)
        st.session_state.vectors = FAISS.from_texts(st.session_state.final_docs, embeddings)

if "options" not in st.session_state:
    st.session_state.options = ["Select a query"]

if "user_prompt" not in st.session_state:
    st.session_state.user_prompt = ""

def autopopulate_promptsbydoctype(uploaded_text):
    """Pre-fill the query dropdown with standard questions when a PDF is uploaded."""
    if uploaded_text and uploaded_text[0].name.endswith("pdf"):
        # Auto-populate the standard questions for PDF documents
        itemsToAppend = [
            "get all the programme details including rights and tape content etc in pointwise manner, don't miss any info",
            "give a structured short summary of the programmes and details",
            "give me programme package with programme details listed"
        ]

        for itemToAppend in itemsToAppend:
            if itemToAppend not in st.session_state.options:
                st.session_state.options.append(itemToAppend)

st.title("Basic Document QnA")

with st.sidebar:
    st.title("Menu:")
    #if "uploaded_text" not in st.session_state:
    st.session_state.uploaded_text = st.file_uploader("Upload your Files and Click on the Submit & Process Button", accept_multiple_files=True)
    if st.button("Click To Process File"):
        with st.spinner("Processing..."):
            create_vector_embeddings(st.session_state.uploaded_text)
            st.write("Vector Database is ready")
            autopopulate_promptsbydoctype(st.session_state.uploaded_text)

    # st.markdown('<div class="bottom-button">', unsafe_allow_html=True)
    # params = ['docs', 'splitter', 'final_docs']
    # if st.button("Clean Current Document Settings") and st.session_state.keys():
    #     with st.spinner("Cleaning In Progress...."):
    #         for param in params:
    #             if param in st.session_state:
    #                 del st.session_state[param]
    #
    #         st.session_state['uploaded_text'] = ""
    #         st.write("Cleanup completed..")
    # st.markdown('</div>', unsafe_allow_html=True)

new_option = st.text_input("Or type your query here:")

if new_option and new_option not in st.session_state.options:
    st.session_state.options.append(new_option)
    st.session_state.user_prompt = new_option

if st.session_state.uploaded_text and "Technical" not in st.session_state.uploaded_text[0].name:
    st.session_state.user_prompt = st.selectbox(
        "Enter/Select your query from the document",
        st.session_state.options,
        index=st.session_state.options.index(st.session_state.user_prompt) if st.session_state.user_prompt in st.session_state.options else 0
    )

# Run the retrieval chain only when a real query is selected and the vector store has been built
if st.session_state.user_prompt and st.session_state.user_prompt != "Select a query" and "vectors" in st.session_state:
    #st.write(st.session_state.user_prompt)
    document_chain = create_stuff_documents_chain(llm=llm, prompt=prompt_template)
    retriever = st.session_state.vectors.as_retriever()
    retrieval_chain = create_retrieval_chain(retriever, document_chain)

    start = time.process_time()
    response = retrieval_chain.invoke({"input": st.session_state.user_prompt})
    print(f"Response time: {time.process_time() - start}")

    st.write(response['answer'])

    ## With a streamlit expander
    with st.expander("Document similarity Search"):
        for i, doc in enumerate(response['context']):
            st.write(doc.page_content)
            st.write('------------------------')
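Note on the retrieval step in app.py: create_retrieval_chain returns a dict whose 'answer' key holds the generated reply and whose 'context' key holds the retrieved Document objects, which is why the app reads response['answer'] and doc.page_content. Below is a minimal standalone sketch of the same wiring (not part of this commit; the sample texts, question, and API key are placeholders):

    # Sketch only: same chain wiring as app.py, with placeholder data and key.
    from langchain_groq import ChatGroq
    from langchain_huggingface import HuggingFaceEmbeddings
    from langchain_community.vectorstores import FAISS
    from langchain_core.prompts import ChatPromptTemplate
    from langchain.chains.combine_documents import create_stuff_documents_chain
    from langchain.chains import create_retrieval_chain

    embeddings = HuggingFaceEmbeddings(model_name="all-MiniLM-L6-v2")
    vectors = FAISS.from_texts(
        ["Programme A: broadcast rights 2024.", "Programme B: tape content archived."],  # placeholder texts
        embeddings,
    )

    prompt = ChatPromptTemplate.from_template(
        "Answer the following question from the provided context only.\n"
        "<context>\n{context}\n</context>\nQuestion : {input}"
    )
    llm = ChatGroq(model="Llama3-8b-8192", api_key="YOUR_GROQ_API_KEY")  # placeholder key

    chain = create_retrieval_chain(
        vectors.as_retriever(),
        create_stuff_documents_chain(llm=llm, prompt=prompt),
    )
    response = chain.invoke({"input": "Which programmes are mentioned?"})
    print(response["answer"])                              # generated answer string
    print([d.page_content for d in response["context"]])   # retrieved chunks
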
requirements.txt ADDED
@@ -0,0 +1,11 @@
langchain
langchain-community
pypdf
langchain_huggingface
faiss-cpu
langchain-text-splitters
python-dotenv
sentence_transformers
langchain_groq
streamlit
PyPDF2
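
To run the app locally, app.py expects GROQ_API_KEY (and optionally HF_TOKEN) to be available through the .env file loaded by python-dotenv. A minimal setup might look like this (key values are placeholders):

    # .env (placeholder values)
    GROQ_API_KEY=your_groq_api_key
    HF_TOKEN=your_huggingface_token

    # install dependencies and launch the Streamlit app
    pip install -r requirements.txt
    streamlit run app.py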