AjithBharadwaj committed on
Commit 3cc4f09 · verified · 1 Parent(s): a2c3830

Create app.py

Files changed (1)
  1. app.py +124 -0
app.py ADDED
@@ -0,0 +1,124 @@
import os

import streamlit as st
from PyPDF2 import PdfReader
from dotenv import load_dotenv
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.vectorstores import FAISS
from langchain.chains.question_answering import load_qa_chain
from langchain.prompts import PromptTemplate
from langchain_community.embeddings import HuggingFaceBgeEmbeddings
from langchain_huggingface import HuggingFaceEndpoint

# Load HUGGINGFACEHUB_API_TOKEN from a local .env file.
load_dotenv()
HUGGINGFACEHUB_API_TOKEN = os.getenv("HUGGINGFACEHUB_API_TOKEN")


def get_pdf_text(pdf_docs):
    """Concatenate the extracted text of every page of every uploaded PDF."""
    text = ""
    for pdf in pdf_docs:
        pdf_reader = PdfReader(pdf)
        for page in pdf_reader.pages:
            # extract_text() can return None on image-only pages.
            text += page.extract_text() or ""
    return text


def get_text_chunks(text):
    """Split the raw text into overlapping chunks for embedding."""
    text_splitter = RecursiveCharacterTextSplitter(chunk_size=10000, chunk_overlap=1000)
    return text_splitter.split_text(text)


def get_embeddings():
    """BGE embedding model, shared by indexing and querying so both sides match."""
    return HuggingFaceBgeEmbeddings(
        model_name="BAAI/bge-large-en",
        model_kwargs={"device": "cpu"},
        encode_kwargs={"normalize_embeddings": True},
    )


def get_vector_store(text_chunks):
    """Embed the chunks and persist a FAISS index to disk."""
    vector_store = FAISS.from_texts(text_chunks, embedding=get_embeddings())
    vector_store.save_local("faiss_index")


def get_conversational_chain():
    prompt_template = """
    Answer the question as detailed as possible from the provided context. Make sure
    to provide all the details. If the answer is not in the provided context, just say
    "answer is not available in the context"; don't provide a wrong answer.

    Context:\n{context}\n
    Question:\n{question}\n

    Answer:
    """

    model = HuggingFaceEndpoint(
        repo_id="google/gemma-2-9b-it",
        temperature=0.3,
        huggingfacehub_api_token=HUGGINGFACEHUB_API_TOKEN,
    )
    prompt = PromptTemplate(template=prompt_template, input_variables=["context", "question"])
    # "stuff" inserts all retrieved documents directly into the prompt.
    chain = load_qa_chain(model, chain_type="stuff", prompt=prompt)
    return chain


def user_input(user_question):
    # Reload the persisted index with the same embedding model used to build it.
    # Recent LangChain versions require allow_dangerous_deserialization=True to
    # unpickle a locally saved index.
    new_db = FAISS.load_local(
        "faiss_index", get_embeddings(), allow_dangerous_deserialization=True
    )
    docs = new_db.similarity_search(user_question)

    chain = get_conversational_chain()
    response = chain(
        {"input_documents": docs, "question": user_question},
        return_only_outputs=True,
    )
    st.write("Reply: ", response["output_text"])


def main():
    st.set_page_config(page_title="Chat PDF")
    st.header("Chat with PDF using Gemma")

    user_question = st.text_input("Ask a Question from the PDF Files")
    if user_question:
        user_input(user_question)

    with st.sidebar:
        st.title("Menu:")
        pdf_docs = st.file_uploader(
            "Upload your PDF Files and Click on the Submit & Process Button",
            accept_multiple_files=True,
        )
        if st.button("Submit & Process"):
            with st.spinner("Processing..."):
                raw_text = get_pdf_text(pdf_docs)
                text_chunks = get_text_chunks(raw_text)
                get_vector_store(text_chunks)
                st.success("Done")


if __name__ == "__main__":
    main()
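
A minimal sketch for running the app locally, assuming the usual PyPI names for the packages imported above (versions left unpinned as an assumption); sentence-transformers is what HuggingFaceBgeEmbeddings uses to load BAAI/bge-large-en, and the .env key must match the name the script reads via os.getenv. The token value shown is a placeholder.

requirements.txt:

    streamlit
    PyPDF2
    python-dotenv
    langchain
    langchain-community
    langchain-huggingface
    sentence-transformers
    faiss-cpu

.env:

    HUGGINGFACEHUB_API_TOKEN=hf_your_token_here

Run:

    pip install -r requirements.txt
    streamlit run app.py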