PRIYANSHUDHAKED committed on
Commit
0317d24
·
verified ·
1 Parent(s): ffc4ebe

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +124 -118
app.py CHANGED
@@ -1,127 +1,133 @@
1
- from fastapi import FastAPI, UploadFile, File
2
- from fastapi.responses import HTMLResponse
3
- from fastapi.staticfiles import StaticFiles
4
- import os
5
- from dotenv import load_dotenv
6
- from PyPDF2 import PdfReader
7
- from langchain.text_splitter import RecursiveCharacterTextSplitter
8
- from langchain_google_genai import GoogleGenerativeAIEmbeddings
9
- from langchain_community.vectorstores import FAISS
10
- from langchain_google_genai import ChatGoogleGenerativeAI
11
- from langchain.chains.question_answering import load_qa_chain
12
- from langchain.prompts import PromptTemplate
13
- import logging
14
-
15
# Application bootstrap: the FastAPI instance all route decorators attach to.
app = FastAPI()

# Emit timestamped log records at INFO level and above.
logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s %(levelname)s: %(message)s',
)
19
-
20
- import google.generativeai as genai
21
  from dotenv import load_dotenv
 
 
22
  import os
 
 
 
 
 
 
 
 
 
23
 
24
# Pull environment variables from the local .env file so the key is
# visible to os.getenv below.
load_dotenv()

# Fail fast with a clear message when the Google API key is absent.
api_key = os.getenv("GOOGLE_API_KEY")
if not api_key:
    raise EnvironmentError("Google API Key not found in environment variables.")

# Configure Google Generative AI with the validated key.
genai.configure(api_key=api_key)
34
-
35
-
36
def get_pdf_text(pdf_docs):
    """Extract and concatenate the text of every page of the uploaded PDFs.

    Args:
        pdf_docs: iterable of FastAPI ``UploadFile`` objects; each one's
            ``.file`` stream is handed to PyPDF2.

    Returns:
        str: all extracted page text joined together.  A file that cannot
        be parsed is logged and skipped instead of aborting the batch.
    """
    parts = []
    for pdf in pdf_docs:
        try:
            pdf_reader = PdfReader(pdf.file)
            for page in pdf_reader.pages:
                # extract_text() may return None for image-only pages; the
                # original ``text += page.extract_text()`` raised TypeError
                # in that case.  Joining a list also avoids quadratic
                # string concatenation.
                parts.append(page.extract_text() or "")
        except Exception as e:
            logging.error(f"Error processing PDF file: {e}")
    return "".join(parts)
46
-
47
def get_text_chunks(text):
    """Split *text* into overlapping chunks suitable for embedding."""
    splitter = RecursiveCharacterTextSplitter(
        chunk_size=10000,
        chunk_overlap=1000,
    )
    return splitter.split_text(text)
54
-
55
def get_vector_store(text_chunks):
    """Embed *text_chunks* and persist a FAISS index next to this module."""
    logging.info("Starting vector store creation")
    embeddings = GoogleGenerativeAIEmbeddings(model="models/embedding-001")

    store = FAISS.from_texts(texts=text_chunks, embedding=embeddings)
    logging.info("FAISS vector store created")

    # Persist the whole store (index + docstore mapping) beside app.py so
    # a later request can reload it from disk.
    index_dir = os.path.join(os.path.dirname(__file__), "faiss_index")
    os.makedirs(index_dir, exist_ok=True)
    store.save_local(index_dir)
    logging.info("FAISS vector store saved successfully.")
67
-
68
def get_conversation_chain():
    """Build the 'stuff' QA chain backed by Gemini Pro with a strict prompt.

    The prompt instructs the model to refuse rather than hallucinate when
    the answer is not in the supplied context.
    """
    prompt_template = """
    Answer the question clear and precise. If not provided the context return the result as
    "Sorry I dont know the answer", don't provide the wrong answer.
    Context:\n {context}?\n
    Question:\n{question}\n
    Answer:
    """
    llm = ChatGoogleGenerativeAI(model="gemini-pro", temperature=0.3)
    qa_prompt = PromptTemplate(template=prompt_template, input_variables=['context', 'question'])
    return load_qa_chain(llm, chain_type='stuff', prompt=qa_prompt)
80
-
81
def user_input(user_question):
    """Answer *user_question* from the persisted FAISS index.

    Returns the model's answer string, or a human-readable message when
    the index is missing or an error occurs (callers always get a str).
    """
    logging.info("Processing user input")

    index_dir = os.path.join(os.path.dirname(__file__), "faiss_index")
    if not os.path.exists(index_dir):
        return "Please upload and process PDF files before asking questions."

    try:
        # Deserialization is explicitly allowed because this application
        # wrote the index itself.
        new_db = FAISS.load_local(
            index_dir,
            GoogleGenerativeAIEmbeddings(model='models/embedding-001'),
            allow_dangerous_deserialization=True,
        )
        logging.info("FAISS vector store loaded successfully")

        docs = new_db.similarity_search(user_question)
        chain = get_conversation_chain()
        response = chain({"input_documents": docs, "question": user_question}, return_only_outputs=True)
        return response["output_text"]
    except Exception as e:
        logging.error(f"Error processing user input: {e}")
        return f"Sorry, there was an error processing your request: {str(e)}. Please try again later."
101
-
102
@app.post("/upload_pdf/")
async def upload_pdf(pdf_docs: list[UploadFile] = File(...)):
    """Ingest uploaded PDFs: extract text, chunk it, and build the index."""
    chunks = get_text_chunks(get_pdf_text(pdf_docs))
    get_vector_store(chunks)
    return {"message": "PDFs processed successfully. You can now ask questions."}
108
-
109
@app.get("/ask_question/")
async def ask_question(user_question: str):
    """Answer a question against the previously processed PDFs."""
    return {"response": user_input(user_question)}
113
-
114
@app.get("/", response_class=HTMLResponse)
async def read_root():
    """Serve a minimal landing page describing the API endpoints."""
    return """
    <html>
    <head>
    <title>Chat with PDFs</title>
    </head>
    <body>
    <h1>Welcome to Chat with PDFs API</h1>
    <p>Use POST /upload_pdf/ to upload PDF files.</p>
    <p>Use GET /ask_question/ to ask questions from the PDFs you uploaded.</p>
    </body>
    </html>
    """
 
1
+ import streamlit as st
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2
  from dotenv import load_dotenv
3
+ from PyPDF2 import PdfReader
4
+ from langchain.text_splitter import RecursiveCharacterTextSplitter
5
  import os
6
+ from langchain_google_genai import GoogleGenerativeAIEmbeddings
7
+ import google.generativeai as genai
8
+ from langchain_community.vectorstores import FAISS
9
+ from langchain_google_genai import ChatGoogleGenerativeAI
10
+ from langchain.chains.question_answering import load_qa_chain
11
+ from langchain.prompts import PromptTemplate
12
+ from htmlTemplates import css, bot_template, user_template
13
+ import logging
14
+ import faiss
15
 
16
# Set up logging
logging.basicConfig(level=logging.INFO, format='%(asctime)s %(levelname)s: %(message)s')

# Load environment variables and configure Gemini.  Check the key
# explicitly: genai.configure(api_key=None) would otherwise only surface
# as an opaque authentication error on the first API call.
load_dotenv()
_api_key = os.getenv("GOOGLE_API_KEY")
if not _api_key:
    raise EnvironmentError("Google API Key not found in environment variables.")
genai.configure(api_key=_api_key)
21
+
22
+
23
def get_pdf_text(pdf_docs):
    """Extract and concatenate the text of every page of the uploaded PDFs.

    Args:
        pdf_docs: iterable of Streamlit ``UploadedFile`` objects (file-like),
            each handed directly to PyPDF2.

    Returns:
        str: all extracted page text joined together.  A file that cannot
        be parsed is logged and skipped instead of aborting the batch.
    """
    parts = []
    for pdf in pdf_docs:
        try:
            pdf_reader = PdfReader(pdf)
            for page in pdf_reader.pages:
                # extract_text() may return None for image-only pages; the
                # original ``text += page.extract_text()`` raised TypeError
                # in that case.  Joining a list also avoids quadratic
                # string concatenation.
                parts.append(page.extract_text() or "")
        except Exception as e:
            logging.error(f"Error processing PDF file: {e}")
    return "".join(parts)
33
+
34
def get_text_chunks(text):
    """Break *text* into large overlapping chunks for embedding."""
    return RecursiveCharacterTextSplitter(
        chunk_size=10000,
        chunk_overlap=1000,
    ).split_text(text)
41
+
42
def get_vector_store(text_chunks):
    """Embed *text_chunks* and persist a FAISS index next to this module."""
    logging.info("Starting vector store creation")
    embeddings = GoogleGenerativeAIEmbeddings(model="models/embedding-001")
    logging.info("Embeddings created")

    # Build the FAISS vector store from the raw chunks.
    store = FAISS.from_texts(texts=text_chunks, embedding=embeddings)
    logging.info("FAISS vector store created")

    # Target directory for the persisted index, alongside this file.
    index_dir = os.path.join(os.path.dirname(__file__), "faiss_index")
    os.makedirs(index_dir, exist_ok=True)

    # save_local writes the index plus docstore/index_to_docstore_id.
    store.save_local(index_dir)
    logging.info("FAISS vector store saved successfully.")
58
+
59
def get_conversation_chain():
    """Build the 'stuff' QA chain backed by Gemini Pro with a strict prompt.

    The prompt instructs the model to refuse rather than hallucinate when
    the answer is not in the supplied context.
    """
    prompt_template = """
    Answer the question clear and precise. If not provided the context return the result as
    "Sorry I dont know the answer", don't provide the wrong answer.
    Context:\n {context}?\n
    Question:\n{question}\n
    Answer:
    """
    llm = ChatGoogleGenerativeAI(model="gemini-pro", temperature=0.3)
    qa_prompt = PromptTemplate(template=prompt_template, input_variables=['context', 'question'])
    return load_qa_chain(llm, chain_type='stuff', prompt=qa_prompt)
71
+
72
def user_input(user_question):
    """Answer *user_question* from the persisted FAISS index and render the
    reply (or an error message) into the Streamlit page.

    Args:
        user_question: the question typed by the user.

    Returns:
        None; output goes to the page via st.write / st.warning.
    """
    logging.info("Processing user input")

    index_dir = os.path.join(os.path.dirname(__file__), "faiss_index")

    # Check for the actual index file, not just the directory: makedirs()
    # may have left an empty "faiss_index" dir behind after a failed save,
    # which would slip past a directory-only check and land in the error
    # path below.  This also matches the check performed in main().
    if not os.path.exists(os.path.join(index_dir, "index.faiss")):
        st.warning("Please upload and process PDF files before asking questions.")
        return

    try:
        # Deserialization is explicitly allowed because this application
        # wrote the index itself.
        new_db = FAISS.load_local(
            index_dir,
            GoogleGenerativeAIEmbeddings(model='models/embedding-001'),
            allow_dangerous_deserialization=True,
        )
        logging.info("FAISS vector store loaded successfully")

        # Retrieve the most relevant chunks and run the QA chain over them.
        docs = new_db.similarity_search(user_question)
        chain = get_conversation_chain()
        response = chain({"input_documents": docs, "question": user_question}, return_only_outputs=True)
        st.write(user_template.replace("{{MSG}}", response["output_text"]), unsafe_allow_html=True)
    except Exception as e:
        logging.error(f"Error processing user input: {e}")
        st.write(bot_template.replace("{{MSG}}", f"Sorry, there was an error processing your request: {str(e)}. Please try again later."), unsafe_allow_html=True)
95
+
96
def main():
    """Streamlit entry point: sidebar for PDF upload/processing plus a
    question box that queries the persisted index."""
    st.set_page_config(page_title="Chat with multiple PDFs",
                       page_icon=":books:")
    st.write(css, unsafe_allow_html=True)

    # Initialise session slots on first run.
    for key in ("conversation", "chat_history"):
        if key not in st.session_state:
            st.session_state[key] = None

    st.header("Chat with multiple PDFs with Gemini Pro :books:")

    with st.sidebar:
        pdf_docs = st.file_uploader(
            "Upload your PDF Files and Click on Process",
            accept_multiple_files=True
        )
        if st.button("Process"):
            with st.spinner("Processing..."):
                try:
                    chunks = get_text_chunks(get_pdf_text(pdf_docs))
                    get_vector_store(chunks)
                    st.session_state.conversation = get_conversation_chain()
                    st.success("PDFs processed successfully. You can now ask questions.")
                except Exception as e:
                    logging.error(f"Error processing PDF files: {e}")
                    st.error("There was an error processing the PDF files. Please try again later.")

    user_question = st.text_input("Ask a Question from the PDF Files")
    if user_question:
        index_file = os.path.join(os.path.dirname(__file__), "faiss_index", "index.faiss")
        if os.path.exists(index_file):
            user_input(user_question)
        else:
            st.warning("Please upload and process PDF files before asking questions.")


if __name__ == "__main__":
    main()