PRIYANSHUDHAKED committed on
Commit
42416c1
·
verified ·
1 Parent(s): c15a5f1

Update main.py

Browse files
Files changed (1) hide show
  1. main.py +114 -0
main.py CHANGED
@@ -0,0 +1,114 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from fastapi import FastAPI, UploadFile, File
2
+ from fastapi.responses import HTMLResponse
3
+ from fastapi.staticfiles import StaticFiles
4
+ import os
5
+ from dotenv import load_dotenv
6
+ from PyPDF2 import PdfReader
7
+ from langchain.text_splitter import RecursiveCharacterTextSplitter
8
+ from langchain_google_genai import GoogleGenerativeAIEmbeddings
9
+ from langchain_community.vectorstores import FAISS
10
+ from langchain_google_genai import ChatGoogleGenerativeAI
11
+ from langchain.chains.question_answering import load_qa_chain
12
+ from langchain.prompts import PromptTemplate
13
+ import logging
14
+
15
app = FastAPI()

# Set up logging
logging.basicConfig(level=logging.INFO, format='%(asctime)s %(levelname)s: %(message)s')

# Load settings (notably GOOGLE_API_KEY) from a local .env file into the
# process environment.
load_dotenv()

# No explicit `genai.configure(...)` call is made: this module never imports a
# `genai` name, so that call raised NameError at import time. The
# langchain_google_genai classes used in this file read GOOGLE_API_KEY from
# the environment themselves, so loading the .env file is sufficient.
if not os.getenv("GOOGLE_API_KEY"):
    logging.warning("GOOGLE_API_KEY is not set; embedding and chat requests will fail.")
22
+
23
def get_pdf_text(pdf_docs):
    """Concatenate the text extracted from every page of the given PDFs.

    Args:
        pdf_docs: Iterable of upload objects whose ``file`` attribute is
            passed to ``PdfReader``.

    Returns:
        The page texts joined in order, or ``""`` when nothing was extracted.
        A PDF that raises while being read is logged and skipped; text from
        pages read before the failure is kept.
    """
    page_texts = []
    for pdf in pdf_docs:
        try:
            for page in PdfReader(pdf.file).pages:
                # Defensive: treat a falsy extraction result as empty text so
                # one text-less page cannot abort the rest of the document.
                page_texts.append(page.extract_text() or "")
        except Exception as e:
            logging.error(f"Error processing PDF file: {e}")
    # Join once at the end instead of growing a string page by page.
    return "".join(page_texts)
33
+
34
def get_text_chunks(text):
    """Split *text* into overlapping chunks.

    The splitter is configured with ``chunk_size=10000`` and
    ``chunk_overlap=1000``.

    Args:
        text: The full text to split.

    Returns:
        The list of chunks produced by ``split_text``.
    """
    splitter = RecursiveCharacterTextSplitter(chunk_size=10000, chunk_overlap=1000)
    return splitter.split_text(text)
41
+
42
def get_vector_store(text_chunks):
    """Embed *text_chunks* and save them as a FAISS index beside this file.

    The index is written to a ``faiss_index`` directory located next to this
    module; the directory is created if it does not exist.

    Args:
        text_chunks: Non-empty list of strings to embed and index.

    Raises:
        ValueError: If ``text_chunks`` is empty. Failing here gives a clear
            message instead of an opaque error from inside the vector store.
    """
    if not text_chunks:
        raise ValueError("Cannot build a vector store from an empty list of text chunks.")

    logging.info("Starting vector store creation")
    embeddings = GoogleGenerativeAIEmbeddings(model="models/embedding-001")

    vector_store = FAISS.from_texts(texts=text_chunks, embedding=embeddings)
    logging.info("FAISS vector store created")

    faiss_index_dir = os.path.join(os.path.dirname(__file__), "faiss_index")
    os.makedirs(faiss_index_dir, exist_ok=True)

    vector_store.save_local(faiss_index_dir)
    logging.info("FAISS vector store saved successfully.")
54
+
55
def get_conversation_chain():
    """Build the question-answering chain used to answer from retrieved documents.

    The chain uses the ``gemini-pro`` chat model at temperature 0.3 and a
    prompt with ``context`` and ``question`` variables, combined with the
    ``stuff`` chain type.

    Returns:
        The chain returned by ``load_qa_chain``.
    """
    qa_template = """
    Answer the question clear and precise. If not provided the context return the result as
    "Sorry I dont know the answer", don't provide the wrong answer.
    Context:\n {context}?\n
    Question:\n{question}\n
    Answer:
    """
    llm = ChatGoogleGenerativeAI(model="gemini-pro", temperature=0.3)
    qa_prompt = PromptTemplate(
        template=qa_template,
        input_variables=['context', 'question'],
    )
    return load_qa_chain(llm, chain_type='stuff', prompt=qa_prompt)
67
+
68
def user_input(user_question):
    """Answer *user_question* using the FAISS index saved beside this file.

    Args:
        user_question: The question text.

    Returns:
        The chain's ``output_text`` on success. Otherwise a human-readable
        message: when the question is blank, when no index directory exists
        yet, or when loading/searching/answering raises.
    """
    logging.info("Processing user input")

    # A blank question has nothing to retrieve against; answer immediately
    # instead of spending an embedding request on it.
    if not user_question or not user_question.strip():
        return "Please enter a question."

    faiss_index_dir = os.path.join(os.path.dirname(__file__), "faiss_index")

    if not os.path.exists(faiss_index_dir):
        return "Please upload and process PDF files before asking questions."

    try:
        embeddings = GoogleGenerativeAIEmbeddings(model='models/embedding-001')
        # NOTE: allow_dangerous_deserialization=True unpickles data stored in
        # the index directory. Only load a directory this application wrote
        # itself; never point this at files from an untrusted source.
        new_db = FAISS.load_local(faiss_index_dir, embeddings, allow_dangerous_deserialization=True)
        logging.info("FAISS vector store loaded successfully")

        docs = new_db.similarity_search(user_question)
        chain = get_conversation_chain()
        response = chain({"input_documents": docs, "question": user_question}, return_only_outputs=True)
        return response["output_text"]

    except Exception as e:
        # logging.exception records the traceback along with the message.
        logging.exception(f"Error processing user input: {e}")
        return f"Sorry, there was an error processing your request: {str(e)}. Please try again later."
88
+
89
@app.post("/upload_pdf/")
async def upload_pdf(pdf_docs: list[UploadFile] = File(...)):
    """Extract text from the uploaded PDFs and build the FAISS index from it.

    Args:
        pdf_docs: The uploaded PDF files.

    Returns:
        A dict with a ``message`` key confirming the PDFs were processed.

    Raises:
        HTTPException: 400 when no text could be extracted from any upload,
            so there is nothing to index.
    """
    # Local imports keep this handler self-contained.
    import asyncio

    from fastapi import HTTPException

    # PDF parsing and embedding are blocking calls; run them in worker
    # threads so the event loop can keep serving other requests.
    raw_text = await asyncio.to_thread(get_pdf_text, pdf_docs)
    if not raw_text.strip():
        raise HTTPException(
            status_code=400,
            detail="No extractable text was found in the uploaded PDF files.",
        )

    text_chunks = await asyncio.to_thread(get_text_chunks, raw_text)
    await asyncio.to_thread(get_vector_store, text_chunks)
    return {"message": "PDFs processed successfully. You can now ask questions."}
95
+
96
@app.get("/ask_question/")
async def ask_question(user_question: str):
    """Answer a question against the previously uploaded PDFs.

    Args:
        user_question: The question text, taken from the query string.

    Returns:
        A dict with a ``response`` key holding the value returned by
        ``user_input``.
    """
    # Local import keeps this handler self-contained.
    import asyncio

    # user_input performs blocking index loading and model calls; run it in a
    # worker thread so the event loop is not stalled while it completes.
    response = await asyncio.to_thread(user_input, user_question)
    return {"response": response}
100
+
101
@app.get("/", response_class=HTMLResponse)
async def read_root():
    """Serve a small HTML landing page that lists the available endpoints."""
    landing_page = """
    <html>
    <head>
    <title>Chat with PDFs</title>
    </head>
    <body>
    <h1>Welcome to Chat with PDFs API</h1>
    <p>Use POST /upload_pdf/ to upload PDF files.</p>
    <p>Use GET /ask_question/ to ask questions from the PDFs you uploaded.</p>
    </body>
    </html>
    """
    return landing_page