NanobotzAI committed
Commit 94f2884 · verified · 1 Parent(s): 7129024

Update app.py

Files changed (1):
  app.py (+34, -78)
app.py CHANGED
@@ -1,13 +1,11 @@
-import gradio as gr
+from fastapi import FastAPI, Query
+from fastapi.responses import FileResponse, JSONResponse
+import uvicorn
 import fitz # PyMuPDF for PDF text extraction
 import faiss # FAISS for vector search
 import numpy as np
-import threading
 from sentence_transformers import SentenceTransformer
 from huggingface_hub import InferenceClient
-from typing import List, Tuple
-from fastapi import FastAPI, Query
-import uvicorn
 
 # Default settings
 class ChatConfig:
@@ -24,37 +22,47 @@ index = faiss.IndexFlatL2(vector_dim) # FAISS index
 
 documents = [] # Store extracted text
 
-def extract_text_from_pdf(pdf_path):
-    """Extracts text from PDF"""
-    doc = fitz.open(pdf_path)
-    text_chunks = [page.get_text("text") for page in doc]
-    return text_chunks
+app = FastAPI()
 
-def create_vector_db(text_chunks):
-    """Embeds text chunks and adds them to FAISS index"""
-    global documents, index
+@app.get("/")
+def serve_homepage():
+    """Serves the HTML interface."""
+    return FileResponse("index.html")
+
+@app.post("/upload_pdf/")
+async def upload_pdf(file_path: str):
+    """Handles PDF file processing."""
+    global documents
+
+    # Extract text from PDF
+    doc = fitz.open(file_path)
+    text_chunks = [page.get_text("text") for page in doc]
+
+    # Create vector database
     documents = text_chunks
     embeddings = embed_model.encode(text_chunks)
     index.add(np.array(embeddings, dtype=np.float32))
 
-def search_relevant_text(query):
-    """Finds the most relevant text chunk for the given query"""
-    query_embedding = embed_model.encode([query])
-    _, closest_idx = index.search(np.array(query_embedding, dtype=np.float32), k=3)
-    return "\n".join([documents[i] for i in closest_idx[0]])
+    return JSONResponse({"message": "PDF uploaded and indexed successfully!"})
 
-def generate_response_sync(message: str) -> str:
-    """Generates response synchronously for FastAPI"""
+@app.get("/chat/")
+def chat_with_pdf(msg: str = Query(..., title="User Message")):
+    """Handles user queries and returns AI-generated responses."""
     if not documents:
-        return "Please upload a PDF first."
+        return JSONResponse({"response": "Please upload a PDF first."})
+
+    # Retrieve relevant context
+    query_embedding = embed_model.encode([msg])
+    _, closest_idx = index.search(np.array(query_embedding, dtype=np.float32), k=3)
+    context = "\n".join([documents[i] for i in closest_idx[0]])
 
-    context = search_relevant_text(message) # Get relevant content from PDF
+    # Generate AI response
     messages = [
         {"role": "system", "content": ChatConfig.DEFAULT_SYSTEM_MSG},
-        {"role": "user", "content": f"Context: {context}\nQuestion: {message}"}
+        {"role": "user", "content": f"Context: {context}\nQuestion: {msg}"}
    ]
 
-    response = ""
+    response_text = ""
     for chunk in client.chat_completion(
         messages,
         max_tokens=ChatConfig.DEFAULT_MAX_TOKENS,
@@ -63,61 +71,9 @@ def generate_response_sync(message: str) -> str:
         top_p=ChatConfig.DEFAULT_TOP_P,
     ):
         token = chunk.choices[0].delta.content or ""
-        response += token
-
-    return response
-
-def handle_upload(pdf_file):
-    """Handles PDF upload and creates vector DB"""
-    text_chunks = extract_text_from_pdf(pdf_file.name)
-    create_vector_db(text_chunks)
-    return "PDF uploaded and indexed successfully!"
-
-def create_interface() -> gr.Blocks:
-    """Creates the Gradio interface"""
-    with gr.Blocks() as interface:
-        gr.Markdown("# PDF-Based Chatbot using Google Gemma")
-
-        with gr.Row():
-            chatbot = gr.Chatbot(label="Chat with Your PDF", type="messages")
-            pdf_upload = gr.File(label="Upload PDF", type="filepath")
-
-        with gr.Row():
-            user_input = gr.Textbox(label="Ask a question", placeholder="Type here...")
-            send_button = gr.Button("Send")
-
-        output = gr.Textbox(label="Response", lines=5)
-
-        # Upload PDF handler
-        pdf_upload.change(handle_upload, inputs=[pdf_upload], outputs=[])
-
-        # Chat function
-        send_button.click(
-            generate_response_sync,
-            inputs=[user_input],
-            outputs=[output]
-        )
+        response_text += token
 
-    return interface
-
-# FastAPI Integration
-app = FastAPI()
-
-@app.get("/chat")
-def chat_with_pdf(msg: str = Query(..., title="User Message")):
-    """API endpoint to receive a message and return AI response"""
-    response = generate_response_sync(msg)
-    return {"response": response}
-
-def run_gradio():
-    """Launches Gradio in a separate thread."""
-    gradio_app = create_interface()
-    gradio_app.launch(server_name="0.0.0.0", server_port=7860, share=True, enable_queue=False)
+    return JSONResponse({"response": response_text})
 
 if __name__ == "__main__":
-    # Start Gradio in a separate thread
-    gradio_thread = threading.Thread(target=run_gradio, daemon=True)
-    gradio_thread.start()
-
-    # Run FastAPI with Uvicorn
     uvicorn.run(app, host="0.0.0.0", port=8000)
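
The hunks skip the unchanged setup block (old lines 14-23 / new lines 12-21), but its pieces are visible from the surrounding code: the second hunk header quotes index = faiss.IndexFlatL2(vector_dim), and the diff body uses embed_model, client, and the ChatConfig fields DEFAULT_SYSTEM_MSG, DEFAULT_MAX_TOKENS, and DEFAULT_TOP_P. A minimal sketch of how those names plausibly fit together is below; the model identifiers and default values are assumptions for illustration, not the file's actual contents.

# Sketch of the unchanged setup block the hunks skip over.
# Model names and default values are placeholders, not the repository's actual choices.
import faiss
from sentence_transformers import SentenceTransformer
from huggingface_hub import InferenceClient

class ChatConfig:
    MODEL = "google/gemma-2-2b-it"  # assumed; the removed Gradio title mentions "Google Gemma"
    DEFAULT_SYSTEM_MSG = "Answer using only the supplied PDF context."  # assumed wording
    DEFAULT_MAX_TOKENS = 512  # assumed value
    DEFAULT_TOP_P = 0.95      # assumed value

client = InferenceClient(ChatConfig.MODEL)             # Hugging Face Inference API client
embed_model = SentenceTransformer("all-MiniLM-L6-v2")  # assumed embedding model
vector_dim = embed_model.get_sentence_embedding_dimension()
index = faiss.IndexFlatL2(vector_dim)  # FAISS index (this line is quoted in the hunk header)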
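
After this commit the whole app is exposed through three FastAPI routes: GET / serves index.html, POST /upload_pdf/ indexes a PDF, and GET /chat/ answers questions. Note that upload_pdf declares a bare file_path: str, which FastAPI maps to a query parameter, so the route expects a path on the server's filesystem rather than a multipart file upload. A small client session against a locally running instance (port 8000, as in the __main__ block) might look like this; the PDF path and question are placeholders:

# Hypothetical client session; requires `pip install requests` and the
# server started via `python app.py` (uvicorn on port 8000).
import requests

BASE = "http://localhost:8000"

# `file_path` is a query parameter and must point at a PDF on the server machine.
r = requests.post(f"{BASE}/upload_pdf/", params={"file_path": "sample.pdf"})
print(r.json())  # {"message": "PDF uploaded and indexed successfully!"}

# Ask a question about the indexed document; `msg` is likewise a query parameter.
r = requests.get(f"{BASE}/chat/", params={"msg": "What is this document about?"})
print(r.json()["response"])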
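
Retrieval inside /chat/ is an exact nearest-neighbour lookup: each PDF page is embedded at upload time, the query embedding is searched with k=3, and the three closest pages are concatenated into the prompt context. The toy run below exercises the same two FAISS calls, with random vectors standing in for real embeddings; the dimension is an assumption.

# Toy demonstration of the add/search pattern used in app.py.
import faiss
import numpy as np

dim = 384  # e.g. a MiniLM-sized embedding width (assumption)
index = faiss.IndexFlatL2(dim)  # exact L2 index, same type as in app.py

rng = np.random.default_rng(0)
page_vectors = rng.random((10, dim), dtype=np.float32)  # one stand-in vector per "page"
index.add(page_vectors)

query = rng.random((1, dim), dtype=np.float32)
distances, closest_idx = index.search(query, k=3)  # same call shape as the diff
print(closest_idx[0])  # row indices of the 3 nearest pages, used to join the context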
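
One behavioural detail worth noting when reading upload_pdf: it rebinds the module-level documents list on every call, but index.add(...) only ever appends, so vectors from an earlier upload stay searchable in the FAISS index while their text is gone from documents, and a stale match can then point past the end of the new list. A hedged sketch of a reset-on-upload variant follows (index.reset() is standard FAISS; this is not what the commit itself does):

# Sketch only: clearing the index before re-indexing keeps FAISS row numbers
# aligned with the `documents` list across repeated uploads.
# Assumes app.py's module-level `index`, `embed_model`, and `documents`.
def rebuild_index(text_chunks):
    global documents
    index.reset()  # drop vectors from any previous upload
    documents = text_chunks
    embeddings = embed_model.encode(text_chunks)
    index.add(np.array(embeddings, dtype=np.float32))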