import gradio as gr
import fitz  # PyMuPDF for PDF text extraction
import faiss  # FAISS for vector search
import numpy as np
from sentence_transformers import SentenceTransformer
from huggingface_hub import InferenceClient
from typing import List, Tuple
from fastapi import FastAPI, Query
import uvicorn


# Default settings
class ChatConfig:
    MODEL = "google/gemma-3-27b-it"
    DEFAULT_SYSTEM_MSG = "You are an AI assistant answering only based on the uploaded PDF."
    DEFAULT_MAX_TOKENS = 512
    DEFAULT_TEMP = 0.3
    DEFAULT_TOP_P = 0.95


client = InferenceClient(ChatConfig.MODEL)
embed_model = SentenceTransformer("all-MiniLM-L6-v2")  # Lightweight embedding model
vector_dim = 384  # Embedding size
index = faiss.IndexFlatL2(vector_dim)  # FAISS index
documents = []  # Store extracted text


def extract_text_from_pdf(pdf_path):
    """Extracts text from PDF, one chunk per page"""
    doc = fitz.open(pdf_path)
    text_chunks = [page.get_text("text") for page in doc]
    return text_chunks


def create_vector_db(text_chunks):
    """Embeds text chunks and adds them to FAISS index"""
    global documents, index
    documents = text_chunks
    index.reset()  # Clear any previously indexed PDF so FAISS ids stay aligned with `documents`
    embeddings = embed_model.encode(text_chunks)
    index.add(np.array(embeddings, dtype=np.float32))


def search_relevant_text(query):
    """Finds the most relevant text chunks for the given query"""
    query_embedding = embed_model.encode([query])
    _, closest_idx = index.search(np.array(query_embedding, dtype=np.float32), k=3)
    # FAISS returns -1 for missing neighbours when fewer than k chunks are indexed
    return "\n".join([documents[i] for i in closest_idx[0] if i != -1])


def generate_response_sync(message: str) -> str:
    """Generates a response synchronously for FastAPI"""
    if not documents:
        return "Please upload a PDF first."
    context = search_relevant_text(message)  # Get relevant content from PDF
    messages = [
        {"role": "system", "content": ChatConfig.DEFAULT_SYSTEM_MSG},
        {"role": "user", "content": f"Context: {context}\nQuestion: {message}"}
    ]
    response = ""
    for chunk in client.chat_completion(
        messages,
        max_tokens=ChatConfig.DEFAULT_MAX_TOKENS,
        stream=True,
        temperature=ChatConfig.DEFAULT_TEMP,
        top_p=ChatConfig.DEFAULT_TOP_P,
    ):
        token = chunk.choices[0].delta.content or ""
        response += token
    return response


def handle_upload(pdf_file):
    """Handles PDF upload and creates the vector DB"""
    # gr.File(type="filepath") passes the uploaded file's path as a string
    text_chunks = extract_text_from_pdf(pdf_file)
    create_vector_db(text_chunks)
    return "PDF uploaded and indexed successfully!"


def create_interface() -> gr.Blocks:
    """Creates the Gradio interface"""
    with gr.Blocks() as interface:
        gr.Markdown("# PDF-Based Chatbot using Google Gemma")
        with gr.Row():
            chatbot = gr.Chatbot(label="Chat with Your PDF", type="messages")
            pdf_upload = gr.File(label="Upload PDF", type="filepath")
        with gr.Row():
            user_input = gr.Textbox(label="Ask a question", placeholder="Type here...")
            send_button = gr.Button("Send")
        output = gr.Textbox(label="Response", lines=5)
        # Upload PDF handler; show the indexing status in the response box
        pdf_upload.change(handle_upload, inputs=[pdf_upload], outputs=[output])
        # Chat function
        send_button.click(
            generate_response_sync,
            inputs=[user_input],
            outputs=[output]
        )
    return interface


# FastAPI Integration
app = FastAPI()


@app.get("/chat")  # Register the route (path chosen here; adjust as needed)
def chat_with_pdf(msg: str = Query(..., title="User Message")):
    """API endpoint to receive a message and return the AI response"""
    response = generate_response_sync(msg)
    return {"response": response}


if __name__ == "__main__":
    import threading

    # Start Gradio UI in a separate thread
    def run_gradio():
        gradio_app = create_interface()
        gradio_app.launch(server_name="0.0.0.0", server_port=7860, share=True)

    threading.Thread(target=run_gradio).start()

    # Start FastAPI
    uvicorn.run(app, host="0.0.0.0", port=8000)
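
# A minimal client sketch (an illustration, not part of the app): once the server is
# running, the /chat route registered above can be queried with the standard
# `requests` library, assuming the default host and port used here:
#
#   import requests
#   resp = requests.get("http://localhost:8000/chat", params={"msg": "What is this PDF about?"})
#   print(resp.json()["response"])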