import gradio as gr
import fitz  # PyMuPDF for PDF text extraction
import faiss  # FAISS for vector search
import numpy as np
from sentence_transformers import SentenceTransformer
from huggingface_hub import InferenceClient
from typing import List, Tuple
from fastapi import FastAPI, Query
import uvicorn


# Default settings
class ChatConfig:
    MODEL = "google/gemma-3-27b-it"
    DEFAULT_SYSTEM_MSG = "You are an AI assistant answering only based on the uploaded PDF."
    DEFAULT_MAX_TOKENS = 512
    DEFAULT_TEMP = 0.3
    DEFAULT_TOP_P = 0.95


client = InferenceClient(ChatConfig.MODEL)
embed_model = SentenceTransformer("all-MiniLM-L6-v2")  # Lightweight embedding model
vector_dim = 384  # Embedding size
index = faiss.IndexFlatL2(vector_dim)  # FAISS index
documents = []  # Store extracted text


def extract_text_from_pdf(pdf_path):
    """Extracts text from PDF, one chunk per page"""
    doc = fitz.open(pdf_path)
    text_chunks = [page.get_text("text") for page in doc]
    return text_chunks


def create_vector_db(text_chunks):
    """Embeds text chunks and adds them to FAISS index"""
    global documents, index
    documents = text_chunks
    index.reset()  # Clear any previously indexed PDF so FAISS ids stay aligned with `documents`
    embeddings = embed_model.encode(text_chunks)
    index.add(np.array(embeddings, dtype=np.float32))


def search_relevant_text(query):
    """Finds the most relevant text chunks for the given query"""
    query_embedding = embed_model.encode([query])
    _, closest_idx = index.search(np.array(query_embedding, dtype=np.float32), k=3)
    # FAISS returns -1 for missing neighbours when fewer than k chunks are indexed
    return "\n".join([documents[i] for i in closest_idx[0] if i != -1])


def generate_response_sync(message: str) -> str:
    """Generates a response synchronously for FastAPI"""
    if not documents:
        return "Please upload a PDF first."
    context = search_relevant_text(message)  # Get relevant content from PDF
    messages = [
        {"role": "system", "content": ChatConfig.DEFAULT_SYSTEM_MSG},
        {"role": "user", "content": f"Context: {context}\nQuestion: {message}"}
    ]
    response = ""
    for chunk in client.chat_completion(
        messages,
        max_tokens=ChatConfig.DEFAULT_MAX_TOKENS,
        stream=True,
        temperature=ChatConfig.DEFAULT_TEMP,
        top_p=ChatConfig.DEFAULT_TOP_P,
    ):
        token = chunk.choices[0].delta.content or ""
        response += token
    return response


def handle_upload(pdf_file):
    """Handles PDF upload and creates the vector DB"""
    # gr.File(type="filepath") passes the uploaded file's path as a string
    text_chunks = extract_text_from_pdf(pdf_file)
    create_vector_db(text_chunks)
    return "PDF uploaded and indexed successfully!"


def create_interface() -> gr.Blocks:
    """Creates the Gradio interface"""
    with gr.Blocks() as interface:
        gr.Markdown("# PDF-Based Chatbot using Google Gemma")
        with gr.Row():
            chatbot = gr.Chatbot(label="Chat with Your PDF", type="messages")
            pdf_upload = gr.File(label="Upload PDF", type="filepath")
        with gr.Row():
            user_input = gr.Textbox(label="Ask a question", placeholder="Type here...")
            send_button = gr.Button("Send")
        output = gr.Textbox(label="Response", lines=5)
        # Upload PDF handler; show the indexing status in the response box
        pdf_upload.change(handle_upload, inputs=[pdf_upload], outputs=[output])
        # Chat function
        send_button.click(
            generate_response_sync,
            inputs=[user_input],
            outputs=[output]
        )
    return interface


# FastAPI Integration
app = FastAPI()


@app.get("/chat")  # Register the route (path chosen here; adjust as needed)
def chat_with_pdf(msg: str = Query(..., title="User Message")):
    """API endpoint to receive a message and return the AI response"""
    response = generate_response_sync(msg)
    return {"response": response}


if __name__ == "__main__":
    import threading

    # Start Gradio UI in a separate thread
    def run_gradio():
        gradio_app = create_interface()
        gradio_app.launch(server_name="0.0.0.0", server_port=7860, share=True)

    threading.Thread(target=run_gradio).start()

    # Start FastAPI
    uvicorn.run(app, host="0.0.0.0", port=8000)
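
# A minimal client sketch (an illustration, not part of the app): once the server is
# running, the /chat route registered above can be queried with the standard
# `requests` library, assuming the default host and port used here:
#
#   import requests
#   resp = requests.get("http://localhost:8000/chat", params={"msg": "What is this PDF about?"})
#   print(resp.json()["response"])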