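"""Flask app for chatting with an uploaded PDF: pages are embedded with
sentence-transformers, indexed with FAISS, and the best-matching chunks
are passed as context to a Hugging Face chat model."""
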
import os

from flask import Flask, request, jsonify, send_from_directory
import fitz  # PyMuPDF, for PDF text extraction
import faiss  # FAISS, for vector similarity search
import numpy as np
from sentence_transformers import SentenceTransformer
from huggingface_hub import InferenceClient
# Default settings
class ChatConfig:
    MODEL = "google/gemma-3-27b-it"
    DEFAULT_SYSTEM_MSG = "You are an AI assistant answering only based on the uploaded PDF."
    DEFAULT_MAX_TOKENS = 512
    DEFAULT_TEMP = 0.3
    DEFAULT_TOP_P = 0.95

HF_TOKEN = os.getenv("HF_TOKEN")  # Fetch from environment variables
client = InferenceClient(ChatConfig.MODEL, token=HF_TOKEN)
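# Note: google/gemma-3-27b-it is a gated model on the Hugging Face Hub, so
# HF_TOKEN should belong to an account that has accepted the model's license.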
embed_model = SentenceTransformer("all-MiniLM-L6-v2")  # Lightweight embedding model
vector_dim = 384  # all-MiniLM-L6-v2 produces 384-dimensional embeddings
index = faiss.IndexFlatL2(vector_dim)  # Flat (exact) L2-distance FAISS index
documents = []  # Extracted text chunks, kept aligned with the FAISS index rows

app = Flask(__name__)
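
# Retrieval-augmented flow: /upload_pdf/ fills `documents` and the FAISS index
# (row i of the index corresponds to documents[i]); /chat/ embeds the question,
# pulls the nearest chunks, and hands them to the model as context.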
@app.route("/")
def serve_homepage():
    """Serves the HTML interface."""
    return send_from_directory(os.getcwd(), "index.html")
@app.route("/upload_pdf/", methods=["POST"])
def upload_pdf():
    """Handles PDF file processing: extract text, embed it, and rebuild the index."""
    global documents
    file = request.files.get("file")
    if file is None or file.filename == "":
        return jsonify({"error": "No PDF file provided."}), 400

    # Save the uploaded file temporarily
    file_path = os.path.join(os.getcwd(), file.filename)
    file.save(file_path)

    # Extract text from the PDF, one chunk per page
    doc = fitz.open(file_path)
    text_chunks = [page.get_text("text") for page in doc]
    doc.close()
    os.remove(file_path)  # Remove the temporary copy

    # Rebuild the vector database; resetting first keeps the index rows
    # aligned with `documents` when a second PDF is uploaded
    index.reset()
    documents = text_chunks
    embeddings = embed_model.encode(text_chunks)
    index.add(np.array(embeddings, dtype=np.float32))

    return jsonify({"message": "PDF uploaded and indexed successfully!"})
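
# A possible refinement (not wired in here): page-level chunks can be large,
# so splitting each page into smaller overlapping pieces before embedding
# often sharpens retrieval. A minimal sketch, with a hypothetical helper and
# arbitrary sizes:
#
#     def split_into_chunks(text, size=1000, overlap=200):
#         step = size - overlap
#         return [text[i:i + size] for i in range(0, len(text), step)]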
@app.route("/chat/", methods=["GET"])
def chat_with_pdf():
    """Handles user queries and returns AI-generated responses."""
    msg = request.args.get("msg", "").strip()
    if not msg:
        return jsonify({"response": "Please pass a question in the 'msg' query parameter."})
    if not documents:
        return jsonify({"response": "Please upload a PDF first."})

    # Retrieve relevant context; FAISS pads results with -1 when fewer
    # than k vectors are indexed, so filter those out
    query_embedding = embed_model.encode([msg])
    _, closest_idx = index.search(np.array(query_embedding, dtype=np.float32), k=3)
    context = "\n".join(documents[i] for i in closest_idx[0] if i != -1)
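
    # Note: IndexFlatL2 ranks chunks by Euclidean distance. For cosine
    # similarity one could instead encode with normalize_embeddings=True
    # and use faiss.IndexFlatIP; either is fine for this simple setup.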
    # Generate AI response
    messages = [
        {"role": "system", "content": ChatConfig.DEFAULT_SYSTEM_MSG},
        {"role": "user", "content": f"Context: {context}\nQuestion: {msg}"},
    ]
    response_text = ""
    for chunk in client.chat_completion(
        messages,
        max_tokens=ChatConfig.DEFAULT_MAX_TOKENS,
        stream=True,
        temperature=ChatConfig.DEFAULT_TEMP,
        top_p=ChatConfig.DEFAULT_TOP_P,
    ):
        token = chunk.choices[0].delta.content or ""
        response_text += token
    return jsonify({"response": response_text})
if __name__ == "__main__":
    app.run(host="0.0.0.0", port=8000)
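
# Example usage once the server is running (filename and host are illustrative):
#   curl -F "file=@your.pdf" http://localhost:8000/upload_pdf/
#   curl "http://localhost:8000/chat/?msg=What+is+this+PDF+about%3F"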