import os

import faiss  # FAISS for vector search
import fitz  # PyMuPDF for PDF text extraction
import numpy as np
from flask import Flask, request, jsonify, send_from_directory
from huggingface_hub import InferenceClient
from sentence_transformers import SentenceTransformer

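# The dependencies below are assumed rather than pinned anywhere in this file;
# a matching requirements.txt would roughly be:
#   flask, pymupdf, faiss-cpu, numpy, sentence-transformers, huggingface_hub
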
# Default settings
class ChatConfig:
    MODEL = "google/gemma-3-27b-it"
    DEFAULT_SYSTEM_MSG = "You are an AI assistant answering only based on the uploaded PDF."
    DEFAULT_MAX_TOKENS = 512
    DEFAULT_TEMP = 0.3
    DEFAULT_TOP_P = 0.95


HF_TOKEN = os.getenv("HF_TOKEN")  # Fetch from environment variables
client = InferenceClient(ChatConfig.MODEL, token=HF_TOKEN)
embed_model = SentenceTransformer("all-MiniLM-L6-v2")  # Lightweight embedding model
vector_dim = 384  # Embedding size
index = faiss.IndexFlatL2(vector_dim)  # FAISS index
documents = []  # Store extracted text

app = Flask(__name__)

@app.route("/")
def serve_homepage():
"""Serves the HTML interface."""
return send_from_directory(os.getcwd(), 'index.html')
@app.route("/upload_pdf/", methods=["POST"])
def upload_pdf():
"""Handles PDF file processing."""
global documents
file = request.files['file']
# Save the uploaded file temporarily
file_path = os.path.join(os.getcwd(), file.filename)
file.save(file_path)
# Extract text from PDF
doc = fitz.open(file_path)
text_chunks = [page.get_text("text") for page in doc]
# Create vector database
documents = text_chunks
embeddings = embed_model.encode(text_chunks)
index.add(np.array(embeddings, dtype=np.float32))
return jsonify({"message": "PDF uploaded and indexed successfully!"})
@app.route("/chat/", methods=["GET"])
def chat_with_pdf():
"""Handles user queries and returns AI-generated responses."""
msg = request.args.get("msg")
if not documents:
return jsonify({"response": "Please upload a PDF first."})
# Retrieve relevant context
query_embedding = embed_model.encode([msg])
_, closest_idx = index.search(np.array(query_embedding, dtype=np.float32), k=3)
context = "\n".join([documents[i] for i in closest_idx[0]])
# Generate AI response
messages = [
{"role": "system", "content": ChatConfig.DEFAULT_SYSTEM_MSG},
{"role": "user", "content": f"Context: {context}\nQuestion: {msg}"}
]
response_text = ""
for chunk in client.chat_completion(
messages,
max_tokens=ChatConfig.DEFAULT_MAX_TOKENS,
stream=True,
temperature=ChatConfig.DEFAULT_TEMP,
top_p=ChatConfig.DEFAULT_TOP_P,
):
token = chunk.choices[0].delta.content or ""
response_text += token
return jsonify({"response": response_text})
if __name__ == "__main__":
    app.run(host="0.0.0.0", port=8000)
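
# Example usage (assumes the server is reachable on localhost:8000 and that a
# local file named example.pdf exists; filename and question are hypothetical):
#   curl -F "file=@example.pdf" http://localhost:8000/upload_pdf/
#   curl "http://localhost:8000/chat/?msg=What+is+this+document+about"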