from fastapi import FastAPI, Query
from fastapi.responses import FileResponse, JSONResponse
import uvicorn
import fitz  # PyMuPDF for PDF text extraction
import faiss  # FAISS for vector search
import numpy as np
from sentence_transformers import SentenceTransformer
from huggingface_hub import InferenceClient
import os
# Default settings
class ChatConfig:
    MODEL = "google/gemma-3-27b-it"
    DEFAULT_SYSTEM_MSG = "You are an AI assistant answering only based on the uploaded PDF."
    DEFAULT_MAX_TOKENS = 512
    DEFAULT_TEMP = 0.3
    DEFAULT_TOP_P = 0.95

HF_TOKEN = os.getenv("HF_TOKEN")  # Fetch from environment variables
client = InferenceClient(ChatConfig.MODEL, token=HF_TOKEN)
embed_model = SentenceTransformer("all-MiniLM-L6-v2")  # Lightweight embedding model
vector_dim = 384  # Embedding size of all-MiniLM-L6-v2
index = faiss.IndexFlatL2(vector_dim)  # FAISS index (exact L2 search)
documents = []  # Extracted text chunks, row-aligned with the FAISS index

app = FastAPI()
@app.get("/")  # assumed route for the UI
def serve_homepage():
    """Serves the HTML interface."""
    return FileResponse("index.html")
@app.post("/upload")  # assumed route name
async def upload_pdf(file_path: str):
    """Handles PDF file processing."""
    global documents
    # Extract text from the PDF, one chunk per page
    doc = fitz.open(file_path)
    text_chunks = [page.get_text("text") for page in doc]
    # Rebuild the vector database so the FAISS rows stay aligned with `documents`
    index.reset()
    documents = text_chunks
    embeddings = embed_model.encode(text_chunks)
    index.add(np.array(embeddings, dtype=np.float32))
    return JSONResponse({"message": "PDF uploaded and indexed successfully!"})
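
# Example invocation (hypothetical route and file name; the PDF must already
# exist on the server's filesystem, since the endpoint takes a path, not bytes):
#   curl -X POST "http://localhost:8000/upload?file_path=document.pdf"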
@app.get("/chat")  # assumed route name
def chat_with_pdf(msg: str = Query(..., title="User Message")):
    """Handles user queries and returns AI-generated responses."""
    if not documents:
        return JSONResponse({"response": "Please upload a PDF first."})
    # Retrieve the most relevant chunks as context
    query_embedding = embed_model.encode([msg])
    _, closest_idx = index.search(np.array(query_embedding, dtype=np.float32), k=3)
    # FAISS pads results with -1 when fewer than k vectors are indexed
    context = "\n".join(documents[i] for i in closest_idx[0] if i != -1)
    # Generate the AI response, grounded in the retrieved context
    messages = [
        {"role": "system", "content": ChatConfig.DEFAULT_SYSTEM_MSG},
        {"role": "user", "content": f"Context: {context}\nQuestion: {msg}"},
    ]
    response_text = ""
    for chunk in client.chat_completion(
        messages,
        max_tokens=ChatConfig.DEFAULT_MAX_TOKENS,
        stream=True,
        temperature=ChatConfig.DEFAULT_TEMP,
        top_p=ChatConfig.DEFAULT_TOP_P,
    ):
        token = chunk.choices[0].delta.content or ""
        response_text += token
    return JSONResponse({"response": response_text})
if __name__ == "__main__":
    uvicorn.run(app, host="0.0.0.0", port=8000)
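
# Example chat request (hypothetical; assumes the /chat route above):
#   curl "http://localhost:8000/chat?msg=What%20is%20this%20document%20about%3F"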