import os

import faiss  # FAISS for vector search
import fitz  # PyMuPDF for PDF text extraction
import numpy as np
from flask import Flask, request, jsonify, send_from_directory
from huggingface_hub import InferenceClient
from sentence_transformers import SentenceTransformer

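# The dependencies below are assumed rather than pinned anywhere in this file;
# a matching requirements.txt would roughly be:
#   flask, pymupdf, faiss-cpu, numpy, sentence-transformers, huggingface_hub
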
# Default settings
class ChatConfig:
    MODEL = "google/gemma-3-27b-it"
    DEFAULT_SYSTEM_MSG = "You are an AI assistant answering only based on the uploaded PDF."
    DEFAULT_MAX_TOKENS = 512
    DEFAULT_TEMP = 0.3
    DEFAULT_TOP_P = 0.95


HF_TOKEN = os.getenv("HF_TOKEN")  # Fetch from environment variables
client = InferenceClient(ChatConfig.MODEL, token=HF_TOKEN)
embed_model = SentenceTransformer("all-MiniLM-L6-v2")  # Lightweight embedding model
vector_dim = 384  # Embedding size
index = faiss.IndexFlatL2(vector_dim)  # FAISS index
documents = []  # Store extracted text

app = Flask(__name__)

@app.route("/")
def serve_homepage():
"""Serves the HTML interface."""
return send_from_directory(os.getcwd(), 'index.html')
@app.route("/upload_pdf/", methods=["POST"])
def upload_pdf():
"""Handles PDF file processing."""
global documents
file = request.files['file']
# Save the uploaded file temporarily
file_path = os.path.join(os.getcwd(), file.filename)
file.save(file_path)
# Extract text from PDF
doc = fitz.open(file_path)
text_chunks = [page.get_text("text") for page in doc]
# Create vector database
documents = text_chunks
embeddings = embed_model.encode(text_chunks)
index.add(np.array(embeddings, dtype=np.float32))
return jsonify({"message": "PDF uploaded and indexed successfully!"})
@app.route("/chat/", methods=["GET"])
def chat_with_pdf():
"""Handles user queries and returns AI-generated responses."""
msg = request.args.get("msg")
if not documents:
return jsonify({"response": "Please upload a PDF first."})
# Retrieve relevant context
query_embedding = embed_model.encode([msg])
_, closest_idx = index.search(np.array(query_embedding, dtype=np.float32), k=3)
context = "\n".join([documents[i] for i in closest_idx[0]])
# Generate AI response
messages = [
{"role": "system", "content": ChatConfig.DEFAULT_SYSTEM_MSG},
{"role": "user", "content": f"Context: {context}\nQuestion: {msg}"}
]
response_text = ""
for chunk in client.chat_completion(
messages,
max_tokens=ChatConfig.DEFAULT_MAX_TOKENS,
stream=True,
temperature=ChatConfig.DEFAULT_TEMP,
top_p=ChatConfig.DEFAULT_TOP_P,
):
token = chunk.choices[0].delta.content or ""
response_text += token
return jsonify({"response": response_text})
if __name__ == "__main__":
    app.run(host="0.0.0.0", port=8000)
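
# Example usage (assumes the server is reachable on localhost:8000 and that a
# local file named example.pdf exists; filename and question are hypothetical):
#   curl -F "file=@example.pdf" http://localhost:8000/upload_pdf/
#   curl "http://localhost:8000/chat/?msg=What+is+this+document+about"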