Update app.py

app.py CHANGED
@@ -11,29 +11,38 @@ app = Flask(__name__, template_folder=os.getcwd())
 
 # Default settings
 class ChatConfig:
-    MODEL = "google/gemma-3-27b-it"
+    MODEL = "google/gemma-3-27b-it"  # Change back to Gemma
     DEFAULT_SYSTEM_MSG = "You are an AI assistant answering only based on the uploaded PDF."
     DEFAULT_MAX_TOKENS = 512
     DEFAULT_TEMP = 0.3
     DEFAULT_TOP_P = 0.95
 
-
+# Get the token from environment variable
+HF_TOKEN = os.getenv('HF_TOKEN')
+client = InferenceClient(
+    ChatConfig.MODEL,
+    token=HF_TOKEN
+)
 embed_model = SentenceTransformer("all-MiniLM-L6-v2", cache_folder="/tmp")
 vector_dim = 384  # Embedding size
 index = faiss.IndexFlatL2(vector_dim)  # FAISS index
 
 documents = []  # Store extracted text
 
-def extract_text_from_pdf(pdf_path):
-    """Extracts text from PDF"""
-    doc = fitz.open(pdf_path)
+def extract_text_from_pdf(pdf_stream):
+    """Extracts text from PDF stream"""
+    doc = fitz.open(stream=pdf_stream, filetype="pdf")
     text_chunks = [page.get_text("text") for page in doc]
+    doc.close()
     return text_chunks
 
 def create_vector_db(text_chunks):
     """Embeds text chunks and adds them to FAISS index"""
     global documents, index
-
+
+    # Reinitialize the FAISS index
+    index = faiss.IndexFlatL2(vector_dim)
+
     documents = text_chunks
     embeddings = embed_model.encode(text_chunks)
 
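The hunk above cuts off right after the embeddings are computed, so the index.add call that presumably follows is outside the diff context, and the body of search_relevant_text (used later in generate_response) never appears. A minimal sketch of how the re-initialized index and a top-k search could fit together; the body of search_relevant_text here is an assumption, not the committed code:

import faiss
import numpy as np
from sentence_transformers import SentenceTransformer

embed_model = SentenceTransformer("all-MiniLM-L6-v2", cache_folder="/tmp")
vector_dim = 384
index = faiss.IndexFlatL2(vector_dim)
documents = []

def create_vector_db(text_chunks):
    """Embeds text chunks and adds them to a fresh FAISS index."""
    global documents, index
    index = faiss.IndexFlatL2(vector_dim)  # drop vectors left over from a previous PDF
    documents = text_chunks
    embeddings = embed_model.encode(text_chunks)
    index.add(np.asarray(embeddings, dtype="float32"))  # FAISS expects float32

def search_relevant_text(query, top_k=3):
    """Assumed helper: return the chunks whose embeddings are closest to the query."""
    query_vec = embed_model.encode([query])
    _, ids = index.search(np.asarray(query_vec, dtype="float32"), top_k)
    return "\n".join(documents[i] for i in ids[0] if i != -1)

Re-creating the index inside create_vector_db is the important part of this commit: IndexFlatL2.add only appends, so without the reset a second upload would keep matching queries against chunks from the first PDF.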
@@ -70,52 +79,63 @@ def generate_response(
 
     context = search_relevant_text(message)  # Get relevant content from PDF
 
-
+    # Start with the system message in the first user message
+    messages = []
+    first_msg = f"{system_message}\n\nContext: {context}\nQuestion: {message}"
+    messages.append({"role": "user", "content": first_msg})
+
+    # Add conversation history ensuring alternating pattern (user, assistant, user, assistant...)
     for user_msg, bot_msg in history:
-        if user_msg:
+        if user_msg.strip():  # Check if user message is not empty
             messages.append({"role": "user", "content": user_msg})
-        if bot_msg:
+        if bot_msg.strip():  # Check if assistant message is not empty
             messages.append({"role": "assistant", "content": bot_msg})
 
+    try:
+        response = ""
+        for chunk in client.chat_completion(
+            messages,
+            max_tokens=max_tokens,
+            stream=True,
+            temperature=temperature,
+            top_p=top_p,
+        ):
+            token = chunk.choices[0].delta.content or ""
+            response += token
+            yield response
+    except Exception as e:
+        print(f"Error generating response: {str(e)}")
+        yield "I apologize, but I encountered an error while generating the response. Please try again."
+
 @app.route('/')
 def index():
     """Serve the HTML page for the user interface"""
     return render_template('index.html')
 
-UPLOAD_FOLDER = "/tmp/uploaded_files"
-os.makedirs(UPLOAD_FOLDER, exist_ok=True)  # Ensure the folder exists
-
 @app.route('/upload_pdf', methods=['POST'])
 def upload_pdf():
     """Handle PDF upload"""
     if 'pdf' not in request.files:
         return jsonify({"error": "No file part"}), 400
 
     file = request.files['pdf']
     if file.filename == "":
         return jsonify({"error": "No selected file"}), 400
-
-    pdf_path = os.path.join(UPLOAD_FOLDER, file.filename)
 
     try:
-        file.save(pdf_path)
-        text_chunks = extract_text_from_pdf(pdf_path)
+        # Read the file directly into memory instead of saving to disk
+        pdf_stream = file.read()
+
+        # Create a BytesIO object to work with the PDF in memory
+        from io import BytesIO
+        pdf_stream = BytesIO(pdf_stream)
+
+        # Use fitz to open the PDF from memory
+        doc = fitz.open(stream=pdf_stream, filetype="pdf")
+        text_chunks = [page.get_text("text") for page in doc]
+        doc.close()
+
+        # Create vector database
        create_vector_db(text_chunks)
 
     return jsonify({"message": "PDF uploaded and indexed successfully!"}), 200
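The streaming block added above relies on huggingface_hub's InferenceClient.chat_completion, which with stream=True yields chunks whose choices[0].delta.content carries the next token fragment (it can be None on the final chunk, hence the or ""). A self-contained sketch of the same call pattern, using the model and token setup from this commit:

import os
from huggingface_hub import InferenceClient

client = InferenceClient("google/gemma-3-27b-it", token=os.getenv("HF_TOKEN"))

response = ""
for chunk in client.chat_completion(
    [{"role": "user", "content": "Say hello in one word."}],
    max_tokens=16,
    stream=True,
):
    response += chunk.choices[0].delta.content or ""  # delta.content may be None at the end
print(response)

Folding the system prompt into the first user message, as the diff does, is a common workaround for models such as Gemma whose chat templates do not accept a separate system role.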
@@ -128,7 +148,7 @@ def ask_question():
     message = request.json.get('message')
     history = request.json.get('history', [])
     response = generate_response(message, history)
-    return jsonify({"response": response})
+    return jsonify({"response": "".join(response)})  # Join all streamed responses
 
 if __name__ == '__main__':
     app.run(debug=True)
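For quick end-to-end testing, the two routes touched by this commit can be exercised with requests. This sketch assumes the app runs on Flask's default port and that ask_question is mounted at /ask; its route decorator falls outside the hunk, so that path is a guess:

import requests

BASE = "http://127.0.0.1:5000"  # Flask default; adjust for the Space's host/port

# Upload and index a PDF
with open("sample.pdf", "rb") as f:
    r = requests.post(f"{BASE}/upload_pdf", files={"pdf": f})
print(r.json())

# Ask a question about it ("/ask" is an assumed route path)
r = requests.post(f"{BASE}/ask", json={"message": "Summarize the PDF.", "history": []})
print(r.json()["response"])

Two small observations on the committed code: fitz.open(stream=..., filetype="pdf") also accepts raw bytes, so the BytesIO wrapper is harmless but unnecessary; and because generate_response yields the accumulated text after every token, "".join(response) concatenates overlapping snapshots ("H", "He", "Hel", ...) rather than just the finished answer, so keeping only the last yielded value would avoid duplicated text.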