Update app.py
app.py
CHANGED
@@ -11,7 +11,7 @@ app = Flask(__name__, template_folder=os.getcwd())
 
 # Default settings
 class ChatConfig:
-    MODEL = "
+    MODEL = "google/gemma-3-27b-it"  # Change back to Gemma
     DEFAULT_SYSTEM_MSG = "You are an AI assistant answering only based on the uploaded PDF."
     DEFAULT_MAX_TOKENS = 512
     DEFAULT_TEMP = 0.3
@@ -21,7 +21,7 @@ class ChatConfig:
 HF_TOKEN = os.getenv('HUGGINGFACE_TOKEN')
 client = InferenceClient(
     ChatConfig.MODEL,
-    token=HF_TOKEN
+    token=HF_TOKEN
 )
 embed_model = SentenceTransformer("all-MiniLM-L6-v2", cache_folder="/tmp")
 vector_dim = 384  # Embedding size
@@ -39,26 +39,9 @@ def extract_text_from_pdf(pdf_stream):
 def create_vector_db(text_chunks):
     """Embeds text chunks and adds them to FAISS index"""
     global documents, index
-
-    # Reinitialize the FAISS index
-    index = faiss.IndexFlatL2(vector_dim)
-
     documents = text_chunks
     embeddings = embed_model.encode(text_chunks)
-
-    # Convert embeddings to np.float32 for FAISS
-    embeddings = np.array(embeddings, dtype=np.float32)
-
-    # Ensure that embeddings have the correct shape (should be 2D, with each vector having the right dimension)
-    if embeddings.ndim == 1:  # If only one embedding, reshape it
-        embeddings = embeddings.reshape(1, -1)
-
-    # Add embeddings to the FAISS index
-    index.add(embeddings)
-
-    # Check if adding was successful (optional)
-    if index.ntotal == 0:
-        print("Error: FAISS index is empty after adding embeddings.")
+    index.add(np.array(embeddings, dtype=np.float32))
 
 def search_relevant_text(query):
     """Finds the most relevant text chunk for the given query"""
@@ -79,34 +62,30 @@ def generate_response(
 
     context = search_relevant_text(message)  # Get relevant content from PDF
 
-
-
+    messages = [{"role": "system", "content": system_message}]
+    for user_msg, bot_msg in history:
+        if user_msg:
+            messages.append({"role": "user", "content": user_msg})
+        if bot_msg:
+            messages.append({"role": "assistant", "content": bot_msg})
 
-
-    {context}
-
-    User Question: {message} [/INST]"""
-
-    # Add conversation history if it exists
-    for prev_msg, prev_response in history:
-        prompt += f" {prev_response} </s>[INST] {prev_msg} [/INST]"
+    messages.append({"role": "user", "content": f"Context: {context}\nQuestion: {message}"})
 
     try:
-        response =
-
-
+        response = ""
+        for chunk in client.chat_completion(
+            messages,
+            max_tokens=max_tokens,
+            stream=True,
             temperature=temperature,
             top_p=top_p,
-
-
-
-
-        for chunk in response:
-            full_response += chunk
-        return full_response
+        ):
+            token = chunk.choices[0].delta.content or ""
+            response += token
+            yield response
     except Exception as e:
         print(f"Error generating response: {str(e)}")
-
+        yield "I apologize, but I encountered an error while generating the response. Please try again."
 
 @app.route('/')
 def index():
@@ -149,7 +128,7 @@ def ask_question():
     message = request.json.get('message')
     history = request.json.get('history', [])
     response = generate_response(message, history)
-    return jsonify({"response": response})
+    return jsonify({"response": "".join(response)})  # Join all streamed responses
 
 if __name__ == '__main__':
    app.run(debug=True)
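
Note on the new streaming flow (a sketch, not part of the commit): generate_response is now a generator that yields the cumulative reply after each streamed token. A caller that only needs the final text can therefore keep the last yielded value, whereas joining every yield concatenates overlapping partial strings. A minimal, self-contained illustration, using a hypothetical stand-in generator:

    # Stand-in for generate_response(message, history): each yield is the full text so far.
    def stream_of_partials():
        text = ""
        for token in ["Hel", "lo", "!"]:
            text += token
            yield text

    partials = list(stream_of_partials())    # ["Hel", "Hello", "Hello!"]
    print("".join(partials))                 # "HelHelloHello!" -- prefixes repeated
    print(partials[-1] if partials else "")  # "Hello!"         -- final reply only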
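
For context, search_relevant_text and the module-level FAISS setup sit outside the hunks above. A minimal, self-contained sketch of the embed-and-search flow that the simplified create_vector_db relies on; the sample chunks, query, and k=1 are illustrative assumptions, not code from app.py:

    import faiss
    import numpy as np
    from sentence_transformers import SentenceTransformer

    embed_model = SentenceTransformer("all-MiniLM-L6-v2", cache_folder="/tmp")
    vector_dim = 384  # embedding size of all-MiniLM-L6-v2
    index = faiss.IndexFlatL2(vector_dim)

    # Sample chunks standing in for the extracted PDF text
    documents = ["Refunds are issued within 30 days.", "Shipping takes 5-7 business days."]
    embeddings = embed_model.encode(documents)
    index.add(np.array(embeddings, dtype=np.float32))  # same call create_vector_db now makes

    # Nearest chunk by L2 distance over the embeddings
    query_vec = embed_model.encode(["How long do refunds take?"])
    _, ids = index.search(np.array(query_vec, dtype=np.float32), 1)
    print(documents[ids[0][0]])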
|