Krish45 committed
Commit 5ed6726 · verified · 1 parent: dd3768c

Update app.py

Files changed (1)
  1. app.py +37 -23
app.py CHANGED
@@ -2,8 +2,8 @@ import gradio as gr
 from transformers import AutoTokenizer, AutoModelForCausalLM
 import torch
 import threading
-import time
 import os
+import time
 
 # Model config
 model_name = "Qwen/Qwen2.5-0.5B-Instruct"
@@ -19,29 +19,33 @@ def load_model():
     global tokenizer, model
     if model is None:
         tokenizer = AutoTokenizer.from_pretrained(model_name)
-        # Ensure offload folder exists
        os.makedirs(offload_dir, exist_ok=True)
         model = AutoModelForCausalLM.from_pretrained(
             model_name,
-            load_in_8bit=True,  # Quantize to 8-bit
+            load_in_8bit=True,
             device_map="auto",
-            offload_folder=offload_dir,  # Offload some weights to disk
+            offload_folder=offload_dir,
             torch_dtype=torch.float16
         )
 
 # Chatbot prediction function
-def predict(history, message):
+def predict(history, message, bot_name="Bot", personality="wise AI", tone="friendly"):
     load_model()
     history = history or []
-    history.append((message, ""))
+    # Append user message
+    history.append({"role": "user", "content": message})
+
+    # Build dynamic system prompt
+    system_prompt = (
+        f"You are {bot_name}, a {personality}.\n"
+        f"You express emotion, think logically, and talk like a wise, emotional, intelligent human being.\n"
+        f"Your tone is always {tone}."
+    )
 
-    # Convert to Qwen message format
-    messages = []
-    for human, bot in history:
-        if human:
-            messages.append({"role": "user", "content": human})
-        if bot:
-            messages.append({"role": "assistant", "content": bot})
+    # Prepare messages for Qwen
+    messages = [{"role": "system", "content": system_prompt}]
+    for msg in history:
+        messages.append({"role": msg["role"], "content": msg["content"]})
 
     text = tokenizer.apply_chat_template(
         messages, tokenize=False, add_generation_prompt=True
@@ -50,14 +54,13 @@ def predict(history, message):
 
     reply = ""
     try:
-        with model_lock:  # Serialize CPU inference safely
+        with model_lock:
             with torch.no_grad():
                 start = time.time()
                 generated_ids = model.generate(**model_inputs, max_new_tokens=256)
-                if time.time() - start > 30:  # 30s timeout
+                if time.time() - start > 30:
                     reply = "[Response timed out]"
                 else:
-                    # Remove input_ids from output
                     generated_ids = [
                         output_ids[len(input_ids):]
                         for input_ids, output_ids in zip(model_inputs.input_ids, generated_ids)
@@ -66,24 +69,35 @@ def predict(history, message):
     except Exception as e:
         reply = f"[Error: {str(e)}]"
 
-    history[-1] = (message, reply)
+    # Append bot reply
+    history.append({"role": "assistant", "content": reply})
     return history, ""
 
-# Keep-alive endpoint for local client ping
+# Keep-alive endpoint
 def keep_alive(msg="ping"):
     return "pong"
 
 # Gradio UI
 with gr.Blocks() as demo:
     with gr.Tab("Chatbot"):
-        chatbot = gr.Chatbot()
+        chatbot = gr.Chatbot(type="messages")
         msg = gr.Textbox(placeholder="Type your message here...")
-        msg.submit(predict, [chatbot, msg], [chatbot, msg])
+        bot_name_input = gr.Textbox(label="Bot Name", value="Bot")
+        personality_input = gr.Textbox(label="Personality", value="wise AI")
+        tone_input = gr.Textbox(label="Tone", value="friendly")
+
+        msg.submit(
+            predict,
+            inputs=[chatbot, msg, bot_name_input, personality_input, tone_input],
+            outputs=[chatbot, msg]
+        )
 
     with gr.Tab("Keep Alive"):
         gr.Textbox(label="Ping", value="ping", interactive=False)
         gr.Button("Ping").click(keep_alive, inputs=None, outputs=None)
 
-# Multi-user queue with concurrency
-demo.queue(concurrency_count=4, max_size=8)  # 4 simultaneous, 8 waiting
+# Enable request queue (multi-user safe)
+demo.queue()  # simple queue; compatible with current Gradio versions
+
+# Launch Space
+demo.launch(server_name="0.0.0.0", server_port=7860, share=False, show_error=True)
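
Context for the queue change: current Gradio releases no longer accept concurrency_count in Blocks.queue(), so the bare demo.queue() call avoids a startup error on up-to-date Spaces images. If per-request concurrency limits are still wanted, a rough modern analogue of the old call could look like the line below; the parameter names assume Gradio 4.x and are not part of this commit.

    demo.queue(default_concurrency_limit=4, max_size=8)  # roughly: 4 concurrent requests, 8 waiting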
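
For reviewers unfamiliar with Gradio's messages-format components: after this change, predict() and gr.Chatbot(type="messages") share the same history shape, a list of role/content dicts, which is also what tokenizer.apply_chat_template() consumes once the system prompt is prepended. A minimal sketch with hypothetical turns (not part of the commit):

    # One user entry is appended on submit, one assistant entry after generation.
    example_history = [
        {"role": "user", "content": "Hello!"},
        {"role": "assistant", "content": "Hi! How can I help you today?"},
    ]

Because both sides now use this dict format, the old tuple-based history and the manual human/bot unpacking loop are no longer needed.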