Update app.py — diff view of changed file app.py
@@ -8,13 +8,11 @@ moondream_client = Client("vikhyatk/moondream2")
|
|
8 |
# LLaMA için InferenceClient kullanıyoruz
|
9 |
llama_client = InferenceClient("Qwen/QwQ-32B-Preview")
|
10 |
|
11 |
-
# Sohbet geçmişi
|
12 |
history = []
|
13 |
|
14 |
# Resim açıklama fonksiyonu
|
15 |
-
def describe_image(image, user_message):
|
16 |
-
global history
|
17 |
-
|
18 |
# Resmi Moondream2 API'sine gönderiyoruz
|
19 |
result = moondream_client.predict(
|
20 |
img=handle_file(image),
|
@@ -26,12 +24,10 @@ def describe_image(image, user_message):
|
|
26 |
history.append({"role": "user", "content": user_message})
|
27 |
history.append({"role": "assistant", "content": description})
|
28 |
|
29 |
-
return description
|
30 |
|
31 |
# Text ve history ile sohbet fonksiyonu
|
32 |
-
def chat_with_text(user_message, max_new_tokens=250):
|
33 |
-
global history
|
34 |
-
|
35 |
# Kullanıcı mesajını history'ye ekliyoruz
|
36 |
history.append({"role": "user", "content": user_message})
|
37 |
|
@@ -48,47 +44,45 @@ def chat_with_text(user_message, max_new_tokens=250):
|
|
48 |
assistant_reply = llama_result["choices"][0]["message"]["content"]
|
49 |
history.append({"role": "assistant", "content": assistant_reply})
|
50 |
|
51 |
-
return assistant_reply
|
52 |
|
53 |
# Resim ve/veya metin tabanlı sohbet fonksiyonu
|
54 |
-
def
|
55 |
-
|
|
|
|
|
|
|
|
|
56 |
|
57 |
if image: # Resim varsa
|
58 |
-
response = describe_image(image, user_message)
|
59 |
else: # Sadece metin mesajı varsa
|
60 |
-
response = chat_with_text(user_message, max_new_tokens)
|
61 |
|
62 |
-
return response
|
63 |
|
64 |
# Gradio arayüzü
|
65 |
-
|
66 |
-
|
67 |
-
|
68 |
-
|
69 |
-
|
70 |
-
|
71 |
-
|
72 |
-
|
73 |
-
|
74 |
-
|
75 |
-
|
76 |
-
|
77 |
-
|
78 |
-
|
79 |
-
|
80 |
-
|
81 |
-
|
82 |
-
|
83 |
-
|
84 |
-
|
85 |
-
|
86 |
-
|
87 |
-
submit_btn.click(
|
88 |
-
fn=process_input,
|
89 |
-
inputs=[chat_input, image_input, token_slider],
|
90 |
-
outputs=chat_output,
|
91 |
-
)
|
92 |
|
93 |
if __name__ == "__main__":
|
94 |
demo.launch(debug=True)
|
|
|
8 |
# LLaMA için InferenceClient kullanıyoruz
|
9 |
llama_client = InferenceClient("Qwen/QwQ-32B-Preview")
|
10 |
|
11 |
+
# Sohbet geçmişi (global değişken olarak değil, fonksiyona parametre olarak kullanılır)
|
12 |
history = []
|
13 |
|
14 |
# Resim açıklama fonksiyonu
|
15 |
+
def describe_image(image, user_message, history):
|
|
|
|
|
16 |
# Resmi Moondream2 API'sine gönderiyoruz
|
17 |
result = moondream_client.predict(
|
18 |
img=handle_file(image),
|
|
|
24 |
history.append({"role": "user", "content": user_message})
|
25 |
history.append({"role": "assistant", "content": description})
|
26 |
|
27 |
+
return description, history
|
28 |
|
29 |
# Text ve history ile sohbet fonksiyonu
|
30 |
+
def chat_with_text(user_message, history, max_new_tokens=250):
|
|
|
|
|
31 |
# Kullanıcı mesajını history'ye ekliyoruz
|
32 |
history.append({"role": "user", "content": user_message})
|
33 |
|
|
|
44 |
assistant_reply = llama_result["choices"][0]["message"]["content"]
|
45 |
history.append({"role": "assistant", "content": assistant_reply})
|
46 |
|
47 |
+
return assistant_reply, history
|
48 |
|
49 |
# Resim ve/veya metin tabanlı sohbet fonksiyonu
|
50 |
def bot_streaming(message, history=None, max_new_tokens=250):
    """Route a chat turn to the image or text pipeline and return the reply.

    Args:
        message: Multimodal message dict from ``gr.ChatInterface``; carries a
            ``"text"`` key and, for uploads, a ``"files"`` list of file paths.
        history: Prior conversation as a list of ``{"role", "content"}`` dicts.
            ``None`` (the default) starts a fresh conversation.
        max_new_tokens: Token budget forwarded to the text chat model.

    Returns:
        The assistant's reply string — what ``gr.ChatInterface`` displays.
    """
    if history is None:  # fresh list per call; never a shared mutable default
        history = []

    user_message = message.get("text", "")

    # BUG FIX: gr.ChatInterface(multimodal=True) delivers attachments under
    # the "files" key, not "image" — the old lookup was always None, so the
    # image branch could never run. "image" is kept as a fallback for direct
    # (non-Gradio) callers.
    files = message.get("files") or []
    image = files[0] if files else message.get("image", None)

    if image:  # an image was attached -> describe it with Moondream2
        response, history = describe_image(image, user_message, history)
    else:  # text only -> chat with the LLM
        response, history = chat_with_text(user_message, history, max_new_tokens)

    # BUG FIX: ChatInterface expects only the reply text; it tracks history
    # itself, so returning (reply, history) would misrender in the UI.
    return response
|
63 |
|
64 |
# Gradio arayüzü
|
65 |
# Gradio UI: a multimodal chat interface backed by bot_streaming.
# The token-budget slider is built first and passed in as an extra input.
max_tokens_slider = gr.Slider(
    minimum=10,
    maximum=500,
    value=250,
    step=10,
    label="Maximum number of new tokens to generate",
)

demo = gr.ChatInterface(
    fn=bot_streaming,
    title="Multimodal Chat Assistant",
    description=(
        "This demo combines text and image understanding using Moondream2 for visual "
        "tasks and LLaMA for conversational AI. Upload an image, ask questions, "
        "or just chat!"
    ),
    additional_inputs=[max_tokens_slider],
    stop_btn="Stop Generation",
    fill_height=True,
    multimodal=True,
)
|
|
|
|
|
|
|
|
|
|
|
|
|
86 |
|
87 |
# Script entry point: launch the Gradio app (debug=True enables verbose
# server logging and error tracebacks in the browser).
if __name__ == "__main__":
    demo.launch(debug=True)
|