Spaces:

suayptalha
/

QwQ-32B-Preview-Vision

Running

App Files Files Community

suayptalha committed on Dec 18, 2024

Commit

7b6d332

verified ·

1 Parent(s): 925d9fa

Update app.py

Browse files

Files changed (1) hide show

app.py +53 -72

app.py CHANGED Viewed

@@ -2,91 +2,72 @@ import gradio as gr
 from gradio_client import Client, handle_file
 from huggingface_hub import InferenceClient
-# We use a gradio_client Client for Moondream2
-moondream_client = Client("vikhyatk/moondream2")
-# We use an InferenceClient for the chat model (called "LLaMA" in this
-# revision's comments, although the model ID below is Qwen/QwQ-32B-Preview)
-llama_client = InferenceClient("Qwen/QwQ-32B-Preview")
-# Chat history
-history = []
-# Image description function: asks Moondream2 to describe `image`, appends the
-# user message and the description to `history`, and returns
-# (description, history, image_message).
-def describe_image(image, user_message, history):
-    # Send the image to the Moondream2 API
-    result = moondream_client.predict(
-        img=handle_file(image),
         prompt="Describe this image.",
         api_name="/answer_question"
     )
-    description = result  # Take the description from Moondream2
-    history.append({"role": "user", "content": user_message})  # as a string
-    history.append({"role": "assistant", "content": description})  # as a string
-    # Attach alt text (alt_text) to the image description
-    image_message = {
-        "type": "image",
-        "data": image,
-        "alt_text": description  # Use the description as the image's alt text
-    }
-    return description, history, image_message
-# Chat function that works from text and history: appends the user message,
-# sends the whole history to the chat model, appends the reply, and returns
-# (assistant_reply, history).
-def chat_with_text(user_message, history, max_new_tokens=250):
-    # Add the user's message to the history
-    history.append({"role": "user", "content": user_message})  # as a string
-    # Send the whole history to LLaMA (i.e. through llama_client)
-    texts = [{"role": msg["role"], "content": msg["content"]} for msg in history]
-    llama_result = llama_client.chat_completion(
-        messages=texts,
-        max_tokens=max_new_tokens,
-        temperature=0.7,
-        top_p=0.95
-    )
-    # Take the assistant's reply and add it to the history
-    assistant_reply = llama_result["choices"][0]["message"]["content"]
-    history.append({"role": "assistant", "content": assistant_reply})  # as a string
-    return assistant_reply, history
-# Chat function for image and/or text input: dispatches to describe_image when
-# the message carries an "image" entry, otherwise to chat_with_text.
-def bot_streaming(message, history=None, max_new_tokens=250):
-    if history is None:  # If `history` was not given, use an empty list
-        history = []
-    user_message = message.get("text", "")
-    image = message.get("image", None)
-    if image:  # If there is an image
-        response, history, image_message = describe_image(image, user_message, history)
-        return image_message, history  # Return the image message and the history
-    else:  # If there is only a text message
-        response, history = chat_with_text(user_message, history, max_new_tokens)
-        return response, history  # Only text should be returned
-# Gradio interface
-# NOTE(review): four additional inputs are declared below, while bot_streaming
-# accepts only `max_new_tokens` after `history` - confirm how they were meant to
-# be passed.
 demo = gr.ChatInterface(
-    fn=bot_streaming,  # The function here is bot_streaming
-    title="Multimodal Chat Assistant",
     additional_inputs=[
-        gr.Textbox(value="You are a friendly assistant.", label="System message"),
-        gr.Slider(minimum=10, maximum=500, value=250, step=10, label="Maximum number of new tokens to generate"),
         gr.Slider(minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature"),
-        gr.Slider(minimum=0.1, maximum=1.0, value=0.95, step=0.05, label="Top-p (nucleus sampling)"),
     ],
-    description=(
-        "This demo combines text and image understanding using Moondream2 for visual "
-        "tasks and LLaMA for conversational AI. Upload an image, ask questions, "
-        "or just chat!"
-    ),
-    stop_btn="Stop Generation",
-    fill_height=True,
-    multimodal=True,
 )
 if __name__ == "__main__":
-    demo.launch(debug=True)

 from gradio_client import Client, handle_file
 from huggingface_hub import InferenceClient
+# Initialize the InferenceClient for the chat model (Qwen/QwQ-32B-Preview); the
+# default system message presents the assistant under the name "FastLlama"
+client = InferenceClient("Qwen/QwQ-32B-Preview")
+# Initialize the Moondream2 Client used to describe uploaded images
+moondream_client = Client("vikhyatk/moondream2")
+def respond(
+    message,
+    history: list[tuple[str, str]],
+    system_message,
+    max_tokens,
+    temperature,
+    top_p,
+    image_input
+):
+    # Step 1: Handle the image and get its description using Moondream API
+    image_file = handle_file(image_input)
+    image_description = moondream_client.predict(
+        img=image_file,
         prompt="Describe this image.",
         api_name="/answer_question"
     )
+    # Step 2: Create the messages for the chat model
+    messages = [{"role": "system", "content": system_message}]
+    # Add history to the messages
+    for val in history:
+        if val[0]:
+            messages.append({"role": "user", "content": val[0]})
+        if val[1]:
+            messages.append({"role": "assistant", "content": val[1]})
+    # Add the image description to the user message
+    messages.append({"role": "user", "content": f"Here is the description of the image: {image_description}. Can you comment on it?"})
+    # Step 3: Get the response from the assistant
+    response = ""
+    for message in client.chat_completion(
+        messages,
+        max_tokens=max_tokens,
+        stream=True,
+        temperature=temperature,
+        top_p=top_p,
+    ):
+        token = message.choices[0].delta.content
+        response += token
+        yield response
+# Set up Gradio interface
 demo = gr.ChatInterface(
+    respond,
     additional_inputs=[
+        gr.Textbox(value="You are a friendly assistant named FastLlama.", label="System message"),
+        gr.Slider(minimum=1, maximum=2048, value=512, step=1, label="Max new tokens"),
         gr.Slider(minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature"),
+        gr.Slider(
+            minimum=0.1,
+            maximum=1.0,
+            value=0.95,
+            step=0.05,
+            label="Top-p (nucleus sampling)",
+        ),
+        gr.Image(type="pil", label="Upload Image for Description")  # Image input
     ],
 )
 if __name__ == "__main__":
+    demo.launch()