Update app.py — diff view of changed file app.py
@@ -8,13 +8,11 @@ moondream_client = Client("vikhyatk/moondream2")
|
|
8 |
# LLaMA için InferenceClient kullanıyoruz
|
9 |
llama_client = InferenceClient("Qwen/QwQ-32B-Preview")
|
10 |
|
11 |
-
# Sohbet geçmişi
|
12 |
history = []
|
13 |
|
14 |
# Resim açıklama fonksiyonu
|
15 |
-
def describe_image(image, user_message):
|
16 |
-
global history
|
17 |
-
|
18 |
# Resmi Moondream2 API'sine gönderiyoruz
|
19 |
result = moondream_client.predict(
|
20 |
img=handle_file(image),
|
@@ -26,12 +24,10 @@ def describe_image(image, user_message):
|
|
26 |
history.append({"role": "user", "content": user_message})
|
27 |
history.append({"role": "assistant", "content": description})
|
28 |
|
29 |
-
return description
|
30 |
|
31 |
# Text ve history ile sohbet fonksiyonu
|
32 |
-
def chat_with_text(user_message, max_new_tokens=250):
|
33 |
-
global history
|
34 |
-
|
35 |
# Kullanıcı mesajını history'ye ekliyoruz
|
36 |
history.append({"role": "user", "content": user_message})
|
37 |
|
@@ -48,47 +44,45 @@ def chat_with_text(user_message, max_new_tokens=250):
|
|
48 |
assistant_reply = llama_result["choices"][0]["message"]["content"]
|
49 |
history.append({"role": "assistant", "content": assistant_reply})
|
50 |
|
51 |
-
return assistant_reply
|
52 |
|
53 |
# Resim ve/veya metin tabanlı sohbet fonksiyonu
|
54 |
-
def
|
55 |
-
|
|
|
|
|
|
|
|
|
56 |
|
57 |
if image: # Resim varsa
|
58 |
-
response = describe_image(image, user_message)
|
59 |
else: # Sadece metin mesajı varsa
|
60 |
-
response = chat_with_text(user_message, max_new_tokens)
|
61 |
|
62 |
-
return response
|
63 |
|
64 |
# Gradio arayüzü
|
65 |
-
|
66 |
-
|
67 |
-
|
68 |
-
|
69 |
-
|
70 |
-
|
71 |
-
|
72 |
-
|
73 |
-
|
74 |
-
|
75 |
-
|
76 |
-
|
77 |
-
|
78 |
-
|
79 |
-
|
80 |
-
|
81 |
-
|
82 |
-
|
83 |
-
|
84 |
-
|
85 |
-
|
86 |
-
|
87 |
-
submit_btn.click(
|
88 |
-
fn=process_input,
|
89 |
-
inputs=[chat_input, image_input, token_slider],
|
90 |
-
outputs=chat_output,
|
91 |
-
)
|
92 |
|
93 |
if __name__ == "__main__":
|
94 |
demo.launch(debug=True)
|
|
|
8 |
# LLaMA için InferenceClient kullanıyoruz
|
9 |
llama_client = InferenceClient("Qwen/QwQ-32B-Preview")
|
10 |
|
11 |
+
# Sohbet geçmişi (global değişken olarak değil, fonksiyona parametre olarak kullanılır)
|
12 |
history = []
|
13 |
|
14 |
# Resim açıklama fonksiyonu
|
15 |
+
def describe_image(image, user_message, history):
|
|
|
|
|
16 |
# Resmi Moondream2 API'sine gönderiyoruz
|
17 |
result = moondream_client.predict(
|
18 |
img=handle_file(image),
|
|
|
24 |
history.append({"role": "user", "content": user_message})
|
25 |
history.append({"role": "assistant", "content": description})
|
26 |
|
27 |
+
return description, history
|
28 |
|
29 |
# Text ve history ile sohbet fonksiyonu
|
30 |
+
def chat_with_text(user_message, history, max_new_tokens=250):
|
|
|
|
|
31 |
# Kullanıcı mesajını history'ye ekliyoruz
|
32 |
history.append({"role": "user", "content": user_message})
|
33 |
|
|
|
44 |
assistant_reply = llama_result["choices"][0]["message"]["content"]
|
45 |
history.append({"role": "assistant", "content": assistant_reply})
|
46 |
|
47 |
+
return assistant_reply, history
|
48 |
|
49 |
# Resim ve/veya metin tabanlı sohbet fonksiyonu
|
50 |
def bot_streaming(message, history=None, max_new_tokens=250):
    """Route a chat turn to the image or text pipeline and return the reply.

    Args:
        message: Multimodal message dict from ``gr.ChatInterface``; carries a
            ``"text"`` key and, for uploads, a ``"files"`` list of file paths.
        history: Prior conversation as a list of ``{"role", "content"}`` dicts.
            ``None`` (the default) starts a fresh conversation.
        max_new_tokens: Token budget forwarded to the text chat model.

    Returns:
        The assistant's reply string — what ``gr.ChatInterface`` displays.
    """
    if history is None:  # fresh list per call; never a shared mutable default
        history = []

    user_message = message.get("text", "")

    # BUG FIX: gr.ChatInterface(multimodal=True) delivers attachments under
    # the "files" key, not "image" — the old lookup was always None, so the
    # image branch could never run. "image" is kept as a fallback for direct
    # (non-Gradio) callers.
    files = message.get("files") or []
    image = files[0] if files else message.get("image", None)

    if image:  # an image was attached -> describe it with Moondream2
        response, history = describe_image(image, user_message, history)
    else:  # text only -> chat with the LLM
        response, history = chat_with_text(user_message, history, max_new_tokens)

    # BUG FIX: ChatInterface expects only the reply text; it tracks history
    # itself, so returning (reply, history) would misrender in the UI.
    return response
|
63 |
|
64 |
# Gradio arayüzü
|
65 |
# Gradio UI: a multimodal chat interface backed by bot_streaming.
# The token-budget slider is built first and passed in as an extra input.
max_tokens_slider = gr.Slider(
    minimum=10,
    maximum=500,
    value=250,
    step=10,
    label="Maximum number of new tokens to generate",
)

demo = gr.ChatInterface(
    fn=bot_streaming,
    title="Multimodal Chat Assistant",
    description=(
        "This demo combines text and image understanding using Moondream2 for visual "
        "tasks and LLaMA for conversational AI. Upload an image, ask questions, "
        "or just chat!"
    ),
    additional_inputs=[max_tokens_slider],
    stop_btn="Stop Generation",
    fill_height=True,
    multimodal=True,
)
|
|
|
|
|
|
|
|
|
|
|
|
|
86 |
|
87 |
# Script entry point: launch the Gradio app (debug=True enables verbose
# server logging and error tracebacks in the browser).
if __name__ == "__main__":
    demo.launch(debug=True)
|