Update app.py
app.py
CHANGED
@@ -68,14 +68,8 @@ def bot_streaming(message, history):
 
     # Get image path
     image = None
-    if message["files"]:
+    if "files" in message and message["files"]:
         image = message["files"][-1]["path"]
-    else:
-        for i, hist in enumerate(history):
-            if type(hist[0])==tuple:
-                image = hist[0][0]
-                image_turn = i
-                break
 
     # Check if image is available
     if image is None:
@@ -83,22 +77,16 @@ def bot_streaming(message, history):
 
     # Prepare conversation messages
     messages = []
-    if len(history) > 0
-        messages.append({"role": "user", "content": f'<image>\n{history[1][0]}'})
-        messages.append({"role": "assistant", "content": history[1][1] })
-        for human, assistant in history[2:]:
-            messages.append({"role": "user", "content": human })
-            messages.append({"role": "assistant", "content": assistant })
-        messages.append({"role": "user", "content": message['text']})
-    elif len(history) > 0 and image is None:
+    if len(history) > 0:
         for human, assistant in history:
-            messages.append({"role": "user", "content": human })
-            messages.append({"role": "assistant", "content": assistant })
-        messages.append({"role": "user", "content": message['text']})
-    elif len(history) == 0 and image is not None:
+            # Skip None responses (which can happen during streaming)
+            if assistant is not None:
+                messages.append({"role": "user", "content": human})
+                messages.append({"role": "assistant", "content": assistant})
+        # Add the current message
+        messages.append({"role": "user", "content": f"<image>\n{message['text']}" if len(messages) == 0 else message['text']})
+    else:
         messages.append({"role": "user", "content": f"<image>\n{message['text']}"})
-    elif len(history) == 0 and image is None:
-        messages.append({"role": "user", "content": message['text'] })
 
     # Process image
     image = Image.open(image).convert("RGB")
@@ -108,8 +96,24 @@ def bot_streaming(message, history):
         messages,
         tokenize=False,
         add_generation_prompt=True)
-    text_chunks = [tokenizer(chunk).input_ids for chunk in text.split('<image>')]
-    input_ids = torch.tensor(text_chunks[0] + [-200] + text_chunks[1], dtype=torch.long).unsqueeze(0)
+
+    # Handle image embedding in text
+    if '<image>' in text:
+        text_chunks = [tokenizer(chunk).input_ids for chunk in text.split('<image>')]
+        input_ids = torch.tensor(text_chunks[0] + [-200] + text_chunks[1], dtype=torch.long).unsqueeze(0)
+    else:
+        # If no <image> tag was added (possible in some chat templates), add it manually
+        input_ids = tokenizer(text).input_ids
+        # Find the position to insert the image token
+        # For simplicity, insert after the user message start
+        user_start_pos = 0
+        for i, token in enumerate(input_ids):
+            if tokenizer.decode([token]) == '<|im_start|>user':
+                user_start_pos = i + 2  # +2 to get past the tag
+                break
+        # Insert image token
+        input_ids = input_ids[:user_start_pos] + [-200] + input_ids[user_start_pos:]
+        input_ids = torch.tensor([input_ids], dtype=torch.long)
 
     # Prepare stopping criteria
     stop_str = '<|im_end|>'
@@ -140,16 +144,65 @@ def bot_streaming(message, history):
         yield generated_text_without_prompt
 
 
-# [11 removed lines (previous gr.ChatInterface setup) not recoverable from this rendering]
+# Create a gradio Blocks interface instead of ChatInterface
+# This avoids the schema validation issues
+with gr.Blocks(title="🚀nanoLLaVA-1.5") as demo:
+    gr.Markdown("## 🚀nanoLLaVA-1.5")
+    gr.Markdown("Try [nanoLLaVA](https://huggingface.co/qnguyen3/nanoLLaVA-1.5) in this demo. Built on top of [Quyen-SE-v0.1](https://huggingface.co/vilm/Quyen-SE-v0.1) (Qwen1.5-0.5B) and [Google SigLIP-400M](https://huggingface.co/google/siglip-so400m-patch14-384). Upload an image and start chatting about it, or simply try one of the examples below. If you don't upload an image, you will receive an error.")
+
+    chatbot = gr.Chatbot(height=500)
+    with gr.Row():
+        with gr.Column(scale=0.8):
+            msg = gr.Textbox(
+                show_label=False,
+                placeholder="Enter text and upload an image",
+                container=False
+            )
+        with gr.Column(scale=0.2):
+            btn = gr.Button("Submit")
+            stop_btn = gr.Button("Stop Generation")
+
+    upload_btn = gr.UploadButton("Upload Image", file_types=["image"])
+    current_img = gr.State(None)
+
+    # Example images
+    examples = gr.Examples(
+        examples=[
+            ["Who is this guy?", "./demo_1.jpg"],
+            ["What does the text say?", "./demo_2.jpeg"]
+        ],
+        inputs=[msg, upload_btn]
+    )
+
+    def upload_image(image):
+        return image
+
+    def add_text(history, text, image):
+        if image is None and (not history or type(history[0][0]) != tuple):
+            return history + [[text, "Please upload an image first."]]
+        return history + [[text, None]]
+
+    def bot_response(history, image):
+        message = {"text": history[-1][0], "files": [{"path": image}] if image else []}
+        history_format = history[:-1]  # All except the last message
+
+        response = ""
+        for chunk in bot_streaming(message, history_format):
+            response = chunk
+            history[-1][1] = response
+            yield history
+
+    upload_btn.upload(upload_image, upload_btn, current_img)
+
+    msg.submit(add_text, [chatbot, msg, current_img], chatbot).then(
+        bot_response, [chatbot, current_img], chatbot
+    )
+
+    btn.click(add_text, [chatbot, msg, current_img], chatbot).then(
+        bot_response, [chatbot, current_img], chatbot
+    )
+
+    stop_btn.click(None, None, None, cancels=[bot_response])
 
+# Launch the app with queuing
 demo.queue().launch()