AskMoondream / app_v2.py
hanzla's picture
chat interface v2
2ac2001
raw
history blame
2.96 kB
import gradio as gr
import os
import time
import spaces
import torch
import re
import gradio as gr
from threading import Thread
from transformers import TextIteratorStreamer, AutoTokenizer, AutoModelForCausalLM
from PIL import Image
import subprocess
# Best-effort install of flash-attn when the module is loaded. The return
# code is deliberately not checked, so a failed install does not stop startup
# here (the model load below may still fail without the package).
# The inherited environment is extended rather than replaced so that PATH,
# HOME, proxy settings, etc. stay visible to pip.
# NOTE(review): FLASH_ATTENTION_SKIP_CUDA_BUILD presumably tells the package
# to skip compiling its CUDA kernels during install -- confirm.
subprocess.run(
    ["pip", "install", "flash-attn", "--no-build-isolation"],
    env={**os.environ, "FLASH_ATTENTION_SKIP_CUDA_BUILD": "TRUE"},
    check=False,
)

# Model checkpoint and pinned revision used for both tokenizer and weights.
model_id = "vikhyatk/moondream2"
revision = "2024-04-02"

tokenizer = AutoTokenizer.from_pretrained(model_id, revision=revision)

# trust_remote_code=True executes the modelling code shipped with the
# checkpoint; the whole model is mapped onto the "cuda" device in bfloat16
# and asked to use the flash_attention_2 attention implementation.
moondream = AutoModelForCausalLM.from_pretrained(
    model_id,
    trust_remote_code=True,
    revision=revision,
    torch_dtype=torch.bfloat16,
    device_map={"": "cuda"},
    attn_implementation="flash_attention_2",
)
moondream.eval()
def print_like_dislike(x: gr.LikeData):
    """Print the index, value and liked flag of a chatbot like/dislike event.

    Args:
        x: Event payload supplied by Gradio when a message is (dis)liked.
    """
    position, content, was_liked = x.index, x.value, x.liked
    print(position, content, was_liked)
def add_message(history, message):
    """Append the submitted files and text to the chat history.

    Each uploaded file becomes its own row whose user slot is a one-element
    tuple holding the file entry; the text (when it is not None) becomes a
    further row. Every new row has ``None`` in the bot slot.

    Args:
        history: Chat rows; mutated in place.
        message: Mapping with a ``"files"`` sequence and a ``"text"`` value.

    Returns:
        A pair of the same ``history`` object and a cleared, non-interactive
        ``gr.MultimodalTextbox`` to replace the input box.
    """
    uploads = message["files"]
    if uploads:
        # One row per uploaded file, in submission order.
        history.extend(((entry,), None) for entry in uploads)

    text = message["text"]
    if text is not None:
        history.append((text, None))

    cleared_box = gr.MultimodalTextbox(value=None, interactive=False)
    return history, cleared_box
def bot(history):
    """Stream a reply into the newest chat row, one character at a time.

    The five most recent rows are scanned, newest first, for a user slot that
    is a tuple whose first element is a string; that string is treated as the
    path of an uploaded image. If one is found the image is opened with
    Pillow, encoded with the Moondream model, and the model's answer to
    "Describe this image." is printed to stdout; the chat reply itself is a
    status string. Otherwise the reply depends on whether the newest row's
    user slot is a string.

    NOTE(review): the model's answer is only printed, never shown in the
    chat -- confirm this is intended.

    Args:
        history: Chat rows of ``[user, bot]``. The newest row must support
            item assignment because its bot slot is rewritten in place.

    Yields:
        The same ``history`` object after each character is appended to the
        newest row's bot slot. For an empty ``history`` it is yielded once,
        unchanged.
    """
    if not history:
        # Nothing to answer; avoid IndexError on history[-1] below.
        yield history
        return

    # Reverse search through the last five rows for an image file.
    image_path = None
    for message in reversed(history[-5:]):
        user_part = message[0]
        # The truthiness check skips an empty tuple instead of indexing it.
        if isinstance(user_part, tuple) and user_part and isinstance(user_part[0], str):
            image_path = user_part[0]
            break

    if image_path:
        try:
            # The context manager closes the underlying file handle as soon
            # as the image has been encoded.
            with Image.open(image_path) as image:
                image_embeds = moondream.encode_image(image)
            print(moondream.answer_question(image_embeds, "Describe this image.", tokenizer))
            response = f"Successfully loaded image from path: {image_path}"
        except IOError:
            response = "Failed to open image. Please check the image path or file permissions."
    elif isinstance(history[-1][0], str):
        response = "HOLA, it's a string"  # Handle text messages
    else:
        response = "**I can only process text messages and images. Please send some text or upload an image!**"

    # Stream the reply: grow the bot slot by one character per yielded frame.
    newest_row = history[-1]
    newest_row[1] = ""
    for character in response:
        newest_row[1] += character
        yield history
# Build the chat UI: a chatbot pane plus a multimodal (text + image) input box.
with gr.Blocks(theme="Monochrome") as demo:
    chatbot = gr.Chatbot(
        value=[],
        elem_id="chatbot",
        bubble_full_width=False,
    )

    chat_input = gr.MultimodalTextbox(
        interactive=True,
        file_types=["image"],
        placeholder="Enter message or upload file...",
        show_label=False,
    )

    # Submit pipeline: record the user's message, then stream the bot reply,
    # then hand back an interactive input box.
    chat_msg = chat_input.submit(
        fn=add_message,
        inputs=[chatbot, chat_input],
        outputs=[chatbot, chat_input],
    )
    bot_msg = chat_msg.then(
        fn=bot,
        inputs=chatbot,
        outputs=chatbot,
        api_name="bot_response",
    )
    bot_msg.then(
        lambda: gr.MultimodalTextbox(interactive=True),
        inputs=None,
        outputs=[chat_input],
    )

    # Like/dislike clicks on chat messages are printed by the handler.
    chatbot.like(fn=print_like_dislike, inputs=None, outputs=None)

# Enable the request queue and start the server.
demo.queue().launch()