# Hugging Face Space: Aya Vision 8B demo (runs on ZeroGPU hardware).
import spaces
import gradio as gr
import torch
from transformers import AutoProcessor, AutoModelForImageTextToText
import os
hf_token = os.environ.get("HF_TOKEN")
model_id = "CohereForAI/aya-vision-8b"

# Load the model and processor on startup. Failures are captured (not raised)
# so the UI can still come up and display the error in `model_status`.
try:
    # `token` replaces the deprecated `use_auth_token` argument; it is also
    # passed to the processor so gated-repo downloads work for both pieces.
    processor = AutoProcessor.from_pretrained(model_id, token=hf_token)
    model = AutoModelForImageTextToText.from_pretrained(
        model_id, device_map="auto", torch_dtype=torch.float16, token=hf_token
    )
    model_status = "Model loaded successfully!"
except Exception as e:
    processor = None
    model = None
    model_status = (
        f"Error loading model: {e}\nMake sure to install the correct version of transformers with: "
        "pip install 'git+https://github.com/huggingface/[email protected]'"
    )
@spaces.GPU
def process_image_and_prompt(uploaded_image, image_url, prompt, temperature=0.3, max_tokens=300):
    """Run Aya Vision on an image (upload or URL) plus a text prompt.

    Args:
        uploaded_image: Local file path of an uploaded image, or None/"" if absent.
            The upload takes precedence over the URL.
        image_url: Direct URL to an image; used only when no upload is given.
        prompt: Text prompt sent alongside the image.
        temperature: Sampling temperature. 0 disables sampling (greedy decoding);
            passing temperature=0 together with do_sample=True would raise inside
            `generate`, and the UI slider allows 0.0.
        max_tokens: Maximum number of new tokens to generate.

    Returns:
        The decoded model response, or a human-readable error message string.
    """
    if processor is None or model is None:
        return "Model failed to load. Please check the logs."

    # Determine which image input to use; the chat template accepts either a
    # local path ("image") or a remote URL ("url") entry.
    if uploaded_image:
        image_entry = {"type": "image", "image": uploaded_image}
    elif image_url and image_url.strip():
        image_entry = {"type": "image", "url": image_url.strip()}
    else:
        return "Please provide either an image upload or an image URL."

    messages = [{
        "role": "user",
        "content": [
            image_entry,
            {"type": "text", "text": prompt},
        ],
    }]

    try:
        inputs = processor.apply_chat_template(
            messages,
            padding=True,
            add_generation_prompt=True,
            tokenize=True,
            return_dict=True,
            return_tensors="pt"
        ).to(model.device)

        # temperature == 0 is invalid with do_sample=True; use greedy decoding.
        temperature = float(temperature)
        gen_kwargs = {"max_new_tokens": int(max_tokens)}
        if temperature > 0:
            gen_kwargs.update(do_sample=True, temperature=temperature)

        gen_tokens = model.generate(**inputs, **gen_kwargs)

        # Decode only the newly generated tokens (skip the prompt portion).
        response = processor.tokenizer.decode(
            gen_tokens[0][inputs.input_ids.shape[1]:],
            skip_special_tokens=True
        )
        return response
    except Exception as e:
        return f"Error generating response: {e}"
# Example inputs for testing.
# Each row follows the order of the Examples `inputs` list below:
# [uploaded_image, image_url, prompt, temperature, max_tokens]
# (uploaded_image is None so the URL path is exercised).
examples = [
    [None, "https://media.istockphoto.com/id/458012057/photo/istanbul-turkey.jpg?s=612x612&w=0&k=20&c=qogAOVvkpfUyqLUMr_XJQyq-HkACXyYUSZbKhBlPrxo=", "What landmark is shown in this image?", 0.3, 300],
    [None, "https://pbs.twimg.com/media/Fx7YvfQWYAIp6rZ?format=jpg&name=medium", "What does the text in this image say?", 0.3, 300],
    [None, "https://upload.wikimedia.org/wikipedia/commons/d/da/The_Parthenon_in_Athens.jpg", "Describe esta imagen en español", 0.3, 300]
]
# Build the Gradio interface.
with gr.Blocks(title="Aya Vision 8B Demo") as demo:
    gr.Markdown("# Aya Vision 8B Model Demo")
    gr.Markdown(
        """
This app demonstrates the Aya Vision 8B model. You can either upload an image or provide an image URL. Enter a prompt along with the image.
"""
    )
    # Surface the startup load result so users see load failures immediately.
    gr.Markdown(f"**Model Status:** {model_status}")
    gr.Markdown("### Provide an Image")
    with gr.Tab("Upload Image"):
        # Using type="filepath" returns the local file path which is then passed directly.
        image_upload = gr.Image(label="Upload Image", type="filepath")
    with gr.Tab("Image URL"):
        image_url_input = gr.Textbox(label="Image URL", placeholder="Enter a direct image URL")
    prompt = gr.Textbox(label="Prompt", placeholder="Enter your prompt here", lines=3)
    with gr.Accordion("Generation Settings", open=False):
        temperature_slider = gr.Slider(minimum=0.0, maximum=1.0, step=0.1, value=0.3, label="Temperature")
        max_tokens_slider = gr.Slider(minimum=50, maximum=1000, step=50, value=300, label="Max Tokens")
    generate_btn = gr.Button("Generate Response", variant="primary")
    output = gr.Textbox(label="Model Response", lines=10)
    gr.Markdown("### Examples")
    gr.Examples(
        examples=examples,
        inputs=[image_upload, image_url_input, prompt, temperature_slider, max_tokens_slider],
        outputs=output,
        fn=process_image_and_prompt
    )
    # Wire the button straight to the inference function; the previous
    # one-line pass-through wrapper added nothing.
    generate_btn.click(
        process_image_and_prompt,
        inputs=[image_upload, image_url_input, prompt, temperature_slider, max_tokens_slider],
        outputs=output
    )

if __name__ == "__main__":
    demo.launch()