File size: 1,316 Bytes
8c4ab6b
e76263f
 
b51e1ff
8c4ab6b
 
 
 
 
 
 
 
 
b51e1ff
 
e76263f
 
 
b51e1ff
e76263f
 
 
b51e1ff
 
e76263f
20a4635
4d81aab
 
 
 
b51e1ff
4d81aab
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
from transformers import AutoModelForCausalLM, AutoTokenizer
from PIL import Image
import gradio as gr
import numpy as np

# Load the model and tokenizer
# moondream2 is a small vision-language model; trust_remote_code is required
# because its architecture is defined in the model repo, not in transformers.
model_id = "vikhyatk/moondream2"
# Pin to a specific repo revision so remote code / weights can't change under us.
revision = "2024-05-20"
model = AutoModelForCausalLM.from_pretrained(
    model_id, trust_remote_code=True, revision=revision
)
tokenizer = AutoTokenizer.from_pretrained(model_id, revision=revision)

def analyze_image_direct(image, question):
    """Answer a free-form question about an image using moondream2.

    Parameters
    ----------
    image : PIL.Image.Image | numpy.ndarray | None
        The input image. Gradio's ``gr.Image`` component hands over a numpy
        array by default, so arrays are converted to PIL before encoding.
    question : str
        The question to ask about the image.

    Returns
    -------
    str
        The model's answer, or an explanatory message when input is missing.
    """
    # Guard against an empty submission (user clicked without uploading).
    if image is None:
        return "Please provide an image."
    if not question:
        return "Please enter a question."

    # gr.Image delivers a numpy array unless type="pil" is set; the
    # moondream2 remote code expects a PIL image, so convert if needed.
    if isinstance(image, np.ndarray):
        image = Image.fromarray(image)

    # Encode the image once, then answer the question against the encoding.
    # encode_image / answer_question are provided by the model's remote code.
    enc_image = model.encode_image(image)
    answer = model.answer_question(enc_image, question, tokenizer)

    return answer
# Create a Gradio interface.
# NOTE: the original code called block.add(gr.Interface(...)), which is not a
# supported way to compose Blocks — the components it created were never wired
# to the handler. Instead, wire a Submit button to the handler explicitly.
with gr.Blocks() as block:
    # type="pil" makes gradio hand the handler a PIL image directly,
    # which is what model.encode_image expects.
    image = gr.Image(label="Image", type="pil")
    question = gr.Textbox(label="Question")
    submit = gr.Button("Submit")
    output = gr.Textbox(label="Answer")
    submit.click(
        fn=analyze_image_direct,
        inputs=[image, question],
        outputs=output,
    )

block.launch()