File size: 2,150 Bytes
475ca62
 
 
 
 
 
8c4ab6b
 
b51e1ff
8c4ab6b
 
 
 
 
 
 
 
 
b51e1ff
 
 
 
 
 
 
 
 
 
 
 
475ca62
 
 
 
 
 
 
 
 
 
 
 
 
b51e1ff
475ca62
b51e1ff
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
from __future__ import annotations
from typing import Iterable
import gradio as gr
from gradio.themes.base import Base
from gradio.themes.utils import colors, fonts, sizes
import time
from transformers import AutoModelForCausalLM, AutoTokenizer
from PIL import Image
import numpy as np

# Hub checkpoint and the dated revision that both the model and the
# tokenizer are loaded from.
model_id = "vikhyatk/moondream2"
revision = "2024-05-20"

# trust_remote_code=True permits transformers to run the modelling code
# hosted in the checkpoint's repository.
model = AutoModelForCausalLM.from_pretrained(
    model_id,
    revision=revision,
    trust_remote_code=True,
)

# Tokenizer fetched at the same revision as the model.
tokenizer = AutoTokenizer.from_pretrained(
    model_id,
    revision=revision,
)

def analyze_image_direct(image: "Image.Image | None", question: str) -> str:
    """Answer a free-form question about an uploaded image.

    Args:
        image: The uploaded picture as a PIL image, or ``None`` when the
            form was submitted without an upload.
        question: The user's question about the image.

    Returns:
        The model's answer, or a short message naming the missing input
        when the image or the question was left empty.
    """
    # An empty image component arrives as None; stop here instead of
    # failing inside the model's image encoder.
    if image is None:
        return "Please upload an image before asking a question."

    # A missing or whitespace-only question gives the model nothing to answer.
    if not question or not question.strip():
        return "Please enter a question about the image."

    # Two-step call: encode the image, then answer the question against
    # that encoding using the module-level model and tokenizer.
    enc_image = model.encode_image(image)
    return model.answer_question(enc_image, question, tokenizer)

# Define a custom theme with purple color scheme
class PurpleTheme(Base):
    """Gradio theme that renders the app in a purple palette.

    The palette lives in the class attributes below. Declaring them is not
    enough on its own: they only take effect once they are mapped onto the
    theme variables, which ``__init__`` does through ``Base.__init__`` (hues
    and font stack) and ``Base.set`` (individual colour variables).
    """

    primary_color = "#9b59b6"  # Example purple shade
    primary_color_dark = "#8e44ad"  # Darker purple
    text_color = "#FFFFFF"  # White text for contrast
    background_color = "#5B2C6F"  # Deep purple background
    secondary_background_color = "#7D3C98"  # Lighter purple for secondary elements
    font = "Arial"

    def __init__(self, **kwargs) -> None:
        """Build the theme and apply the purple palette.

        Args:
            **kwargs: Keyword arguments forwarded unchanged to
                ``Base.__init__``. ``primary_hue``, ``secondary_hue`` and
                ``font`` default to purple / the class-level font when the
                caller does not supply them.
        """
        cls = type(self)

        # Only fill in defaults, so explicit caller choices still win.
        kwargs.setdefault("primary_hue", colors.purple)
        kwargs.setdefault("secondary_hue", colors.purple)
        # Read the font from the class: Base.__init__ assigns an instance
        # attribute named ``font`` that would otherwise shadow it. Generic
        # families follow as fallbacks.
        kwargs.setdefault("font", (cls.font, "ui-sans-serif", "sans-serif"))
        super().__init__(**kwargs)

        # Map the palette onto the theme variables. Text is white, so every
        # surface that carries text is given a purple background as well.
        palette = {
            "body_background_fill": cls.background_color,
            "body_text_color": cls.text_color,
            "background_fill_primary": cls.background_color,
            "background_fill_secondary": cls.secondary_background_color,
            "block_background_fill": cls.secondary_background_color,
            "block_label_text_color": cls.text_color,
            "block_title_text_color": cls.text_color,
            "input_background_fill": cls.primary_color_dark,
            "button_primary_background_fill": cls.primary_color,
            "button_primary_background_fill_hover": cls.primary_color_dark,
            "button_primary_text_color": cls.text_color,
        }
        # Use the same colours in dark mode so the look does not revert to
        # the stock dark palette.
        palette.update(
            {f"{name}_dark": value for name, value in list(palette.items())}
        )
        self.set(**palette)

# Theme instance handed to the interface below
purple_theme = PurpleTheme()

# Input widgets, listed in the order analyze_image_direct receives them
image_input = gr.Image(type="pil")
question_input = gr.Textbox(lines=2, placeholder="Enter your question here...")

# Assemble the app: handler, inputs, plain-text output, and page copy
iface = gr.Interface(
    fn=analyze_image_direct,
    theme=purple_theme,
    inputs=[image_input, question_input],
    outputs='text',
    title="Direct Image Question Answering",
    description="Upload an image and ask a question about it directly using the model.",
)

# Start serving the app
iface.launch()