File size: 2,520 Bytes
475ca62
 
 
 
 
8c4ab6b
 
b51e1ff
8c4ab6b
 
 
 
 
 
 
 
 
b51e1ff
83e1fb4
 
 
 
b51e1ff
83e1fb4
 
b51e1ff
83e1fb4
 
 
 
b51e1ff
 
 
475ca62
 
83e1fb4
 
6d2ecc3
 
 
83e1fb4
 
 
 
 
 
 
475ca62
 
 
 
 
b51e1ff
475ca62
b51e1ff
 
 
 
 
 
83e1fb4
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
from __future__ import annotations
from typing import Iterable
import gradio as gr
from gradio.themes.base import Base
from gradio.themes.utils import colors, fonts, sizes
from transformers import AutoModelForCausalLM, AutoTokenizer
from PIL import Image
import numpy as np

# Checkpoint to serve, pinned to a fixed revision so the remote modelling
# code and weights cannot change underneath the app.
model_id = "vikhyatk/moondream2"
revision = "2024-05-20"

# The checkpoint ships its own modelling code, hence trust_remote_code=True.
model = AutoModelForCausalLM.from_pretrained(
    model_id,
    revision=revision,
    trust_remote_code=True,
)

# Matching tokenizer, taken from the same pinned revision.
tokenizer = AutoTokenizer.from_pretrained(model_id, revision=revision)

def analyze_image_direct(image: "Image.Image | None", question: str) -> str:
    """Answer a free-form question about an uploaded image.

    Args:
        image: The picture to inspect, as delivered by the
            ``gr.Image(type="pil")`` input; ``None`` when nothing was uploaded.
        question: The question to ask about the image.

    Returns:
        The model's answer, or a short message asking for the missing input.
    """
    if image is None:
        return "Please upload an image."

    prompt = (question or "").strip()
    if not prompt:
        return "Please enter a question."

    # moondream2 exposes its vision-language entry points through its remote
    # code: the image is embedded first, then the question is answered against
    # that embedding. Calling `model.generate` on the question tokens alone
    # would never look at the image.
    enc_image = model.encode_image(image)
    return model.answer_question(enc_image, prompt, tokenizer)

# Define a custom theme with purple color scheme
class PurpleTheme(Base):
    """Gradio theme with white text on dark purple backgrounds.

    Core settings (hues, text size, font) are handed to ``Base.__init__`` and
    individual colours are overridden through ``set()``. Assigning ad-hoc
    attributes on the instance afterwards does not configure the theme.
    """

    def __init__(self):
        super().__init__(
            primary_hue=colors.purple,
            secondary_hue=colors.purple,
            neutral_hue=colors.gray,
            text_size=sizes.text_lg,
            # Arial is a system font, not a Google Font, so it is named
            # directly and followed by generic fallbacks.
            font=("Arial", "ui-sans-serif", "sans-serif"),
        )
        # "*primary_NNN" refers to a shade of the primary hue chosen above.
        # Light and dark variants are set to the same values so the purple
        # look does not depend on the viewer's colour-scheme preference.
        super().set(
            body_background_fill="*primary_900",
            body_background_fill_dark="*primary_900",
            background_fill_primary="*primary_900",
            background_fill_primary_dark="*primary_900",
            background_fill_secondary="*primary_700",
            background_fill_secondary_dark="*primary_700",
            # Paint inputs purple too, so white text is not drawn on a
            # light field.
            input_background_fill="*primary_700",
            input_background_fill_dark="*primary_700",
            body_text_color="white",
            body_text_color_dark="white",
            body_text_color_subdued="white",
            body_text_color_subdued_dark="white",
        )

# Build the theme once; the interface below picks it up by reference.
purple_theme = PurpleTheme()

# Wire the question-answering function into a Gradio UI: an image upload and
# a two-line question box go in, plain text comes out.
iface = gr.Interface(
    fn=analyze_image_direct,
    inputs=[
        gr.Image(type="pil"),
        gr.Textbox(lines=2, placeholder="Enter your question here..."),
    ],
    outputs="text",
    title="Direct Image Question Answering",
    description="Upload an image and ask a question about it directly using the model.",
    theme=purple_theme,
)

# Start serving the interface.
iface.launch()