import gradio as gr
import spaces
import torch
from transformers import AutoProcessor, LlavaForConditionalGeneration
from PIL import Image

# Load the processor and model. Pixtral ships in transformers as a Llava-style
# vision-language model, so it needs AutoProcessor (tokenizer + image
# preprocessor) and LlavaForConditionalGeneration; a plain AutoTokenizer /
# AutoModelForCausalLM pair cannot accept image inputs.
# "mistral-community/pixtral-12b" is the transformers-format checkpoint
# (the -240910 repo holds the raw Mistral-format weights).
model_name = "mistral-community/pixtral-12b"
processor = AutoProcessor.from_pretrained(model_name)
model = LlavaForConditionalGeneration.from_pretrained(
    model_name,
    torch_dtype=torch.float16,
    device_map="auto"
)

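# `@spaces.GPU` requests GPU time on a Hugging Face ZeroGPU Space for up to
# `duration` seconds per call; on regular GPU hardware it is a no-op.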
@spaces.GPU(duration=120)
def generate_response(image, prompt, max_length, temperature):
    # Build the chat turns. When an image is uploaded, the user turn carries
    # an image placeholder that the chat template expands into image tokens.
    user_content = [{"type": "text", "text": prompt}]
    if image is not None:
        user_content.insert(0, {"type": "image"})
    messages = [
        {"role": "system", "content": [{"type": "text", "text": "You are a helpful assistant that can analyze images and text."}]},
        {"role": "user", "content": user_content},
    ]
    formatted_prompt = processor.apply_chat_template(messages, add_generation_prompt=True, tokenize=False)

    # Preprocess the prompt (and the image, if one was uploaded); pixel values
    # are cast to float16 to match the model weights
    if image is not None:
        pil_image = Image.open(image).convert("RGB")
        inputs = processor(text=formatted_prompt, images=[pil_image], return_tensors="pt").to(model.device, torch.float16)
    else:
        inputs = processor(text=formatted_prompt, return_tensors="pt").to(model.device)
    
    # Generate
    with torch.no_grad():
        outputs = model.generate(
            **inputs,
            max_new_tokens=max_length,
            do_sample=True,
            temperature=temperature,
            top_k=100,
            top_p=0.95,
        )

    # Decode only the newly generated tokens, slicing off the prompt
    response = processor.decode(outputs[0][inputs["input_ids"].shape[1]:], skip_special_tokens=True)
    return response

# Custom CSS
css = """
body {
    background-color: #1a1a2e;
    color: #e0e0e0;
    font-family: 'Arial', sans-serif;
}
.container {
    max-width: 900px;
    margin: auto;
    padding: 20px;
}
.gradio-container {
    background-color: #16213e;
    border-radius: 15px;
    box-shadow: 0 4px 6px rgba(0, 0, 0, 0.1);
}
.header {
    background-color: #0f3460;
    padding: 20px;
    border-radius: 15px 15px 0 0;
    text-align: center;
    margin-bottom: 20px;
}
.header h1 {
    color: #e94560;
    font-size: 2.5em;
    margin-bottom: 10px;
}
.header p {
    color: #a0a0a0;
}
.input-group, .output-group {
    background-color: #1a1a2e;
    padding: 20px;
    border-radius: 10px;
    margin-bottom: 20px;
}
.input-group label, .output-group label {
    color: #e94560;
    font-weight: bold;
}
.generate-btn {
    background-color: #e94560 !important;
    color: white !important;
    border: none !important;
    border-radius: 5px !important;
    padding: 10px 20px !important;
    font-size: 16px !important;
    cursor: pointer !important;
    transition: background-color 0.3s ease !important;
}
.generate-btn:hover {
    background-color: #c81e45 !important;
}
.example-prompts {
    background-color: #1f2b47;
    padding: 15px;
    border-radius: 10px;
    margin-bottom: 20px;
}
.example-prompts h3 {
    color: #e94560;
    margin-bottom: 10px;
}
.example-prompts ul {
    list-style-type: none;
    padding-left: 0;
}
.example-prompts li {
    margin-bottom: 5px;
    cursor: pointer;
    transition: color 0.3s ease;
}
.example-prompts li:hover {
    color: #e94560;
}
"""

# Example prompts
example_prompts = [
    "Describe this image in detail.",
    "What emotions does this image evoke?",
    "Imagine a story based on this image.",
    "What technical aspects of photography are demonstrated in this image?",
    "How might this image be used in advertising?"
]

# Gradio interface
with gr.Blocks(css=css) as iface:
    gr.HTML(
        """
        <div class="header">
            <h1>Pixtral-12B Multimodal Generation</h1>
            <p>Generate text responses based on images and prompts using the powerful Pixtral-12B model.</p>
        </div>
        """
    )

    with gr.Group():
        with gr.Group(elem_classes="example-prompts"):
            gr.HTML("<h3>Example Prompts:</h3>")
            example_buttons = [gr.Button(prompt) for prompt in example_prompts]

        with gr.Group(elem_classes="input-group"):
            image_input = gr.Image(type="filepath", label="Upload an image (optional)")
            prompt = gr.Textbox(label="Prompt", placeholder="Enter your prompt here...", lines=5)
            max_length = gr.Slider(minimum=1, maximum=500, value=128, step=1, label="Max New Tokens")
            temperature = gr.Slider(minimum=0.1, maximum=2.0, value=0.7, step=0.1, label="Temperature")
            generate_btn = gr.Button("Generate", elem_classes="generate-btn")

        with gr.Group(elem_classes="output-group"):
            output = gr.Textbox(label="Generated Text", lines=10)

    generate_btn.click(generate_response, inputs=[image_input, prompt, max_length, temperature], outputs=output)

    # Set up example prompt buttons
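    # A gr.Button passed as an input supplies its label string, so the
    # identity lambda copies each example prompt into the prompt textbox.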
    for button in example_buttons:
        button.click(lambda x: x, inputs=[button], outputs=[prompt])

# Launch the app
iface.launch()