File size: 7,019 Bytes
b1bc271
 
 
 
 
 
 
 
 
695a0da
b1bc271
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
7fb1b17
 
b1bc271
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
695a0da
b1bc271
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
695a0da
b1bc271
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
# -*- coding: utf-8 -*-
# 財政部財政資訊中心 江信宗

import gradio as gr
import requests
import base64
import json
import os
from PIL import Image
from zhconv_rs import zhconv
from io import BytesIO

# Chat-completions endpoint that process_image() POSTs its request to.
invoke_url = "https://ai.api.nvidia.com/v1/gr/meta/llama-3.2-90b-vision-instruct/chat/completions"
# Sent as the payload's "stream" flag and used to pick the Accept header
# ("text/event-stream" when True, "application/json" otherwise).
stream = True

def compress_image(image_path, max_size_kb=175):
    """Return the image at *image_path* re-encoded as JPEG bytes.

    The image is shrunk in place to fit inside 800x800 pixels and then saved
    at decreasing JPEG quality (95, 85, ..., 15, 10) until the encoded data is
    no larger than the size limit.

    Args:
        image_path: Path (or file object) accepted by ``PIL.Image.open``.
        max_size_kb: Target upper bound for the encoded size, in KiB.

    Returns:
        The JPEG-encoded bytes. If even quality 10 does not fit the limit,
        the quality-10 encoding is returned anyway, so callers that need a
        hard bound must check the length themselves.

    Raises:
        OSError: If the file cannot be opened or decoded as an image.
    """
    max_size_bytes = max_size_kb * 1024
    quality = 95
    with Image.open(image_path) as source:
        # JPEG cannot store an alpha channel or a palette, so saving e.g. an
        # RGBA/P/LA image (typical for PNG and GIF uploads) raises OSError.
        # Convert anything that is not a JPEG-writable mode to RGB first.
        if source.mode in ("L", "RGB", "CMYK"):
            img = source
        else:
            img = source.convert("RGB")
        img.thumbnail((800, 800))
        while True:
            img_byte_arr = BytesIO()
            img.save(img_byte_arr, format='JPEG', quality=quality)
            # Stop once it fits, or when the lowest allowed quality is reached.
            if img_byte_arr.tell() <= max_size_bytes or quality <= 10:
                return img_byte_arr.getvalue()
            quality = max(quality - 10, 10)

def process_image(image_path, api_key, question):
    """Ask the vision model about an image and stream the answer.

    This function is a generator: each yielded value is the full text that
    should currently be displayed (the answer accumulated so far, or a single
    error message). Because a generator's ``return <value>`` is not delivered
    to code that simply iterates over it, every user-facing message is
    yielded rather than returned.

    Args:
        image_path: Path of the uploaded image, or a falsy value when no
            image was provided.
        api_key: API key for the Authorization header. When empty, the
            ``YOUR_API_KEY`` environment variable is used instead.
        question: The user's question; an instruction to answer in
            Traditional Chinese is appended to it.

    Yields:
        str: The accumulated response after each streamed chunk, followed by
        the final response converted with ``zhconv(..., "zh-tw")``; or one
        error message if the request could not be completed.
    """
    if not image_path:
        yield "錯誤:請先上傳圖片。"
        return
    api_key = api_key or os.getenv("YOUR_API_KEY")
    if not api_key:
        # Without this guard the request would be sent as "Bearer None".
        yield "錯誤:請提供 API Key。"
        return
    try:
        compressed_image = compress_image(image_path)
        image_b64 = base64.b64encode(compressed_image).decode()
        # Explicit raise instead of assert: asserts are stripped under -O.
        if len(image_b64) >= 180_000:
            raise ValueError(
                "Image is still too large after compression. Please try a smaller image."
            )
        prompt = f"{question} . Must reply to me in \"Traditional Chinese\"."
        headers = {
            "Authorization": f"Bearer {api_key}",
            "Accept": "text/event-stream" if stream else "application/json"
        }
        payload = {
            "model": 'meta/llama-3.2-90b-vision-instruct',
            "messages": [
                {
                    "role": "user",
                    "content": f'{prompt} <img src="data:image/jpeg;base64,{image_b64}" />'
                }
            ],
            "max_tokens": 512,
            "temperature": 1.00,
            "top_p": 1.00,
            "stream": stream
        }
        # (connect, read) timeouts so a stalled server cannot hang the UI
        # forever; the context manager releases the streamed connection.
        with requests.post(invoke_url, headers=headers, json=payload,
                           stream=True, timeout=(10, 120)) as response:
            if response.status_code == 402:
                yield "錯誤:API 帳號積分已過期。請至 NVIDIA 官網檢查您的帳號狀態。"
                return
            if response.status_code != 200:
                error_message = f"錯誤 {response.status_code}: {response.text}"
                print(error_message)
                yield f"發生錯誤。請稍後再試或聯繫管理員。錯誤代碼:{response.status_code}"
                return
            full_response = ""
            for line in response.iter_lines():
                if not line:
                    continue
                line = line.decode('utf-8')
                if not line.startswith('data: '):
                    continue
                json_str = line[6:]
                if json_str.strip() == '[DONE]':
                    break
                try:
                    json_obj = json.loads(json_str)
                except json.JSONDecodeError:
                    print(f"Failed to parse JSON: {json_str}")
                    continue
                # Chunks may carry no choices or a null content; treat both
                # as "nothing new" instead of failing the whole request.
                choices = json_obj.get('choices') or [{}]
                content = (choices[0].get('delta') or {}).get('content') or ''
                if content:
                    full_response += content
                    yield full_response
            # Final update: the complete answer normalised to zh-tw.
            yield zhconv(full_response, "zh-tw")
    except Exception as e:
        # Top-level boundary for the UI: report the failure as output text.
        print(f"發生異常:{str(e)}")
        yield f"處理請求時發生錯誤:{str(e)}"

# Stylesheet handed to gr.Blocks(css=...). Selectors such as .center-aligned,
# .gen-button, .clear-button and the *-background classes correspond to the
# elem_classes values assigned to the components in the UI definition.
custom_css = """

.center-aligned {

    text-align: center !important;

    color: #ff4081;

    text-shadow: 2px 2px 4px rgba(0,0,0,0.1);

    margin-bottom: -5px !important;

}

.gen-button {

    border-radius: 10px !important;

    background-color: #ff4081 !important;

    color: white !important;

    font-weight: bold !important;

    transition: all 0.3s ease !important;

}

.gen-button:hover {

    background-color: #f50057 !important;

    transform: scale(1.05);

}

.gr-input, .gr-box, .gr-dropdown {

    border-radius: 10px !important;

    border: 2px solid #ff4081 !important;

}

.gr-input:focus, .gr-box:focus, .gr-dropdown:focus {

    border-color: #f50057 !important;

    box-shadow: 0 0 0 2px rgba(245,0,87,0.2) !important;

}

.input-background {

    background-color: #B7E0FF !important;

    padding: 15px !important;

    border-radius: 10px !important;

}

.gr-box {

    border-radius: 10px !important;

    border: 2px solid #ff4081 !important;

}

.api-background {

    background-color: #FFCFB3 !important;

    padding: 15px !important;

    border-radius: 10px !important;

}

.output-background {

    background-color: #FFF4B5 !important;

    padding: 15px !important;

    border-radius: 10px !important;

}

.image-background {

    border-radius: 10px !important;

    border: 2px solid #B7E0FF !important;

}

.clear-button {

    border-radius: 10px !important;

    background-color: #333333 !important;

    color: white !important;

    font-weight: bold !important;

    transition: all 0.3s ease !important;

}

.clear-button:hover {

    background-color: #000000 !important;

    transform: scale(1.05);

}

"""

def clear_inputs():
    """Handler for the clear button.

    Shows an info toast and returns the reset values for, in order, the
    image input, the question box and the output box.
    """
    gr.Info("已成功清除所有內容,歡迎繼續提問......")
    return None, "", ""


# Page layout: title, image upload, question + API key row, model output,
# then the submit / clear button row.
with gr.Blocks(theme=gr.themes.Monochrome(), css=custom_css) as demo:
    gr.Markdown(
        "# 👹 Llama 3.2 90B Vision. Deployed by 江信宗",
        elem_classes="center-aligned",
    )
    image_input = gr.Image(
        type="filepath",
        label="上傳圖片",
        elem_classes="image-background",
    )
    with gr.Row():
        question_input = gr.Textbox(
            label="請輸入您的問題",
            placeholder="例如:What is in this image?",
            scale=2,
            elem_classes="input-background",
        )
        api_key_input = gr.Textbox(
            type="password",
            label="請輸入您的 API Key",
            placeholder="API authentication key for large language models",
            scale=1,
            elem_classes="api-background",
        )
    output = gr.Textbox(
        label="Vision Model 回覆",
        elem_classes="output-background",
        max_lines=20,
    )
    with gr.Row():
        submit_button = gr.Button(
            "傳送",
            variant="primary",
            scale=2,
            elem_classes="gen-button",
        )
        clear_button = gr.Button(
            "清除",
            variant="secondary",
            scale=1,
            elem_classes="clear-button",
        )

    # Event wiring. The API key box is intentionally not among the outputs
    # of the clear handler, so it keeps its value.
    clear_button.click(
        fn=clear_inputs,
        inputs=None,
        outputs=[image_input, question_input, output],
    )
    submit_button.click(
        fn=process_image,
        inputs=[image_input, api_key_input, question_input],
        outputs=output,
    )

    # Client-side helper meant to also reset the browser's file input when
    # the clear button is clicked.
    # NOTE(review): scripts embedded through gr.HTML may not be executed by
    # the browser - confirm this actually runs in the deployed Gradio version.
    gr.HTML("""

        <script>

            document.addEventListener('click', function(e) {

                if (e.target && e.target.textContent === '清除') {

                    var fileInput = document.querySelector('.upload-button input[type=file]');

                    if (fileInput) {

                        fileInput.value = '';

                    }

                }

            });

        </script>

    """)

if __name__ == "__main__":
    # When the SPACE_ID environment variable is present, launch with the
    # defaults; otherwise request a share link and hide the API page.
    launch_options = {} if "SPACE_ID" in os.environ else {"share": True, "show_api": False}
    demo.queue().launch(**launch_options)