File size: 3,797 Bytes
0e0b22e
 
 
8f10c38
0e0b22e
 
 
 
 
 
78625da
0e0b22e
339da97
0e0b22e
 
b45fd05
0e0b22e
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
8f10c38
0e0b22e
 
8f06c65
0e0b22e
 
 
 
 
 
2c334ee
0e0b22e
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
import base64
import os
import uuid
import time
import cv2
import gradio as gr
import numpy as np
import requests

MARKDOWN = """
# ♠ Alyxsissy GPT-Vision Testing 👁‍🗨

Skeleton API structure for GPT-4-Vision. Implimentation into SissyGPT WiP.
"""
AVATARS = (
    "https://raw.githubusercontent.com/Nymbo/Web-Hosted-Images/main/Alyxsissy%20Favicon.png?token=GHSAT0AAAAAACJF7K6DI4PXYMGQU3TPEZ2GZL2LFXA",
    "https://media.roboflow.com/spaces/openai-white-logomark.png"
)
IMAGE_CACHE_DIRECTORY = "data"
API_URL = "https://api.openai.com/v1/chat/completions"


def preprocess_image(image: np.ndarray) -> np.ndarray:
    image = np.fliplr(image)
    return cv2.cvtColor(image, cv2.COLOR_RGB2BGR)


def encode_image_to_base64(image: np.ndarray) -> str:
    success, buffer = cv2.imencode('.jpg', image)
    if not success:
        raise ValueError("Could not encode image to JPEG format.")

    encoded_image = base64.b64encode(buffer).decode('utf-8')
    return encoded_image


def compose_payload(image: np.ndarray, prompt: str) -> dict:
    base64_image = encode_image_to_base64(image)
    return {
        "model": "gpt-4-vision-preview",
        "messages": [
            {
                "role": "user",
                "content": [
                    {
                        "type": "text",
                        "text": prompt
                    },
                    {
                        "type": "image_url",
                        "image_url": {
                            "url": f"data:image/jpeg;base64,{base64_image}"
                        }
                    }
                ]
            }
        ],
        "max_tokens": 300
    }


def compose_headers(api_key: str) -> dict:
    return {
        "Content-Type": "application/json",
        "Authorization": f"Bearer {api_key}"
    }


def prompt_image(api_key: str, image: np.ndarray, prompt: str) -> str:
    headers = compose_headers(api_key=api_key)
    payload = compose_payload(image=image, prompt=prompt)
    response = requests.post(url=API_URL, headers=headers, json=payload).json()

    if 'error' in response:
        raise ValueError(response['error']['message'])
    return response['choices'][0]['message']['content']


def cache_image(image: np.ndarray) -> str:
    image_filename = f"{uuid.uuid4()}.jpeg"
    os.makedirs(IMAGE_CACHE_DIRECTORY, exist_ok=True)
    image_path = os.path.join(IMAGE_CACHE_DIRECTORY, image_filename)
    cv2.imwrite(image_path, image)
    return image_path


def respond(api_key: str, image: np.ndarray, prompt: str, chat_history):
    if not api_key:
        raise ValueError(
            "API_KEY is not set. "
            "Please follow the instructions in the README to set it up.")

    image = preprocess_image(image=image)
    cached_image_path = cache_image(image)
    response = prompt_image(api_key=api_key, image=image, prompt=prompt)
    chat_history.append(((cached_image_path,), None))
    chat_history.append((prompt, response))
    return "", chat_history


with gr.Blocks(theme=gr.themes.Default(primary_hue="pink", secondary_hue="pink")) as demo:
    gr.Markdown(MARKDOWN)
    with gr.Row():
        webcam = gr.Webcam(label="Webcam", source="webcam", streaming=True)
        with gr.Column():
            api_key_textbox = gr.Textbox(
                label="OpenAI API KEY", type="password")
            chatbot = gr.Chatbot(
                height=500, bubble_full_width=False, avatar_images=AVATARS)
            message_textbox = gr.Textbox()
            clear_button = gr.ClearButton(message_textbox, chatbot, variant="primary")

    message_textbox.submit(
        fn=respond,
        inputs=[api_key_textbox, webcam, message_textbox, chatbot],
        outputs=[message_textbox, chatbot]
    )

demo.launch(debug=False, show_error=True)