Spaces:

Bilic
/

visionB

Sleeping

App Files Files Community

Simba committed on Nov 7, 2023

Commit

2874322

1 Parent(s): d6c1203

WIP

Browse files

Files changed (6) hide show

README.md +2 -2
app.py +50 -0
neovision/__init__.py +2 -0
neovision/core.py +28 -0
neovision/utils.py +62 -0
requirements.txt +6 -0

README.md CHANGED Viewed

@@ -1,6 +1,6 @@
 ---
-title: VisionB
-emoji: 💻
 colorFrom: pink
 colorTo: pink
 sdk: gradio

 ---
+title: NeoVision
+emoji: 🏢
 colorFrom: pink
 colorTo: pink
 sdk: gradio

app.py ADDED Viewed

	@@ -0,0 +1,50 @@

+import os
+import cv2
+import uuid
+import gradio as gr
+import numpy as np
+import neovision
+MARKDOWN = """
+# neovision 💬 + 📸
+This is a demo of neovision, a tool that allows you to chat with your webcamusinf GTP Vision.
+"""
+connector = neovision.OpanAIConnector()
+def save_image_to_drive(image: np.ndarray) -> str:
+    image_filename = f"{uuid.uuid4()}.jpeg"
+    image_directory = "data"
+    os.makedirs(image_directory, exist_ok=True)
+    image_path = os.path.join(image_directory, image_filename)
+    cv2.imwrite(image_path, image)
+    return image_path
+def respond(image: np.ndarray, prompt: str, chat_history):
+    image = np.fliplr(image)
+    image = cv2.cvtColor(image, cv2.COLOR_RGB2BGR)
+    image_path = save_image_to_drive(image)
+    response = connector.simple_prompt(image=image, prompt=prompt)
+    chat_history.append(((image_path,), None))
+    chat_history.append((prompt, response))
+    return "", chat_history
+# Build the demo page: the intro text, then one row holding the webcam image
+# and a column with the chat widgets.
+with gr.Blocks() as demo:
+    gr.Markdown(MARKDOWN)
+    with gr.Row():
+        webcam = gr.Image(source="webcam", streaming=True)
+        with gr.Column():
+            chatbot = gr.Chatbot(height=500)
+            message = gr.Textbox()
+            clear_button = gr.ClearButton([message, chatbot])
+    # On submit, respond() receives the webcam image, the typed text and the chat
+    # history; its two return values go back to the textbox and the chatbot.
+    message.submit(respond, [webcam, message, chatbot], [message, chatbot])
+# Start the app when this module is run.
+demo.launch(debug=False, show_error=True)

neovision/__init__.py ADDED Viewed

	@@ -0,0 +1,2 @@


1	+ from neovision.utils import encode_image_to_base64, compose_payload
2	+ from neovision.core import OpanAIConnector

neovision/core.py ADDED Viewed

	@@ -0,0 +1,28 @@

+import os
+import numpy as np
+import requests
+from neovision.utils import compose_payload
+API_KEY = os.getenv('OPENAI_API_KEY')
+class OpanAIConnector:
+    def __init__(self, api_key: str = API_KEY):
+        if api_key is None:
+            raise ValueError("API_KEY is not set")
+        self.api_key = api_key
+    def simple_prompt(self, image: np.ndarray, prompt: str) -> str:
+        headers = {
+            "Content-Type": "application/json",
+            "Authorization": f"Bearer {self.api_key}"
+        }
+        payload = compose_payload(image=image, prompt=prompt)
+        response = requests.post("https://api.openai.com/v1/chat/completions",
+                                 headers=headers, json=payload).json()
+        return response['choices'][0]['message']['content']

neovision/utils.py ADDED Viewed

	@@ -0,0 +1,62 @@

+import cv2
+import base64
+import numpy as np
+def encode_image_to_base64(image: np.ndarray) -> str:
+    """
+    Encodes a given image represented as a NumPy array to a base64-encoded string.
+    Parameters:
+       image (np.ndarray): A NumPy array representing the image to be encoded.
+    Returns:
+       str: A base64-encoded string representing the input image in JPEG format.
+    Raises:
+       ValueError: If the image cannot be encoded to JPEG format.
+   """
+    success, buffer = cv2.imencode('.jpg', image)
+    if not success:
+        raise ValueError("Could not encode image to JPEG format.")
+    encoded_image = base64.b64encode(buffer).decode('utf-8')
+    return encoded_image
+def compose_payload(image: np.ndarray, prompt: str) -> dict:
+    """
+    Composes a payload dictionary with a base64 encoded image and a text prompt for the GPT-4 Vision model.
+    Args:
+        image (np.ndarray): The image in the form of a NumPy array to encode and send.
+        prompt (str): The prompt text to accompany the image in the payload.
+    Returns:
+        dict: A dictionary structured as a payload for the GPT-4 Vision model, including the model name,
+              an array of messages each containing a role and content with text and the base64 encoded image,
+              and the maximum number of tokens to generate.
+    """
+    base64_image = encode_image_to_base64(image)
+    return {
+        "model": "gpt-4-vision-preview",
+        "messages": [
+            {
+                "role": "user",
+                "content": [
+                    {
+                        "type": "text",
+                        "text": prompt
+                    },
+                    {
+                        "type": "image_url",
+                        "image_url": {
+                            "url": f"data:image/jpeg;base64,{base64_image}"
+                        }
+                    }
+                ]
+            }
+        ],
+        "max_tokens": 300
+    }

requirements.txt ADDED Viewed

	@@ -0,0 +1,6 @@

+supervision
+openai
+opencv-python
+numpy
+requests
+gradio