# visionB/app.py
import os
import random
import uuid

import cv2
import gradio as gr
import numpy as np
import neovision

MARKDOWN = """
# Welcome to VisionB 🧠 + 📸
Meet VisionB, your Visual Agent that combines the power of advanced GPT models with real-time visual inputs. Engage in interactive dialogues, ask questions, and gain insights with the added context of images from your webcam. Experience a new dimension of interaction where vision and conversational AI meet.
"""
connector = neovision.OpanAIConnector()
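# NOTE (assumption): `simple_prompt` is treated throughout as taking a numpy
# image plus a text prompt and returning the model's reply as a plain string;
# adjust the calls below if the neovision API differs.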

def generate_liveness_challenge(image_details):
    # Based on the image details, generate a contextual challenge
    details = image_details.lower()  # the model's casing varies
    challenges = []
    if 'glasses' in details:
        challenges.append("Please take off your glasses and hold them in your hand.")
    if 'smiling' in details:
        challenges.append("Please take another picture with a neutral expression.")
    # More contextual clues and corresponding challenges can be added here.
    # Fall back to generic challenges if no specific detail is detected.
    if not challenges:
        challenges = [
            "Please hold up 5 fingers.",
            "Use your hand to cover one of your eyes.",
            "Make an OK sign with your hand and hold it up to your chin.",
        ]
    return random.choice(challenges)
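
# Example (hypothetical model output): a description mentioning glasses or
# smiling yields one of the matching challenges; with no matching detail, a
# generic challenge is picked at random.
#   generate_liveness_challenge("A person wearing glasses and smiling")
#   -> "Please take off your glasses..." or "Please take another picture..."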

def save_image_to_drive(image: np.ndarray) -> str:
    # Persist the frame under a unique name so the chat can reference it later
    image_filename = f"{uuid.uuid4()}.jpeg"
    image_directory = "data"
    os.makedirs(image_directory, exist_ok=True)
    image_path = os.path.join(image_directory, image_filename)
    cv2.imwrite(image_path, image)
    return image_path

def respond(image: np.ndarray, prompt: str, chat_history=None):
    # Initialize chat_history as an empty list if it's None
    if chat_history is None:
        chat_history = []
    image = np.fliplr(image)  # mirror the webcam frame
    # Gradio delivers RGB frames; OpenCV's imwrite expects BGR
    image = cv2.cvtColor(image, cv2.COLOR_RGB2BGR)
    image_path = save_image_to_drive(image)
    # If the user's prompt mentions 'verify', start the liveness challenge
    if 'verify' in prompt.lower():
        # Ask the model to describe the image, then tailor the challenge
        # to the details it reports
        image_details = connector.simple_prompt(
            image=image,
            prompt="What details can you describe from this image?",
        )
        challenge = generate_liveness_challenge(image_details)
        response = f"For liveness verification, {challenge}"
    else:
        # For any other prompt, just process normally
        response = connector.simple_prompt(image=image, prompt=prompt)
    # Gradio's tuple-format Chatbot: a (filepath,) tuple renders the image,
    # followed by the (user, assistant) text pair
    chat_history.append(((image_path,), None))
    chat_history.append((prompt, response))
    return "", chat_history

with gr.Blocks() as demo:
    gr.Markdown(MARKDOWN)
    with gr.Row():
        webcam = gr.Image(sources=["webcam"], streaming=True)
        with gr.Column():
            chatbot = gr.Chatbot(height=500)
            message = gr.Textbox()
            clear_button = gr.ClearButton([message, chatbot])
    message.submit(respond, [webcam, message, chatbot], [message, chatbot])

demo.launch(debug=False, show_error=True)
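
# Tip: pass share=True to demo.launch() to expose the demo via a temporary
# public Gradio URL; show_error=True surfaces handler exceptions in the UI.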