# Spaces status: Runtime error
import gradio as gr | |
import speech_recognition as sr | |
from PIL import Image | |
import io | |
import base64 | |
import json | |
def _encode_image_as_base64_jpeg(image, max_width=1024):
    """Return *image* as a base64-encoded JPEG string, at most *max_width* wide.

    Args:
        image: Anything ``PIL.Image.open`` accepts (a filesystem path or a
            binary file-like object).
        max_width: Upper bound for the output width in pixels. Wider images
            are scaled down with their aspect ratio preserved; narrower
            images are left at their original size.

    Returns:
        The JPEG bytes encoded as a base64 ``str`` (UTF-8 decoded).
    """
    # The context manager closes the underlying file once encoding is done.
    with Image.open(image) as picture:
        # JPEG cannot store alpha or palette data, so normalise such modes
        # (RGBA, LA, P, ...) to RGB before saving. Converting before the
        # resize also lets the LANCZOS filter apply to palette images.
        if picture.mode not in ("RGB", "L"):
            picture = picture.convert("RGB")

        width, height = picture.size
        if width > max_width:
            # Scale the height by the same factor as the width; clamp to 1 so
            # an extremely wide image never ends up with a zero height.
            new_height = max(1, int(height * max_width / width))
            # Image.ANTIALIAS was removed in Pillow 10; LANCZOS is the same
            # filter under its current name.
            picture = picture.resize((max_width, new_height), Image.LANCZOS)

        buffered = io.BytesIO()
        picture.save(buffered, format="JPEG")

    return base64.b64encode(buffered.getvalue()).decode("utf-8")


def _transcribe_audio(audio):
    """Return the text recognised in *audio*, or a message describing the failure.

    Args:
        audio: Anything ``sr.AudioFile`` accepts (a path or a file-like object).

    Returns:
        The transcript from ``Recognizer.recognize_google``; a fixed message
        when the speech is unintelligible (``sr.UnknownValueError``); or a
        message containing the error when the request fails
        (``sr.RequestError``).
    """
    recognizer = sr.Recognizer()
    with sr.AudioFile(audio) as source:
        audio_data = recognizer.record(source)

    try:
        return recognizer.recognize_google(audio_data)
    except sr.UnknownValueError:
        return "Could not understand audio"
    except sr.RequestError as e:
        return f"Could not request results; {e}"


def process_data(image, audio):
    """Bundle an uploaded image and a voice recording into a JSON string.

    Args:
        image: Image path or file-like object, or ``None`` if nothing was
            uploaded.
        audio: Audio path or file-like object, or ``None`` if nothing was
            recorded.

    Returns:
        A JSON document ``{"image": <base64 JPEG or "">, "text": <transcript
        or "">}`` serialised as a ``str``. Missing inputs yield empty strings.
    """
    img_str = _encode_image_as_base64_jpeg(image) if image is not None else ""
    text = _transcribe_audio(audio) if audio is not None else ""

    # Prepare JSON data
    data = json.dumps({"image": img_str, "text": text})

    # Here you would add your code to send `data` to the Speckle stream.
    # For now, we'll just return the JSON to display it.
    return data
# Build the UI: an image upload and a microphone recorder side by side, a
# submit button, and a textbox showing the JSON produced by `process_data`.
with gr.Blocks() as demo:
    gr.Markdown("### Upload Image and Record Voice Message")
    with gr.Row():
        # type="filepath" hands the callback a path string; the older
        # type="file" value is deprecated in Gradio.
        image = gr.Image(type="filepath", label="Upload Image")
        # NOTE(review): `source=` is the Gradio 3.x spelling; newer releases
        # appear to expect `sources=["microphone"]` -- confirm against the
        # Gradio version pinned for this app.
        audio = gr.Audio(source="microphone", type="filepath", label="Record Voice")
    submit_btn = gr.Button("Submit")
    output = gr.Textbox(label="JSON Output")
    submit_btn.click(fn=process_data, inputs=[image, audio], outputs=output)

demo.launch()