Spaces:

serJD
/

withVisionAndVoice

Runtime error

App Files Files Community

withVisionAndVoice / app.py

serJD

Create app.py

0ca4d23 verified over 1 year ago

raw

history blame

1.89 kB

	import gradio as gr
	import speech_recognition as sr
	from PIL import Image
	import io
	import base64
	import json

	def _encode_image(image, max_width=1024):
	    """Return *image* as a base64-encoded JPEG string, at most *max_width* px wide.

	    Args:
	        image: Anything ``PIL.Image.open`` accepts (a filesystem path or a
	            binary file object).
	        max_width: Upper bound for the output width in pixels. Wider images
	            are scaled down with their aspect ratio kept; narrower images
	            are left at their original size.

	    Returns:
	        The JPEG bytes encoded as a UTF-8 base64 string.
	    """
	    # The context manager closes the underlying file once encoding is done.
	    with Image.open(image) as img:
	        # JPEG cannot store alpha or palette data; saving e.g. an RGBA PNG
	        # would raise OSError, so normalise to RGB first.
	        if img.mode not in ("RGB", "L"):
	            img = img.convert("RGB")

	        width, height = img.size
	        # The limit is a maximum: only shrink, never upscale small images.
	        if width > max_width:
	            scale = max_width / float(width)
	            # Clamp to 1 so extremely wide images do not get a zero height.
	            new_height = max(1, int(height * scale))
	            # Image.ANTIALIAS was removed in Pillow 10; LANCZOS is the same
	            # filter under its current name.
	            img = img.resize((max_width, new_height), Image.LANCZOS)

	        buffered = io.BytesIO()
	        img.save(buffered, format="JPEG")

	    return base64.b64encode(buffered.getvalue()).decode('utf-8')


	def _transcribe_audio(audio):
	    """Return the transcript of *audio*, or a readable error message.

	    Args:
	        audio: Anything ``sr.AudioFile`` accepts (a path or a file object).

	    Returns:
	        The text produced by ``Recognizer.recognize_google``. If the speech
	        is unintelligible or the recognition request fails, a short message
	        describing the problem is returned instead of raising.
	    """
	    recognizer = sr.Recognizer()
	    with sr.AudioFile(audio) as source:
	        audio_data = recognizer.record(source)

	    try:
	        return recognizer.recognize_google(audio_data)
	    except sr.UnknownValueError:
	        return "Could not understand audio"
	    except sr.RequestError as e:
	        return f"Could not request results; {e}"


	def process_data(image, audio):
	    """Bundle an uploaded image and a voice recording into one JSON string.

	    Args:
	        image: Path or file object of the uploaded image, or ``None`` when
	            no image was supplied.
	        audio: Path or file object of the recorded audio, or ``None`` when
	            nothing was recorded.

	    Returns:
	        A JSON document with two keys: ``"image"`` (base64 JPEG, or ``""``
	        when there is no image) and ``"text"`` (the transcript or an error
	        message, or ``""`` when there is no audio).
	    """
	    img_str = _encode_image(image) if image is not None else ""
	    text = _transcribe_audio(audio) if audio is not None else ""

	    # Prepare JSON data
	    data = json.dumps({"image": img_str, "text": text})

	    # Here you would add your code to send `data` to the Speckle stream
	    # For now, we'll just return the JSON to display it
	    return data

	# Gradio 4 replaced the ``source=`` keyword of input components with
	# ``sources=[...]``; pick the spelling the installed version accepts so the
	# app starts on either major version.
	if int(gr.__version__.split(".")[0]) >= 4:
	    mic_kwargs = {"sources": ["microphone"]}
	else:
	    mic_kwargs = {"source": "microphone"}

	# Build the UI: an image upload and a microphone recorder side by side,
	# a submit button, and a text box that shows the JSON from process_data.
	with gr.Blocks() as demo:
	    gr.Markdown("### Upload Image and Record Voice Message")
	    with gr.Row():
	        # "filepath" passes process_data a path to a temporary file; the
	        # older "file" value is not an accepted type in current Gradio.
	        image = gr.Image(type="filepath", label="Upload Image")
	        audio = gr.Audio(type="filepath", label="Record Voice", **mic_kwargs)
	    submit_btn = gr.Button("Submit")
	    output = gr.Textbox(label="JSON Output")

	    # Event listeners must be registered inside the Blocks context.
	    submit_btn.click(fn=process_data, inputs=[image, audio], outputs=output)

	demo.launch()