Spaces:

serJD
/

withVisionAndVoice

Runtime error

App Files Files Community

serJD committed on Mar 16, 2024

Commit

0ca4d23

verified ·

1 Parent(s): 36c841d

Create app.py

Browse files

Files changed (1) hide show

app.py +56 -0

app.py ADDED Viewed

	@@ -0,0 +1,56 @@

+import gradio as gr
+import speech_recognition as sr
+from PIL import Image
+import io
+import base64
+import json
+def process_data(image, audio):
+    """Build a JSON payload from an uploaded image and a recorded voice message.
+
+    Args:
+        image: Path or file object readable by ``Image.open``, or ``None``
+            when no image was supplied.
+        audio: Path or file object readable by ``sr.AudioFile``, or ``None``
+            when nothing was recorded.
+
+    Returns:
+        A JSON string with two keys: ``"image"`` (base64-encoded JPEG, or
+        ``""`` without an image) and ``"text"`` (the transcript, an error
+        message from the recognizer, or ``""`` without audio).
+    """
+    # Process image: cap the width at 1024 pixels and convert to base64
+    if image is not None:
+        base_width = 1024
+        # Context manager closes the underlying file once encoding is done
+        with Image.open(image) as img:
+            # 1024 is a maximum: only shrink, never enlarge smaller images
+            if img.size[0] > base_width:
+                w_percent = base_width / float(img.size[0])
+                # Clamp so extremely wide images never get a zero height
+                h_size = max(1, int(float(img.size[1]) * w_percent))
+                # Image.ANTIALIAS was removed in Pillow 10; LANCZOS is the same filter
+                img = img.resize((base_width, h_size), Image.LANCZOS)
+            # JPEG cannot store alpha or palette modes (e.g. PNG uploads)
+            if img.mode != "RGB":
+                img = img.convert("RGB")
+            # Convert to base64
+            buffered = io.BytesIO()
+            img.save(buffered, format="JPEG")
+        img_str = base64.b64encode(buffered.getvalue()).decode('utf-8')
+    else:
+        img_str = ""
+    # Process audio: Convert speech to text
+    if audio is not None:
+        recognizer = sr.Recognizer()
+        with sr.AudioFile(audio) as source:
+            audio_data = recognizer.record(source)
+        try:
+            text = recognizer.recognize_google(audio_data)
+        except sr.UnknownValueError:
+            text = "Could not understand audio"
+        except sr.RequestError as e:
+            text = f"Could not request results; {e}"
+    else:
+        text = ""
+    # Prepare JSON data
+    data = json.dumps({"image": img_str, "text": text})
+    # Here you would add your code to send `data` to the Speckle stream
+    # For now, we'll just return the JSON to display it
+    return data
+# Build the UI: an image upload and a microphone recorder whose values are
+# passed to process_data; the resulting JSON is shown in a textbox.
+with gr.Blocks() as demo:
+    gr.Markdown("### Upload Image and Record Voice Message")
+    with gr.Row():
+        # "file" is not an accepted `type` in current Gradio; "filepath" passes
+        # a path string, which Image.open and sr.AudioFile both accept.
+        image = gr.Image(type="filepath", label="Upload Image")
+        # Gradio 4 replaced the `source=` keyword with the list-valued `sources=`.
+        audio = gr.Audio(sources=["microphone"], type="filepath", label="Record Voice")
+    submit_btn = gr.Button("Submit")
+    output = gr.Textbox(label="JSON Output")
+    submit_btn.click(fn=process_data, inputs=[image, audio], outputs=output)
+demo.launch()