serJD committed on
Commit
0ca4d23
·
verified ·
1 Parent(s): 36c841d

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +56 -0
app.py ADDED
@@ -0,0 +1,56 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ import speech_recognition as sr
3
+ from PIL import Image
4
+ import io
5
+ import base64
6
+ import json
7
+
8
def _image_to_base64(image, max_width=1024):
    """Return *image* as a base64-encoded JPEG string, at most *max_width* wide.

    Args:
        image: Anything ``PIL.Image.open`` accepts (a path or a binary
            file object).
        max_width: Widest allowed output in pixels. Wider images are scaled
            down with their aspect ratio preserved; narrower images are left
            at their original size instead of being upscaled.

    Returns:
        The JPEG bytes of the (possibly resized) image, base64-encoded and
        decoded to a UTF-8 ``str``.
    """
    with Image.open(image) as opened:
        picture = opened
        width, height = picture.size

        if width > max_width:
            w_percent = max_width / float(width)
            # Clamp to 1 so extremely wide, flat images never get a zero
            # height, which resize() rejects.
            new_height = max(1, int(float(height) * float(w_percent)))
            # LANCZOS is the same filter the older ANTIALIAS alias pointed
            # to; the alias is no longer provided by newer Pillow releases.
            picture = picture.resize((max_width, new_height), Image.LANCZOS)

        # JPEG cannot store alpha or palette modes (e.g. RGBA/P from PNG
        # uploads); convert those to RGB so save() does not raise.
        if picture.mode not in ("1", "L", "RGB", "RGBX", "CMYK", "YCbCr"):
            picture = picture.convert("RGB")

        buffered = io.BytesIO()
        picture.save(buffered, format="JPEG")

    return base64.b64encode(buffered.getvalue()).decode("utf-8")


def _speech_to_text(audio):
    """Transcribe *audio* with ``recognize_google`` and return the text.

    Args:
        audio: Anything ``sr.AudioFile`` accepts (a path or a file object).

    Returns:
        The recognized text, or a human-readable message when the speech
        could not be understood or the recognition request failed.
    """
    recognizer = sr.Recognizer()
    with sr.AudioFile(audio) as source:
        audio_data = recognizer.record(source)

    try:
        return recognizer.recognize_google(audio_data)
    except sr.UnknownValueError:
        return "Could not understand audio"
    except sr.RequestError as e:
        return f"Could not request results; {e}"


def process_data(image, audio):
    """Bundle an uploaded image and a voice recording into a JSON string.

    Args:
        image: The uploaded image (path or file object), or ``None`` when
            nothing was uploaded.
        audio: The recorded audio (path or file object), or ``None`` when
            nothing was recorded.

    Returns:
        A JSON document with two keys: ``"image"`` holds the base64-encoded
        JPEG (empty string when no image was given) and ``"text"`` holds the
        transcription or an error message (empty string when no audio was
        given).
    """
    img_str = _image_to_base64(image) if image is not None else ""
    text = _speech_to_text(audio) if audio is not None else ""

    # Prepare JSON data
    data = json.dumps({"image": img_str, "text": text})

    # Here you would add your code to send `data` to the Speckle stream
    # For now, we'll just return the JSON to display it
    return data
45
+
46
# Gradio UI: an image upload and a microphone recorder feed process_data,
# whose JSON result is shown in a textbox.
# NOTE(review): nesting below is reconstructed from a flattened listing —
# confirm the button and textbox are meant to sit outside the row.
with gr.Blocks() as demo:
    gr.Markdown("### Upload Image and Record Voice Message")
    with gr.Row():
        # "filepath" hands process_data a path on disk, which both
        # Image.open and sr.AudioFile accept; the older "file" type is
        # deprecated in Gradio.
        image = gr.Image(type="filepath", label="Upload Image")
        # NOTE(review): newer Gradio releases may expect
        # sources=["microphone"] instead of source= — confirm against the
        # pinned Gradio version.
        audio = gr.Audio(source="microphone", type="filepath", label="Record Voice")
    submit_btn = gr.Button("Submit")
    output = gr.Textbox(label="JSON Output")

    # Wire the button: both inputs go to process_data, its return value
    # fills the textbox.
    submit_btn.click(fn=process_data, inputs=[image, audio], outputs=output)

demo.launch()