Update app.py
app.py CHANGED
@@ -39,6 +39,13 @@ def process_text_input(api_key, text_prompt, selected_voice):
     except Exception as e:
         return f"Error: {str(e)}", None
 
+def get_audio_format(audio_path):
+    """Determine audio format from file extension"""
+    _, ext = os.path.splitext(audio_path)
+    if ext.lower() == '.m4a':
+        return 'm4a'
+    return 'wav'  # Default to wav for all other formats
+
 def process_audio_input(api_key, audio_path, text_prompt, selected_voice):
     """Process audio input and generate a response"""
     try:
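The new helper keys the API format string off the file extension alone. A minimal standalone check of its behavior, with the function body copied from the hunk above so the snippet runs on its own:

import os

def get_audio_format(audio_path):
    """Determine audio format from file extension"""
    _, ext = os.path.splitext(audio_path)
    if ext.lower() == '.m4a':
        return 'm4a'
    return 'wav'  # Default to wav for all other formats

assert get_audio_format("clip.m4a") == 'm4a'
assert get_audio_format("clip.M4A") == 'm4a'  # lowercasing makes the check case-insensitive
assert get_audio_format("clip.wav") == 'wav'
assert get_audio_format("clip.mp3") == 'wav'  # every non-m4a extension falls back to wav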
@@ -53,6 +60,9 @@ def process_audio_input(api_key, audio_path, text_prompt, selected_voice):
         audio_data = audio_file.read()
         encoded_audio = base64.b64encode(audio_data).decode('utf-8')
 
+        # Determine audio format
+        audio_format = get_audio_format(audio_path)
+
         # Create message content with both text and audio
         message_content = []
 
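The commit inserts the format lookup between the base64 encoding and the message assembly. A sketch of the whole read-encode-detect step under the same assumptions the file makes (`base64` and `os` imported); the name `encode_audio_for_api` is illustrative, not from the commit:

import base64
import os

def encode_audio_for_api(audio_path):
    # Read raw bytes and base64-encode them, as the surrounding code does
    with open(audio_path, 'rb') as audio_file:
        audio_data = audio_file.read()
    encoded_audio = base64.b64encode(audio_data).decode('utf-8')
    # Pair the payload with the format string the API message needs
    ext = os.path.splitext(audio_path)[1].lower()
    audio_format = 'm4a' if ext == '.m4a' else 'wav'
    return encoded_audio, audio_format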
@@ -66,7 +76,7 @@ def process_audio_input(api_key, audio_path, text_prompt, selected_voice):
             "type": "input_audio",
             "input_audio": {
                 "data": encoded_audio,
-                "format": "wav"
+                "format": audio_format
             }
         })
 
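The `input_audio` part now carries the detected format instead of a hardcoded one. The `chat.completions.create` call that consumes `message_content` sits outside this diff; the documented request shape for `gpt-4o-audio-preview` looks like the sketch below, where the text and audio values are illustrative stand-ins for the app's runtime state:

from openai import OpenAI

client = OpenAI()  # reads OPENAI_API_KEY from the environment

encoded_audio, audio_format = "...base64...", "m4a"  # stand-ins for the app's runtime values

response = client.chat.completions.create(
    model="gpt-4o-audio-preview",
    modalities=["text", "audio"],
    audio={"voice": "alloy", "format": "wav"},  # voice and format of the generated reply
    messages=[{
        "role": "user",
        "content": [
            {"type": "text", "text": "Please respond to this recording."},
            {"type": "input_audio",
             "input_audio": {"data": encoded_audio, "format": audio_format}},
        ],
    }],
)
# response.choices[0].message.audio.data holds the base64-encoded reply audio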
@@ -191,7 +201,8 @@ with gr.Blocks(title="OpenAI Audio Chat App") as app:
             audio_input = gr.Audio(
                 label="Audio Input",
                 type="filepath",
-                sources=["microphone", "upload"]
+                sources=["microphone", "upload"],
+                file_types=["audio/wav", "audio/x-m4a", "audio/mp4"]
             )
             example_btn = gr.Button("Use Example Audio")
 
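`file_types` mirrors the MIME-type filtering that `gr.File` exposes; whether `gr.Audio` honors it depends on the installed Gradio version, so treat the stub below as a transcription of the commit's configuration rather than independently verified component API:

import gradio as gr

with gr.Blocks(title="OpenAI Audio Chat App") as demo:
    audio_input = gr.Audio(
        label="Audio Input",
        type="filepath",  # callbacks receive a temp-file path, which process_audio_input expects
        sources=["microphone", "upload"],
        # .m4a uploads are reported as audio/x-m4a or audio/mp4 depending on
        # the browser and OS, hence both entries alongside audio/wav
        file_types=["audio/wav", "audio/x-m4a", "audio/mp4"],
    )

demo.launch()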
@@ -299,7 +310,7 @@ with gr.Blocks(title="OpenAI Audio Chat App") as app:
     ## Notes:
     - You must provide your OpenAI API key in the field above
     - The model used is `gpt-4o-audio-preview` for conversation and `gpt-4o-transcribe` for transcriptions
-    - Audio inputs must be in WAV format
+    - Audio inputs can be in WAV or M4A format
     - Available voices: alloy, ash, ballad, coral, echo, fable, onyx, nova, sage, shimmer, and verse
     - Each audio response is automatically transcribed for verification
     """)
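The notes name `gpt-4o-transcribe` for the verification pass, though that code path is outside this diff. The documented call shape for that model, with a hypothetical file name standing in for the decoded reply audio:

from openai import OpenAI

client = OpenAI()  # reads OPENAI_API_KEY from the environment

# 'reply.wav' is a hypothetical file holding the decoded model reply
with open("reply.wav", "rb") as audio_file:
    transcript = client.audio.transcriptions.create(
        model="gpt-4o-transcribe",
        file=audio_file,
    )
print(transcript.text)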
|