shukdevdatta123 committed on
Commit
d359601
·
verified ·
1 Parent(s): c32d1e4

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +14 -3
app.py CHANGED
@@ -39,6 +39,13 @@ def process_text_input(api_key, text_prompt, selected_voice):
39
  except Exception as e:
40
  return f"Error: {str(e)}", None
41
 
 
 
 
 
 
 
 
42
  def process_audio_input(api_key, audio_path, text_prompt, selected_voice):
43
  """Process audio input and generate a response"""
44
  try:
@@ -53,6 +60,9 @@ def process_audio_input(api_key, audio_path, text_prompt, selected_voice):
53
  audio_data = audio_file.read()
54
  encoded_audio = base64.b64encode(audio_data).decode('utf-8')
55
 
 
 
 
56
  # Create message content with both text and audio
57
  message_content = []
58
 
@@ -66,7 +76,7 @@ def process_audio_input(api_key, audio_path, text_prompt, selected_voice):
66
  "type": "input_audio",
67
  "input_audio": {
68
  "data": encoded_audio,
69
- "format": "wav"
70
  }
71
  })
72
 
@@ -191,7 +201,8 @@ with gr.Blocks(title="OpenAI Audio Chat App") as app:
191
  audio_input = gr.Audio(
192
  label="Audio Input",
193
  type="filepath",
194
- sources=["microphone", "upload"]
 
195
  )
196
  example_btn = gr.Button("Use Example Audio")
197
 
@@ -299,7 +310,7 @@ with gr.Blocks(title="OpenAI Audio Chat App") as app:
299
  ## Notes:
300
  - You must provide your OpenAI API key in the field above
301
  - The model used is `gpt-4o-audio-preview` for conversation and `gpt-4o-transcribe` for transcriptions
302
- - Audio inputs should be in WAV format
303
  - Available voices: alloy, ash, ballad, coral, echo, fable, onyx, nova, sage, shimmer, and verse
304
  - Each audio response is automatically transcribed for verification
305
  """)
 
39
  except Exception as e:
40
  return f"Error: {str(e)}", None
41
 
42
+ def get_audio_format(audio_path):
43
+ """Determine audio format from file extension"""
44
+ _, ext = os.path.splitext(audio_path)
45
+ if ext.lower() == '.m4a':
46
+ return 'm4a'
47
+ return 'wav' # Default to wav for all other formats
48
+
49
  def process_audio_input(api_key, audio_path, text_prompt, selected_voice):
50
  """Process audio input and generate a response"""
51
  try:
 
60
  audio_data = audio_file.read()
61
  encoded_audio = base64.b64encode(audio_data).decode('utf-8')
62
 
63
+ # Determine audio format
64
+ audio_format = get_audio_format(audio_path)
65
+
66
  # Create message content with both text and audio
67
  message_content = []
68
 
 
76
  "type": "input_audio",
77
  "input_audio": {
78
  "data": encoded_audio,
79
+ "format": audio_format
80
  }
81
  })
82
 
 
201
  audio_input = gr.Audio(
202
  label="Audio Input",
203
  type="filepath",
204
+ sources=["microphone", "upload"],
205
+ file_types=["audio/wav", "audio/x-m4a", "audio/mp4"]
206
  )
207
  example_btn = gr.Button("Use Example Audio")
208
 
 
310
  ## Notes:
311
  - You must provide your OpenAI API key in the field above
312
  - The model used is `gpt-4o-audio-preview` for conversation and `gpt-4o-transcribe` for transcriptions
313
+ - Audio inputs can be in WAV or M4A format
314
  - Available voices: alloy, ash, ballad, coral, echo, fable, onyx, nova, sage, shimmer, and verse
315
  - Each audio response is automatically transcribed for verification
316
  """)