Spaces:

shukdevdatta123
/

WaveTalk

Running

App Files Files Community

shukdevdatta123 committed on Apr 20

Commit

d359601

verified ·

1 Parent(s): c32d1e4

Update app.py

Browse files

Files changed (1) hide show

app.py +14 -3

app.py CHANGED Viewed

@@ -39,6 +39,13 @@ def process_text_input(api_key, text_prompt, selected_voice):
     except Exception as e:
         return f"Error: {str(e)}", None
 def process_audio_input(api_key, audio_path, text_prompt, selected_voice):
     """Process audio input and generate a response"""
     try:
@@ -53,6 +60,9 @@ def process_audio_input(api_key, audio_path, text_prompt, selected_voice):
             audio_data = audio_file.read()
         encoded_audio = base64.b64encode(audio_data).decode('utf-8')
         # Create message content with both text and audio
         message_content = []
@@ -66,7 +76,7 @@ def process_audio_input(api_key, audio_path, text_prompt, selected_voice):
             "type": "input_audio",
             "input_audio": {
                 "data": encoded_audio,
-                "format": "wav"
             }
         })
@@ -191,7 +201,8 @@ with gr.Blocks(title="OpenAI Audio Chat App") as app:
                 audio_input = gr.Audio(
                     label="Audio Input",
                     type="filepath",
-                    sources=["microphone", "upload"]
                 )
                 example_btn = gr.Button("Use Example Audio")
@@ -299,7 +310,7 @@ with gr.Blocks(title="OpenAI Audio Chat App") as app:
     ## Notes:
     - You must provide your OpenAI API key in the field above
     - The model used is `gpt-4o-audio-preview` for conversation and `gpt-4o-transcribe` for transcriptions
-    - Audio inputs should be in WAV format
     - Available voices: alloy, ash, ballad, coral, echo, fable, onyx, nova, sage, shimmer, and verse
     - Each audio response is automatically transcribed for verification
     """)

     except Exception as e:
         return f"Error: {str(e)}", None
+def get_audio_format(audio_path):
+    """Return 'm4a' when audio_path has a .m4a extension (case-insensitive); otherwise return 'wav'."""
+    _, ext = os.path.splitext(audio_path)
+    if ext.lower() == '.m4a':
+        return 'm4a'
+    return 'wav'  # Fallback: every other extension, or no extension at all, is reported as wav
 def process_audio_input(api_key, audio_path, text_prompt, selected_voice):
     """Process audio input and generate a response"""
     try:
             audio_data = audio_file.read()
         encoded_audio = base64.b64encode(audio_data).decode('utf-8')
+        # Determine audio format
+        audio_format = get_audio_format(audio_path)
         # Create message content with both text and audio
         message_content = []
             "type": "input_audio",
             "input_audio": {
                 "data": encoded_audio,
+                "format": audio_format
             }
         })
                 audio_input = gr.Audio(
                     label="Audio Input",
                     type="filepath",
+                    sources=["microphone", "upload"],
+                    file_types=["audio/wav", "audio/x-m4a", "audio/mp4"]
                 )
                 example_btn = gr.Button("Use Example Audio")
     ## Notes:
     - You must provide your OpenAI API key in the field above
     - The model used is `gpt-4o-audio-preview` for conversation and `gpt-4o-transcribe` for transcriptions
+    - Audio inputs can be in WAV or M4A format
     - Available voices: alloy, ash, ballad, coral, echo, fable, onyx, nova, sage, shimmer, and verse
     - Each audio response is automatically transcribed for verification
     """)