Spaces:

drengskapur
/

openai-text-to-speech

Running

App Files Files

jonathanagustin committed on Sep 21, 2024

Commit

055f404

verified ·

1 Parent(s): 7b9da97

Upload folder using huggingface_hub

Browse files

Files changed (1) hide show

app.py +45 -100

app.py CHANGED Viewed

@@ -16,75 +16,10 @@ def tts(
     """
     Convert input text to speech using OpenAI's Text-to-Speech API.
-    Parameters:
-        input_text (str): The text to be converted to speech.
-        model (str): The model to use for synthesis (e.g., 'tts-1', 'tts-1-hd').
-        voice (str): The voice to use when generating the audio.
-        api_key (str): OpenAI API key.
-        response_format (str): Format of the output audio. Defaults to 'mp3'.
-        speed (float): Speed of the generated audio. Defaults to 1.0.
-    Returns:
-        str: File path to the generated audio file.
-    Raises:
-        gr.Error: If input parameters are invalid or API call fails.
     """
-    if not api_key.strip():
-        raise gr.Error(
-            "API key is required. Get an API key at: https://platform.openai.com/account/api-keys"
-        )
-    if not input_text.strip():
-        raise gr.Error("Input text cannot be empty.")
-    if len(input_text) > 4096:
-        raise gr.Error("Input text exceeds the maximum length of 4096 characters.")
-    if speed < 0.25 or speed > 4.0:
-        raise gr.Error("Speed must be between 0.25 and 4.0.")
-    headers = {
-        "Authorization": f"Bearer {api_key}",
-        "Content-Type": "application/json",
-    }
-    data = {
-        "model": model,
-        "input": input_text,
-        "voice": voice,
-        "response_format": response_format,
-        "speed": speed,
-    }
-    try:
-        response = requests.post(
-            "https://api.openai.com/v1/audio/speech",
-            headers=headers,
-            json=data,
-        )
-        response.raise_for_status()
-    except requests.exceptions.HTTPError as http_err:
-        raise gr.Error(f"HTTP error occurred: {http_err} - {response.text}")
-    except Exception as err:
-        raise gr.Error(f"An error occurred: {err}")
-    # The content will be the audio file content
-    audio_content = response.content
-    file_extension = response_format.lower()
-    # PCM is raw data, so it does not have a standard file extension
-    if file_extension == "pcm":
-        file_extension = "raw"
-    with tempfile.NamedTemporaryFile(
-        suffix=f".{file_extension}", delete=False
-    ) as temp_file:
-        temp_file.write(audio_content)
-        temp_file_path = temp_file.name
-    return temp_file_path
 def main():
     """
@@ -121,20 +56,26 @@ def main():
     gr.set_static_paths(paths=[PREVIEW_DIR])
     with gr.Blocks(title="OpenAI - Text to Speech") as demo:
         with gr.Row():
             with gr.Column(scale=1):
                 with gr.Group():
                     preview_audio = gr.Audio(
                         interactive=False,
-                        label="Preview Audio",
-                        value=None,
                         visible=True,
                     )
-                    # Function to play the selected voice sample
-                    def play_voice_sample(voice):
-                        return gr.update(value=VOICE_PREVIEW_FILES[voice])
                     # Create buttons for each voice
                     for voice in VOICE_OPTIONS:
                         voice_button = gr.Button(
@@ -146,6 +87,10 @@ def main():
                             fn=partial(play_voice_sample, voice=voice),
                             outputs=preview_audio,
                         )
             with gr.Column(scale=1):
                 api_key_input = gr.Textbox(
                     label="OpenAI API Key",
@@ -178,33 +123,33 @@ def main():
                     value=1.0,
                 )
-        with gr.Column(scale=2):
-            input_textbox = gr.Textbox(
-                label="Input Text",
-                lines=10,
-                placeholder="Type your text here...",
-            )
-            # Add a character counter below the input textbox
-            char_count_text = gr.Markdown("0 / 4096")
-            # Function to update the character count
-            def update_char_count(input_text):
-                char_count = len(input_text)
-                return f"**{char_count} / 4096**"
-            # Update character count when the user stops typing
-            input_textbox.change(
-                fn=update_char_count,
-                inputs=input_textbox,
-                outputs=char_count_text,
-            )
-            submit_button = gr.Button(
-                "Convert Text to Speech",
-                variant="primary",
-            )
-        with gr.Column(scale=1):
-            output_audio = gr.Audio(label="Output Audio")
         # Define the event handler for the submit button with error handling
         def on_submit(

     """
     Convert input text to speech using OpenAI's Text-to-Speech API.
+    (Function definition remains the same)
     """
+    # (Function body remains the same)
+    # ...
 def main():
     """
     gr.set_static_paths(paths=[PREVIEW_DIR])
     with gr.Blocks(title="OpenAI - Text to Speech") as demo:
+        gr.Markdown("# OpenAI Text-to-Speech Demo")
         with gr.Row():
             with gr.Column(scale=1):
                 with gr.Group():
+                    gr.Markdown("### Voice Preview")
+                    # Function to play the selected voice sample
+                    def play_voice_sample(voice):
+                        return gr.Audio.update(
+                            value=VOICE_PREVIEW_FILES[voice],
+                            label=voice.capitalize(),
+                        )
+                    # Create the 'preview_audio' component
                     preview_audio = gr.Audio(
                         interactive=False,
+                        label="Echo",
+                        value=VOICE_PREVIEW_FILES['echo'],
                         visible=True,
                     )
                     # Create buttons for each voice
                     for voice in VOICE_OPTIONS:
                         voice_button = gr.Button(
                             fn=partial(play_voice_sample, voice=voice),
                             outputs=preview_audio,
                         )
+                    # Place the audio player below the buttons
+                    preview_audio.render()
             with gr.Column(scale=1):
                 api_key_input = gr.Textbox(
                     label="OpenAI API Key",
                     value=1.0,
                 )
+            with gr.Column(scale=2):
+                input_textbox = gr.Textbox(
+                    label="Input Text",
+                    lines=10,
+                    placeholder="Type your text here...",
+                )
+                # Add a character counter below the input textbox
+                char_count_text = gr.Markdown("0 / 4096")
+                # Function to update the character count
+                def update_char_count(input_text):
+                    char_count = len(input_text)
+                    return f"**{char_count} / 4096**"
+                # Update character count when the user stops typing
+                input_textbox.change(
+                    fn=update_char_count,
+                    inputs=input_textbox,
+                    outputs=char_count_text,
+                )
+                submit_button = gr.Button(
+                    "Convert Text to Speech",
+                    variant="primary",
+                )
+            with gr.Column(scale=1):
+                output_audio = gr.Audio(label="Output Audio")
         # Define the event handler for the submit button with error handling
         def on_submit(