Spaces:

drengskapur
/

openai-text-to-speech

Running

App Files

jonathanagustin committed on Sep 21

Commit

d72598f

•

1 Parent(s): 4ac8df6

Upload folder using huggingface_hub

Browse files

Files changed (1) hide show

app.py +8 -13

app.py CHANGED Viewed

@@ -22,7 +22,7 @@ def tts(
         voice (str): The voice profile to use (e.g., 'alloy', 'echo', 'fable', etc.).
         api_key (str): OpenAI API key.
         response_format (str): The audio format of the output file (default is 'mp3').
-        speed (float): The speed of the synthesized speech.
     Returns:
         str: File path to the generated audio file.
@@ -42,29 +42,23 @@ def tts(
     try:
         response = openai.audio.speech.create(
-            input=input_text,
-            voice=voice,
             model=model,
             response_format=response_format,
             speed=speed,
         )
-    except openai.OpenAIError as e:
-        # Catch-all for OpenAI exceptions
         raise gr.Error(f"An OpenAI error occurred: {e}")
     except Exception as e:
         # Catch any other exceptions
         raise gr.Error(f"An unexpected error occurred: {e}")
-    if not hasattr(response, "audio"):
-        raise gr.Error(
-            "Invalid response from OpenAI API. The response does not contain audio content."
-        )
     # Save the audio content to a temporary file
-    audio_content = response.audio
     file_extension = f".{response_format}"
     with tempfile.NamedTemporaryFile(suffix=file_extension, delete=False) as temp_file:
-        temp_file.write(audio_content)
         temp_file_path = temp_file.name
     return temp_file_path
@@ -171,6 +165,7 @@ def main():
                     step=0.05,
                     label="Voice Speed",
                     value=1.0,
                 )
             with gr.Column(scale=2):
@@ -229,4 +224,4 @@ def main():
     demo.launch(show_error=True)
 if __name__ == "__main__":
-    main()

         voice (str): The voice profile to use (e.g., 'alloy', 'echo', 'fable', etc.).
         api_key (str): OpenAI API key.
         response_format (str): The audio format of the output file (default is 'mp3').
+        speed (float): The speed of the synthesized speech (0.25 to 4.0).
     Returns:
         str: File path to the generated audio file.
     try:
         response = openai.audio.speech.create(
             model=model,
+            voice=voice,
+            input=input_text,
             response_format=response_format,
             speed=speed,
         )
+    except openai.error.OpenAIError as e:
+        # Catch OpenAI exceptions
         raise gr.Error(f"An OpenAI error occurred: {e}")
     except Exception as e:
         # Catch any other exceptions
         raise gr.Error(f"An unexpected error occurred: {e}")
     # Save the audio content to a temporary file
     file_extension = f".{response_format}"
     with tempfile.NamedTemporaryFile(suffix=file_extension, delete=False) as temp_file:
+        response.stream_to_file(temp_file.name)
         temp_file_path = temp_file.name
     return temp_file_path
                     step=0.05,
                     label="Voice Speed",
                     value=1.0,
+                    info="Adjust the speed of the generated speech.",
                 )
             with gr.Column(scale=2):
     demo.launch(show_error=True)
 if __name__ == "__main__":
+    main()