Tonic committed on
Commit
cbb63b6
1 Parent(s): 16c1c4f

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +23 -16
app.py CHANGED
@@ -122,35 +122,41 @@ def process_speech(input_language, audio_input):
122
  return f"{e}"
123
 
124
 
125
def convert_text_to_speech(input_text, target_language):
    """
    Turn text into speech in ``target_language`` and give the audio a unique name.

    The request goes through the module-level ``seamless_client``. The audio
    file it reports is renamed, inside its own directory, to
    ``audio_output_<uuid4>.wav``.

    Returns:
        A ``(audio_path, input_text)`` tuple. When anything fails, the first
        element is an error message instead of a path; ``input_text`` is
        always passed back unchanged.
    """
    try:
        prediction = seamless_client.predict(
            "T2ST",           # task: text-to-speech translation
            "text",           # input type
            None,             # no audio file accompanies a text request
            input_text,
            "",               # audio name (unused)
            "",               # source language (left empty)
            target_language,
            api_name="/run",
        )

        # Presumably the audio path is the second element of the response.
        generated_path = prediction[1]

        # A random UUID keeps successive outputs from colliding.
        unique_name = f"audio_output_{uuid.uuid4()}.wav"
        renamed_path = os.path.join(os.path.dirname(generated_path), unique_name)
        os.rename(generated_path, renamed_path)
    except Exception as e:
        return f"An error occurred during text-to-speech conversion: {e}", input_text

    return renamed_path, input_text
153
 
 
 
 
154
 
155
  def save_image(image_input, output_dir="saved_images"):
156
  if not os.path.exists(output_dir):
@@ -423,7 +429,8 @@ def process_and_query(input_language=None, audio_input=None, image_input=None, t
423
  final_response = process_summary_with_stablemed(summary)
424
 
425
  # Convert translated text to speech and get both audio file and text
426
- audio_output, translated_text = convert_text_to_speech(final_response, input_language)
 
427
 
428
  # Evaluate hallucination
429
  hallucination_label = evaluate_hallucination(final_response, summary)
 
122
  return f"{e}"
123
 
124
 
125
def convert_text_to_speech(input_text, source_language, target_language, client=None):
    """
    Convert text to speech in the target language and return the audio file path.

    Args:
        input_text: Text to be spoken.
        source_language: Language the text is written in.
        target_language: Language of the generated speech.
        client: Optional object exposing ``predict(*args, api_name=...)``.
            When omitted, a new ``Client`` for the hosted SeamlessM4T Space is
            created. Passing one in lets callers reuse a single connection.

    Returns:
        The path of the audio file, renamed in its own directory to
        ``audio_output_<uuid4>.wav``. On failure a message string is returned
        instead: ``"No audio file generated."`` when the response carries no
        audio entry, or ``"Error in text-to-speech conversion: ..."`` for any
        exception. Callers must therefore not assume the result is a path.
    """
    try:
        if client is None:
            # Built inside the try so a connection failure is reported like
            # any other failure instead of escaping as an exception.
            # NOTE(review): this URL pins one replica; presumably it can go
            # stale when the Space restarts - confirm.
            client = Client("https://facebook-seamless-m4t.hf.space/--replicas/8cllp/")

        result = client.predict(
            "T2ST (Text to Speech translation)",  # Task
            "text",                               # Input type
            input_text,                           # Input text
            "",                                   # Empty string for audio name
            source_language,                      # Source language
            target_language,                      # Target language
            api_name="/run",                      # API name
        )

        # Assuming the audio file path is the second element of the response;
        # anything that is not a long-enough sequence means "no audio".
        has_audio_slot = isinstance(result, (list, tuple)) and len(result) > 1
        original_audio_file = result[1] if has_audio_slot else None
        if not original_audio_file:
            return "No audio file generated."

        # A random UUID keeps successive outputs from colliding.
        new_file_name = f"audio_output_{uuid.uuid4()}.wav"
        new_file_path = os.path.join(os.path.dirname(original_audio_file), new_file_name)
        os.rename(original_audio_file, new_file_path)
        return new_file_path

    except Exception as e:
        # Return a concise error message
        return f"Error in text-to-speech conversion: {e}"
160
 
161
  def save_image(image_input, output_dir="saved_images"):
162
  if not os.path.exists(output_dir):
 
429
  final_response = process_summary_with_stablemed(summary)
430
 
431
  # Convert translated text to speech and get both audio file and text
432
+ target_language = "English" # Set the target language for the speech
433
+ audio_file_path = convert_text_to_speech(final_response, target_language, input_language)
434
 
435
  # Evaluate hallucination
436
  hallucination_label = evaluate_hallucination(final_response, summary)