tonic committed on
Commit
568c287
·
1 Parent(s): 6150c59

process audio logic

Browse files
Files changed (1) hide show
  1. app.py +31 -6
app.py CHANGED
@@ -151,18 +151,43 @@ def process_audio_to_text(audio_path, inputlanguage="English", outputlanguage="E
151
  print("Audio Result: ", result)
152
  return result[0]
153
 
154
- def process_text_to_audio(text, translatefrom="English", translateto="English"):
155
  """
156
  Convert text input to audio using the Gradio client.
 
157
  """
158
- audio_client = Client(SEAMLESSM4T)
159
- result = audio_client.predict(
160
  text,
161
  translatefrom,
162
  translateto,
163
  api_name="/t2st"
164
  )
165
- return result[0]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
166
 
167
  def initialize_ocr_models():
168
  """
@@ -221,11 +246,11 @@ def process_input(image=None, file=None, audio=None, text="", translateto = "Eng
221
  audio_text = long_audio_processor.process_long_audio(audio, inputlanguage=translatefrom, outputlanguage=translateto)
222
  final_text += "\n" + audio_text
223
 
224
- final_text_with_producetext = final_text + producetext
225
 
226
  response = co.generate(
227
  model='c4ai-aya',
228
- prompt=final_text_with_producetext.format(target_language=translateto),
229
  max_tokens=1024,
230
  temperature=0.5
231
  )
 
151
  print("Audio Result: ", result)
152
  return result[0]
153
 
154
+ def process_text_to_audio(text, translatefrom="English", translateto="English", filename_prefix="audio"):
155
  """
156
  Convert text input to audio using the Gradio client.
157
+ Ensure the audio file is correctly saved and returned as a file path.
158
  """
159
+ # Generate audio from text
160
+ audio_response = audio_client.predict(
161
  text,
162
  translatefrom,
163
  translateto,
164
  api_name="/t2st"
165
  )
166
+ filename = f"{filename_prefix}_{hash(text)}.wav"
167
+ audio_file_path = save_audio_data_to_file(audio_response, filename=filename)
168
+ return audio_file_path
169
+
170
+ def save_audio_data_to_file(audio_data, directory="audio_files", filename="output_audio.wav"):
171
+ """
172
+ Save audio data to a file and return the file path.
173
+ """
174
+ Path(directory).mkdir(parents=True, exist_ok=True)
175
+ file_path = os.path.join(directory, filename)
176
+ with open(file_path, 'wb') as file:
177
+ file.write(audio_data)
178
+ return file_path
179
+
180
+ # Ensure the function that reads the audio file checks if the path is a file
181
+ def read_audio_file(file_path):
182
+ """
183
+ Read and return the audio file content if the path is a file.
184
+ """
185
+ if os.path.isfile(file_path):
186
+ with open(file_path, 'rb') as file:
187
+ return file.read()
188
+ else:
189
+ raise ValueError(f"Expected a file path, got a directory: {file_path}")
190
+
191
 
192
  def initialize_ocr_models():
193
  """
 
246
  audio_text = long_audio_processor.process_long_audio(audio, inputlanguage=translatefrom, outputlanguage=translateto)
247
  final_text += "\n" + audio_text
248
 
249
+ final_text_with_producetext = final_text + producetext.format(target_language=translateto)
250
 
251
  response = co.generate(
252
  model='c4ai-aya',
253
+ prompt=final_text_with_producetext,
254
  max_tokens=1024,
255
  temperature=0.5
256
  )