Spaces:
Runtime error
Runtime error
tonic
committed on
Commit
·
568c287
1
Parent(s):
6150c59
process audio logic
Browse files
app.py
CHANGED
@@ -151,18 +151,43 @@ def process_audio_to_text(audio_path, inputlanguage="English", outputlanguage="E
|
|
151 |
print("Audio Result: ", result)
|
152 |
return result[0]
|
153 |
|
154 |
-
def process_text_to_audio(text, translatefrom="English", translateto="English"):
|
155 |
"""
|
156 |
Convert text input to audio using the Gradio client.
|
|
|
157 |
"""
|
158 |
-
|
159 |
-
|
160 |
text,
|
161 |
translatefrom,
|
162 |
translateto,
|
163 |
api_name="/t2st"
|
164 |
)
|
165 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
166 |
|
167 |
def initialize_ocr_models():
|
168 |
"""
|
@@ -221,11 +246,11 @@ def process_input(image=None, file=None, audio=None, text="", translateto = "Eng
|
|
221 |
audio_text = long_audio_processor.process_long_audio(audio, inputlanguage=translatefrom, outputlanguage=translateto)
|
222 |
final_text += "\n" + audio_text
|
223 |
|
224 |
-
final_text_with_producetext = final_text + producetext
|
225 |
|
226 |
response = co.generate(
|
227 |
model='c4ai-aya',
|
228 |
-
prompt=final_text_with_producetext
|
229 |
max_tokens=1024,
|
230 |
temperature=0.5
|
231 |
)
|
|
|
151 |
print("Audio Result: ", result)
|
152 |
return result[0]
|
153 |
|
154 |
+
def process_text_to_audio(text, translatefrom="English", translateto="English", filename_prefix="audio"):
    """
    Convert text input to audio using the Gradio client and save it to disk.

    Args:
        text: Text to synthesise.
        translatefrom: Source language name passed to the "/t2st" endpoint.
        translateto: Target language name passed to the "/t2st" endpoint.
        filename_prefix: Prefix for the generated ``.wav`` filename.

    Returns:
        The file path returned by ``save_audio_data_to_file``.
    """
    # Local import so this block does not depend on a file-level import.
    import hashlib

    # Generate audio from text
    audio_response = audio_client.predict(
        text,
        translatefrom,
        translateto,
        api_name="/t2st"
    )

    # The built-in hash() of a str is salted per interpreter process and may
    # be negative, so it yields a different filename for the same text after
    # every restart. A truncated SHA-256 digest is stable across runs.
    digest = hashlib.sha256(str(text).encode("utf-8", errors="replace")).hexdigest()[:16]
    filename = f"{filename_prefix}_{digest}.wav"

    # NOTE(review): save_audio_data_to_file writes its argument with
    # file.write(), so this assumes predict() returns a bytes-like payload.
    # Confirm against the endpoint; it may return a path or tuple instead.
    return save_audio_data_to_file(audio_response, filename=filename)
|
169 |
+
|
170 |
+
def save_audio_data_to_file(audio_data, directory="audio_files", filename="output_audio.wav"):
    """
    Save audio data to a file and return the file path.

    Args:
        audio_data: Bytes-like object holding the audio payload.
        directory: Target directory; created (with parents) if missing.
        filename: Name of the file to write inside ``directory``.

    Returns:
        The path of the written file, as ``os.path.join(directory, filename)``.

    Raises:
        TypeError: If ``audio_data`` is not a bytes-like object.
    """
    # Validate the payload BEFORE touching the filesystem: opening the target
    # in 'wb' mode first would truncate or create the file and leave a
    # zero-byte .wav behind when the subsequent write fails.
    try:
        payload = memoryview(audio_data)
    except TypeError as exc:
        raise TypeError(
            f"audio_data must be a bytes-like object, got {type(audio_data).__name__}"
        ) from exc

    os.makedirs(directory, exist_ok=True)
    file_path = os.path.join(directory, filename)
    with open(file_path, 'wb') as file:
        file.write(payload)
    return file_path
|
179 |
+
|
180 |
+
# Ensure the function that reads the audio file checks if the path is a file
|
181 |
+
def read_audio_file(file_path):
    """
    Read and return the audio file content if the path is a file.

    Args:
        file_path: Path of the audio file to read.

    Returns:
        The raw bytes of the file.

    Raises:
        ValueError: If ``file_path`` is a directory, or does not refer to an
            existing regular file.
    """
    if os.path.isfile(file_path):
        with open(file_path, 'rb') as file:
            return file.read()

    # Keep ValueError for both failure modes so existing callers still catch
    # it, but do not claim "directory" when the path simply does not exist.
    if os.path.isdir(file_path):
        raise ValueError(f"Expected a file path, got a directory: {file_path}")
    raise ValueError(f"Expected a file path, but no such file exists: {file_path}")
|
190 |
+
|
191 |
|
192 |
def initialize_ocr_models():
|
193 |
"""
|
|
|
246 |
audio_text = long_audio_processor.process_long_audio(audio, inputlanguage=translatefrom, outputlanguage=translateto)
|
247 |
final_text += "\n" + audio_text
|
248 |
|
249 |
+
final_text_with_producetext = final_text + producetext.format(target_language=translateto)
|
250 |
|
251 |
response = co.generate(
|
252 |
model='c4ai-aya',
|
253 |
+
prompt=final_text_with_producetext,
|
254 |
max_tokens=1024,
|
255 |
temperature=0.5
|
256 |
)
|