Spaces:
Running
Running
add overlay, sound generation from api
Browse files- src/emotions/utils.py +70 -0
src/emotions/utils.py
ADDED
@@ -0,0 +1,70 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
from pydub import AudioSegment
|
2 |
+
from pathlib import Path
|
3 |
+
from elevenlabs import ElevenLabs, AsyncElevenLabs
|
4 |
+
from elevenlabs import play, save
|
5 |
+
|
6 |
+
from src.config import logger
|
7 |
+
|
8 |
+
|
9 |
+
def get_audio_duration(filepath: str) -> float:
    """
    Measure how long an audio file plays, in seconds.

    :param filepath: Location of the audio file to inspect.
    :return: Length of the audio in seconds, rounded to one decimal place.
    """
    # len() of the loaded segment is its length in milliseconds.
    length_ms = len(AudioSegment.from_file(filepath))
    return round(length_ms / 1000, 1)
|
19 |
+
|
20 |
+
|
21 |
+
def add_overlay_for_audio(main_audio_filename: str,
                          sound_effect_filename: str,
                          output_filename: str = None,
                          cycling_effect: bool = True,
                          decrease_effect_volume: int = 0) -> str:
    """
    Mix a sound effect over a main audio track and export the result as WAV.

    :param main_audio_filename: Path to the base audio file.
    :param sound_effect_filename: Path to the sound effect laid over the base.
    :param output_filename: Destination path. When None, the name
        "<main stem>_<effect stem>.wav" is used (a relative path).
    :param cycling_effect: When True, the effect is repeated until it is at
        least as long as the main audio.
    :param decrease_effect_volume: Amount subtracted from the effect segment
        before mixing; only applied when greater than 0 (pydub interprets
        this subtraction as a gain reduction in dB).
    :return: Path of the exported file.
    :raises RuntimeError: If either input file cannot be loaded.
    :raises ValueError: If cycling is requested but the effect contains no
        audio and therefore can never reach the main audio's length.
    """
    try:
        main_audio = AudioSegment.from_file(main_audio_filename)
        effect_audio = AudioSegment.from_file(sound_effect_filename)
    except Exception as e:
        # Chain the original exception so the root cause stays in the traceback.
        raise RuntimeError(f"Error loading audio files: {e}") from e

    if cycling_effect:
        # Doubling an effect with no frames never makes it longer, so the
        # loop below would spin forever; fail fast instead.
        if len(effect_audio) < len(main_audio) and effect_audio.frame_count() == 0:
            raise ValueError(
                f"Cannot cycle an empty sound effect: {sound_effect_filename}"
            )
        while len(effect_audio) < len(main_audio):
            effect_audio += effect_audio

    # Never let the effect run past the end of the main audio.
    effect_audio = effect_audio[:len(main_audio)]

    if decrease_effect_volume > 0:
        effect_audio = effect_audio - decrease_effect_volume
    combined_audio = main_audio.overlay(effect_audio)

    if output_filename is None:
        output_filename = f"{Path(main_audio_filename).stem}_{Path(sound_effect_filename).stem}.wav"
    combined_audio.export(output_filename, format="wav")
    return output_filename
|
46 |
+
|
47 |
+
|
48 |
+
def sound_generation(sound_generation_data: dict, output_file: str):
    """
    Generate a sound effect through the ElevenLabs API and save it to a file.

    :param sound_generation_data: Mapping with the keys 'text',
        'duration_seconds' and 'prompt_influence'; each value is forwarded
        to ``text_to_sound_effects.convert``.
    :param output_file: Path the generated audio is written to.
    :raises KeyError: If one of the required keys is missing.
    """
    import os  # local import keeps this block self-contained

    # Prefer a key from the environment; the original placeholder remains the
    # fallback so behaviour is unchanged when the variable is not set.
    # NOTE(review): confirm ELEVENLABS_API_KEY is the variable this project uses.
    client = ElevenLabs(
        api_key=os.getenv("ELEVENLABS_API_KEY") or "YOUR_API_KEY",
    )
    audio = client.text_to_sound_effects.convert(
        text=sound_generation_data['text'],
        duration_seconds=sound_generation_data['duration_seconds'],
        prompt_influence=sound_generation_data['prompt_influence'],
    )
    save(audio, output_file)
    # Success is informational; it must not show up as an error in the logs.
    logger.info("Successfully generated sound effect to file: %s", output_file)
|
59 |
+
|
60 |
+
async def sound_generation_async(sound_generation_data: dict, output_file: str):
    """
    Asynchronously generate a sound effect through the ElevenLabs API and
    save it to a file.

    :param sound_generation_data: Mapping with the keys 'text',
        'duration_seconds' and 'prompt_influence'; each value is forwarded
        to ``text_to_sound_effects.convert``.
    :param output_file: Path the generated audio is written to.
    :raises KeyError: If one of the required keys is missing.
    """
    import os  # local import keeps this block self-contained

    # Prefer a key from the environment; the original placeholder remains the
    # fallback so behaviour is unchanged when the variable is not set.
    # NOTE(review): confirm ELEVENLABS_API_KEY is the variable this project uses.
    client = AsyncElevenLabs(
        api_key=os.getenv("ELEVENLABS_API_KEY") or "YOUR_API_KEY",
    )
    response = client.text_to_sound_effects.convert(
        text=sound_generation_data['text'],
        duration_seconds=sound_generation_data['duration_seconds'],
        prompt_influence=sound_generation_data['prompt_influence'],
    )
    # The async client may hand back an async stream of byte chunks (which
    # cannot be awaited) or an awaitable; only await when it is not a stream.
    if not hasattr(response, "__aiter__"):
        response = await response
    if hasattr(response, "__aiter__"):
        # save() only understands bytes or a synchronous iterator, so the
        # async stream is collected into a single bytes object first.
        audio = b"".join([chunk async for chunk in response])
    else:
        audio = response
    save(audio, output_file)
    # Success is informational; it must not show up as an error in the logs.
    logger.info("Successfully generated sound effect to file: %s", output_file)
|