Spaces:
Sleeping
Sleeping
File size: 4,896 Bytes
f655f69 1e32511 f655f69 1e32511 f655f69 1e32511 f655f69 1e32511 f655f69 1e32511 c2fa877 f655f69 c2fa877 1a79fb2 f655f69 1a79fb2 f655f69 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 |
import datetime
import json
import re
import shutil
import typing as t
import wave
from collections.abc import Sized
from enum import StrEnum
from pathlib import Path
import pandas as pd
from httpx import Timeout
from langchain_openai import ChatOpenAI
from pydub import AudioSegment
from tenacity import retry, stop_after_attempt, wait_random_exponential
from src.config import logger, VOICES_CSV_FP
class GPTModels(StrEnum):
GPT_4o = "gpt-4o"
GPT_4o_MINI = "gpt-4o-mini"
GPT_4_TURBO_2024_04_09 = "gpt-4-turbo-2024-04-09"
def get_chat_llm(llm_model: GPTModels, temperature=0.0):
    """Build a ChatOpenAI client for the given model.

    Uses a 60-second overall timeout with a 4-second connect timeout.
    Temperature defaults to 0.0 for deterministic generations.
    """
    return ChatOpenAI(
        model=llm_model,
        temperature=temperature,
        timeout=Timeout(60, connect=4),
    )
def get_collection_safe_index(ix: int, collection: Sized) -> int:
    """Clamp *ix* into the valid index range of *collection*.

    Returns a value in [0, len(collection) - 1]; note that for an empty
    collection this still yields 0, matching the original behavior.
    """
    return max(0, min(ix, len(collection) - 1))
def write_txt(txt: str, fp: str):
    """Write *txt* to the file at *fp* as UTF-8 text (overwriting)."""
    Path(fp).write_text(txt, encoding='utf-8')
def write_json(data, fp: str, indent=2):
    """Serialize *data* to *fp* as pretty-printed UTF-8 JSON.

    Non-ASCII characters are written verbatim (ensure_ascii=False).
    """
    serialized = json.dumps(data, indent=indent, ensure_ascii=False)
    with open(fp, 'w', encoding='utf-8') as out_file:
        out_file.write(serialized)
def rm_dir_conditional(dp: str, to_remove=True):
    """Best-effort removal of the directory tree *dp*.

    :param dp: Path of the directory to delete.
    :param to_remove: When falsy, the call is a no-op (convenient for
        "keep intermediate files" debug flags).

    Failures are logged with a traceback rather than raised, so cleanup
    never crashes the caller.
    """
    if not to_remove:
        return
    logger.info(f'removing dir: "{dp}"')
    try:
        shutil.rmtree(dp)
    except Exception:
        # Fix: the original logged an f-string with no placeholder,
        # omitting which directory failed. Use lazy %-style args.
        logger.exception('failed to remove dir: "%s"', dp)
def get_utc_now_str():
now = datetime.datetime.now(tz=datetime.UTC)
now_str = now.strftime('%Y%m%d-%H%M%S')
return now_str
async def consume_aiter(aiterator):
    """Exhaust an async iterator and return its items as a list."""
    items = []
    async for item in aiterator:
        items.append(item)
    return items
def auto_retry(f):
    """Decorator: retry *f* with random-exponential backoff (3-10s wait)
    for up to 20 attempts before giving up."""
    return retry(
        wait=wait_random_exponential(min=3, max=10),
        stop=stop_after_attempt(20),
    )(f)
def write_bytes(data: bytes, fp: str):
    """Dump raw *data* to the file at *fp*, logging the destination."""
    logger.info(f'saving to: "{fp}"')
    Path(fp).write_bytes(data)
def write_chunked_bytes(data: t.Iterable[bytes], fp: str):
    """Stream an iterable of byte chunks into *fp*, skipping falsy (empty)
    chunks, and log the destination path."""
    logger.info(f'saving to: "{fp}"')
    with open(fp, "wb") as out_file:
        out_file.writelines(chunk for chunk in data if chunk)
def write_raw_pcm_to_file(data: bytes, fp: str, n_channels: int, bytes_depth: int, sampling_rate):
    """Wrap raw PCM sample bytes in a WAV container and save to *fp*.

    :param data: Raw interleaved PCM frames.
    :param n_channels: Number of audio channels (e.g. 1 for mono).
    :param bytes_depth: Sample width in bytes (e.g. 2 for 16-bit PCM).
    :param sampling_rate: Frame rate in Hz.
    """
    logger.info(f'saving to: "{fp}"')
    with wave.open(fp, "wb") as wav_out:
        wav_out.setnchannels(n_channels)
        wav_out.setsampwidth(bytes_depth)
        wav_out.setframerate(sampling_rate)
        wav_out.writeframes(data)
def get_audio_duration(filepath: str) -> float:
    """
    Returns the duration of the audio file in seconds.

    :param filepath: Path to the audio file.
    :return: Duration of the audio file in seconds, rounded to 1 decimal.
    """
    segment = AudioSegment.from_file(filepath)
    # pydub reports segment length in milliseconds
    return round(len(segment) / 1000, 1)
def normalize_audio(audio_segment: AudioSegment, target_dBFS: float = -20.0) -> AudioSegment:
    """Normalize an audio segment to the target dBFS level."""
    gain_needed = target_dBFS - audio_segment.dBFS
    return audio_segment.apply_gain(gain_needed)
def overlay_multiple_audio(
    main_audio_fp: str,
    audios_to_overlay_fps: list[str],
    starts_sec: list[float],  # list of start positions, in seconds
    out_fp: str,
):
    """Overlay each audio file onto the main track at its start offset,
    then export the mix as WAV to *out_fp*.

    The output always keeps the main track's length: per the pydub docs,
    overlay() "is always the same length as this AudioSegment".
    reference: https://github.com/jiaaro/pydub/blob/master/API.markdown#audiosegmentoverlay
    """
    mixed = AudioSegment.from_file(main_audio_fp)
    for overlay_fp, start_sec in zip(audios_to_overlay_fps, starts_sec):
        overlay_segment = AudioSegment.from_file(overlay_fp)
        # overlay's `position` parameter is the offset in milliseconds
        mixed = mixed.overlay(overlay_segment, position=int(start_sec * 1000))
    logger.info(f'saving overlayed audio to: "{out_fp}"')
    mixed.export(out_fp, format='wav')
def get_audio_from_voice_id(voice_id: str) -> str:
    """Look up the preview-audio URL for *voice_id* in the voices CSV.

    Raises IndexError if the voice id is not present in the CSV.
    """
    voices = pd.read_csv(VOICES_CSV_FP)
    matches = voices.loc[voices["voice_id"] == voice_id, "preview_url"]
    return matches.values[0]
def get_character_color(character: str) -> str:
    """Deterministically map a character name to a hex display color.

    Empty/None or the literal "Unassigned" name gets neutral gray;
    otherwise the sum of the name's character ordinals picks an entry
    from a fixed palette, so the same name always gets the same color.
    """
    if not character or character == "Unassigned":
        return "#808080"
    palette = (
        "#FF6B6B",  # pale red
        "#ed1262",  # magenta-red
        "#ed2bac",  # magenta
        "#892ed5",  # purple
        "#4562f7",  # blue
        "#11ab99",  # cyan
        "#58f23a",  # green
        # "#96CEB4",  # light green
        # "#D4A5A5",  # light red
    )
    name_digest = sum(map(ord, character))
    return palette[name_digest % len(palette)]
def prettify_unknown_character_label(text):
    """Rewrite auto-assigned speaker labels like "c1" into "Character1"."""
    pattern = re.compile(r'\bc(\d+)\b')
    return pattern.sub(lambda m: 'Character' + m.group(1), text)
def hex_to_rgb(hex_color):
    """Convert '#RRGGBB' (leading '#' optional) to a 'R,G,B' decimal string."""
    digits = hex_color.lstrip('#')
    channels = (int(digits[offset:offset + 2], 16) for offset in (0, 2, 4))
    return ",".join(str(value) for value in channels)
|