fountai committed on
Commit
1fe2f2f
·
1 Parent(s): a235f02
.env.example ADDED
@@ -0,0 +1,10 @@
 
 
 
 
 
 
 
 
 
 
 
1
+ HF_TOKEN=
2
+ AWS_ENDPOINT=
3
+ AWS_ACCOUNT_ID=
4
+ AWS_ACCESS_KEY=
5
+ AWS_SECRET_KEY=
6
+ AWS_BUCKET=
7
+ REDIS_HOST=
8
+ REDIS_PORT=
9
+ REDIS_DB=
10
+ REDIS_PASS=
.gitattributes copy ADDED
@@ -0,0 +1,35 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ *.7z filter=lfs diff=lfs merge=lfs -text
2
+ *.arrow filter=lfs diff=lfs merge=lfs -text
3
+ *.bin filter=lfs diff=lfs merge=lfs -text
4
+ *.bz2 filter=lfs diff=lfs merge=lfs -text
5
+ *.ckpt filter=lfs diff=lfs merge=lfs -text
6
+ *.ftz filter=lfs diff=lfs merge=lfs -text
7
+ *.gz filter=lfs diff=lfs merge=lfs -text
8
+ *.h5 filter=lfs diff=lfs merge=lfs -text
9
+ *.joblib filter=lfs diff=lfs merge=lfs -text
10
+ *.lfs.* filter=lfs diff=lfs merge=lfs -text
11
+ *.mlmodel filter=lfs diff=lfs merge=lfs -text
12
+ *.model filter=lfs diff=lfs merge=lfs -text
13
+ *.msgpack filter=lfs diff=lfs merge=lfs -text
14
+ *.npy filter=lfs diff=lfs merge=lfs -text
15
+ *.npz filter=lfs diff=lfs merge=lfs -text
16
+ *.onnx filter=lfs diff=lfs merge=lfs -text
17
+ *.ot filter=lfs diff=lfs merge=lfs -text
18
+ *.parquet filter=lfs diff=lfs merge=lfs -text
19
+ *.pb filter=lfs diff=lfs merge=lfs -text
20
+ *.pickle filter=lfs diff=lfs merge=lfs -text
21
+ *.pkl filter=lfs diff=lfs merge=lfs -text
22
+ *.pt filter=lfs diff=lfs merge=lfs -text
23
+ *.pth filter=lfs diff=lfs merge=lfs -text
24
+ *.rar filter=lfs diff=lfs merge=lfs -text
25
+ *.safetensors filter=lfs diff=lfs merge=lfs -text
26
+ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
27
+ *.tar.* filter=lfs diff=lfs merge=lfs -text
28
+ *.tar filter=lfs diff=lfs merge=lfs -text
29
+ *.tflite filter=lfs diff=lfs merge=lfs -text
30
+ *.tgz filter=lfs diff=lfs merge=lfs -text
31
+ *.wasm filter=lfs diff=lfs merge=lfs -text
32
+ *.xz filter=lfs diff=lfs merge=lfs -text
33
+ *.zip filter=lfs diff=lfs merge=lfs -text
34
+ *.zst filter=lfs diff=lfs merge=lfs -text
35
+ *tfevents* filter=lfs diff=lfs merge=lfs -text
.gitignore ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ run.py
2
+ audio.wav
3
+ key.wav
4
+ key.txt
.gitmodules ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ [submodule "f5"]
2
+ path = f5
3
+ url = https://github.com/eletroswing/f5
README copy.md ADDED
@@ -0,0 +1,12 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ title: Mimic
3
+ emoji: 🔥
4
+ colorFrom: yellow
5
+ colorTo: purple
6
+ sdk: gradio
7
+ sdk_version: 5.9.0
8
+ app_file: app.py
9
+ pinned: false
10
+ ---
11
+
12
+ Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
app.py CHANGED
@@ -71,38 +71,8 @@ class ProcessRequest(BaseModel):
71
  offset: float = -0.3
72
  format: str = "wav"
73
  speed: float = 0.8
74
- crossfade: float = 0.1
75
-
76
- q = queue.Queue()
77
-
78
- def process_queue(q):
79
- while True:
80
- try:
81
- key, censor, offset, text, format, speed, crossfade, id, receiver, webhook = q.get(timeout=5)
82
- audio = generate_audio(key, text, censor, offset, speed=speed, crossfade=crossfade)
83
- convertedAudioPath = convert(audio, format)
84
- duration = get_audio_duration(convertedAudioPath)
85
- audioUrl = upload_to_s3(convertedAudioPath, f"{id}", format)
86
- os.remove(audio)
87
- os.remove(convertedAudioPath)
88
-
89
- payload = {
90
- "id": id,
91
- "duration": duration,
92
- "receiver": receiver,
93
- "url": audioUrl
94
- }
95
-
96
- requests.post(webhook, json=payload)
97
- except Exception as e:
98
- print(e)
99
- finally:
100
- q.task_done()
101
 
102
-
103
- worker_thread = threading.Thread(target=process_queue, args=(q,))
104
- worker_thread.start()
105
-
106
  @app.post("/process")
107
  def process_audio(payload: ProcessRequest):
108
  key = payload.key
@@ -120,7 +90,21 @@ def process_audio(payload: ProcessRequest):
120
  raise HTTPException(status_code=500, detail=str(e))
121
 
122
  try:
123
- q.put((key, censor, offset, text, format, speed, crossfade, id, receiver, webhook))
 
 
 
 
 
 
 
 
 
 
 
 
 
 
124
  return {"success": True, "err": ""}
125
 
126
  except ValueError as e:
 
71
  offset: float = -0.3
72
  format: str = "wav"
73
  speed: float = 0.8
74
+ crossfade: float = 0.06
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
75
 
 
 
 
 
76
  @app.post("/process")
77
  def process_audio(payload: ProcessRequest):
78
  key = payload.key
 
90
  raise HTTPException(status_code=500, detail=str(e))
91
 
92
  try:
93
+ audio = generate_audio(key, text, censor, offset, speed=speed, crossfade=crossfade)
94
+ convertedAudioPath = convert(audio, format)
95
+ duration = get_audio_duration(convertedAudioPath)
96
+ audioUrl = upload_to_s3(convertedAudioPath, f"{id}", format)
97
+ os.remove(audio)
98
+ os.remove(convertedAudioPath)
99
+
100
+ payload = {
101
+ "id": id,
102
+ "duration": duration,
103
+ "receiver": receiver,
104
+ "url": audioUrl
105
+ }
106
+
107
+ requests.post(webhook, json=payload)
108
  return {"success": True, "err": ""}
109
 
110
  except ValueError as e:
models/__pycache__/whisper.cpython-312.pyc ADDED
Binary file (520 Bytes). View file
 
models/censor.py ADDED
@@ -0,0 +1,111 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from utils.bad_words import words
2
+ import re
3
+ from models.whisper import model
4
+ import random
5
+ import string
6
+ import subprocess
7
+
8
class Wash:
    """Profanity detector and audio censor backed by a word list.

    Text is checked against ``self.db``; audio is transcribed with the
    module-level Whisper ``model`` and every profane word is muted and
    overlaid with a 1 kHz beep using ffmpeg.
    """

    def __init__(self, custom_db=None):
        """Use ``custom_db`` as the word list, or the bundled ``words`` when it is empty/None."""
        self.db = custom_db if custom_db else words

    @staticmethod
    def clean(phrase):
        """Lower-case ``phrase`` and collapse every whitespace run into one space."""
        return re.sub(r'\s+', ' ', phrase.lower())

    @staticmethod
    def tokenize(phrase):
        """Split ``phrase`` on whitespace, with and without punctuation.

        Returns the raw tokens (so entries such as "baba-ovo" can match)
        followed by the tokens of the punctuation-stripped phrase.  ``\\w``
        is used instead of ``a-zA-Z`` so accented letters and digits survive
        (the word list contains entries like "cagão" and "n00b").
        """
        with_punctuation = phrase.split()
        without_punctuation = re.sub(r'[^\w\s]', '', phrase).split()
        return with_punctuation + without_punctuation

    def words(self):
        """Return the active word list."""
        return self.db

    def _is_listed(self, candidates):
        """Return True when any candidate string is in the word list."""
        bad_words = self.words()
        return any(candidate in bad_words for candidate in candidates)

    def check_word(self, word):
        """Return True when a single (possibly padded/capitalised) word is profane.

        The word is lower-cased and stripped of spaces before matching, so a
        transcribed token such as " Porra," is recognised.
        """
        normalized = self.clean(word).replace(" ", "")
        return self._is_listed(self.tokenize(normalized))

    def generate_random_name(self, length):
        """Return ``length`` random lowercase ASCII letters."""
        letters = string.ascii_lowercase
        return ''.join(random.choice(letters) for _ in range(length))

    @staticmethod
    def _beep_windows(itensList, offset):
        """Turn word timings into ``(start, end)`` beep windows in seconds.

        ``offset`` is added to each start and subtracted from each end, so a
        negative offset widens the window.  Starts are clamped to 0 and
        windows with no positive duration are dropped.
        """
        windows = []
        for item in itensList:
            start = max(round(item["start"] + offset, 2), 0)
            end = round(item["end"] - offset, 2)
            if end > start:
                windows.append((start, end))
        return windows

    @staticmethod
    def _build_beep_filter(windows):
        """Build the ffmpeg ``-filter_complex`` graph for the given windows.

        The input is muted inside every window, then one sine beep per window
        is delayed to the window start and mixed in.  ``adelay`` is given in
        integer milliseconds (no unit suffix).
        """
        muted_ranges = "+".join(f"between(t,{start},{end})" for start, end in windows)
        parts = [f"[0]volume=0:enable='{muted_ranges}'"]
        for index, (start, end) in enumerate(windows):
            delay_ms = int(round(start * 1000))
            # Label the running mix, generate the beep, then mix both streams.
            parts.append(f"[main{index}];")
            parts.append(
                f"sine=d={round(end - start, 2)}:f=1000,adelay={delay_ms},"
                f"pan=stereo|FL=c0|FR=c0[beep{index}];"
            )
            parts.append(f"[main{index}][beep{index}]amix=inputs=2:duration=longest")
        return "".join(parts)

    def insert_beep(self, itensList, original_file, offset=0):
        """Write a censored copy of ``original_file`` and return its path.

        Args:
            itensList: dicts with "start" and "end" times in seconds.
            original_file: path of the audio to censor.
            offset: seconds added to each start and subtracted from each end.

        Returns:
            The path of the new randomly named ``.wav`` file, or None when
            there is nothing to censor.

        Raises:
            subprocess.CalledProcessError: if ffmpeg exits with an error.
        """
        if not itensList:
            return None

        windows = self._beep_windows(itensList, offset)
        if not windows:
            return None

        output = self.generate_random_name(8) + ".wav"
        ffmpeg_cmd = [
            "ffmpeg",
            "-i",
            original_file,
            "-filter_complex",
            self._build_beep_filter(windows),
            output,
        ]
        # check=True: fail loudly instead of returning a path ffmpeg never wrote.
        subprocess.run(ffmpeg_cmd, check=True)
        return output

    def process_audio(self, audio_path, offset):
        """Transcribe ``audio_path`` and beep out every profane word.

        Returns the path of the censored file, or ``audio_path`` itself when
        no profanity was found.
        """
        segments, _info = model.transcribe(audio_path, word_timestamps=True, beam_size=1, best_of=1)
        bad_time = [
            {"start": word.start, "end": word.end}
            for segment in segments
            for word in segment.words
            if self.check_word(word.word)
        ]

        result = self.insert_beep(bad_time, audio_path, offset)
        return result if result else audio_path

    def check(self, phrase):
        """Return True when ``phrase`` contains a listed word.

        Candidates are the individual tokens of the cleaned phrase, the
        cleaned phrase itself (for multi-word entries) and the phrase with
        all spaces removed (for run-together entries such as "filhodaputa").
        """
        cleaned_phrase = self.clean(phrase).strip()
        candidates = self.tokenize(cleaned_phrase)
        candidates.append(cleaned_phrase)
        candidates.extend(self.tokenize(cleaned_phrase.replace(" ", "")))
        return self._is_listed(candidates)
models/voice.py ADDED
@@ -0,0 +1,114 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # ruff: noqa: E402
2
+ # Above allows ruff to ignore E402: module level import not at top of file
3
+
4
+ import re
5
+ import tempfile
6
+
7
+ import gradio as gr
8
+ import soundfile as sf
9
+ import torchaudio
10
+
11
+ try:
12
+ import spaces
13
+
14
+ USING_SPACES = True
15
+ except ImportError:
16
+ USING_SPACES = False
17
+
18
+
19
def gpu_decorator(func):
    """Wrap ``func`` with ``spaces.GPU`` when ``spaces`` was importable, else return it unchanged."""
    if not USING_SPACES:
        return func
    return spaces.GPU(func)
24
+
25
+
26
+ from f5.src.f5_tts.model import DiT
27
+ from f5.src.f5_tts.infer.utils_infer import (
28
+ load_vocoder,
29
+ load_model,
30
+ preprocess_ref_audio_text,
31
+ infer_process,
32
+ remove_silence_for_generated_wav,
33
+ save_spectrogram,
34
+ )
35
+
36
+ F5TTS_model_cfg = dict(dim=1024, depth=22, heads=16, ff_mult=2, text_dim=512, conv_layers=4)
37
+ F5TTS_ema_model = None
38
+
39
+ vocoder = None
40
+
41
def load(path = "./model_1200000.safetensors"):
    """Load the DiT checkpoint at ``path`` into the module-level ``F5TTS_ema_model``."""
    global F5TTS_ema_model
    loaded_model = load_model(DiT, F5TTS_model_cfg, path)
    F5TTS_ema_model = loaded_model
46
+
47
def loadVoc():
    """Load the vocoder into the module-level ``vocoder``."""
    global vocoder
    loaded_vocoder = load_vocoder()
    vocoder = loaded_vocoder
50
+
51
@gpu_decorator
def infer(ref_audio_orig, ref_text, gen_text, remove_silence, cross_fade_duration=0.10, speed=0.9):
    """Synthesize ``gen_text`` using a reference recording and its transcript.

    Args:
        ref_audio_orig: reference audio passed to ``preprocess_ref_audio_text``.
        ref_text: transcript of the reference audio.
        gen_text: text to synthesize.
        remove_silence: when true, run ``remove_silence_for_generated_wav``
            on the generated audio before saving it.
        cross_fade_duration: forwarded to ``infer_process``.
        speed: forwarded to ``infer_process``.

    Returns:
        ``(audio_path, spectrogram_path)``: paths of a temporary ``.wav`` and
        ``.png``. Both are created with ``delete=False``, so the caller is
        responsible for removing them.
    """
    import os  # local import: this module has no file-level ``import os``

    # Lazily load the model and vocoder on first use.
    if F5TTS_ema_model is None:
        load()

    if vocoder is None:
        loadVoc()

    ref_audio, ref_text = preprocess_ref_audio_text(ref_audio_orig, ref_text)

    final_wave, final_sample_rate, combined_spectrogram = infer_process(
        ref_audio,
        ref_text,
        gen_text,
        F5TTS_ema_model,
        vocoder,
        cross_fade_duration=cross_fade_duration,
        speed=speed,
    )

    print("final_wave doe")
    if remove_silence:
        # Reserve a scratch file name, then close the handle before other
        # libraries write to / read from the path.
        with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as f:
            scratch_path = f.name
        try:
            sf.write(scratch_path, final_wave, final_sample_rate)
            remove_silence_for_generated_wav(scratch_path)
            final_wave, _ = torchaudio.load(scratch_path)
        finally:
            # The scratch file is only an intermediate; do not leak it.
            os.remove(scratch_path)
        final_wave = final_wave.squeeze().cpu().numpy()

    print('silence removed')

    with tempfile.NamedTemporaryFile(suffix=".png", delete=False) as tmp_spectrogram:
        spectrogram_path = tmp_spectrogram.name
        save_spectrogram(combined_spectrogram, spectrogram_path)

    with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as f_audio:
        audio_path = f_audio.name
        sf.write(audio_path, final_wave, final_sample_rate)

    return audio_path, spectrogram_path
91
+
92
def parse_speechtypes_text(gen_text):
    """Split ``gen_text`` into text segments tagged with a speech type.

    A ``{name}`` marker switches the current speech type for the text that
    follows it; text before the first marker is tagged "Regular".

    Returns:
        A list of ``{"emotion": str, "text": str}`` dicts, one per non-empty
        stretch of text, in order of appearance.
    """
    # re.split with a capturing group alternates: text, marker, text, marker...
    pieces = re.split(r"\{(.*?)\}", gen_text)

    segments = []
    current_emotion = "Regular"

    for index, piece in enumerate(pieces):
        stripped = piece.strip()
        if index % 2:
            # Odd positions hold the captured marker names.
            current_emotion = stripped
        elif stripped:
            segments.append({"emotion": current_emotion, "text": stripped})

    return segments
models/whisper.py ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
1
+ from faster_whisper import WhisperModel
2
+ import torch
3
+
4
# Run on the GPU with float16 when CUDA is available, otherwise on the CPU
# with int8.
if torch.cuda.is_available():
    device, compute_type = "cuda", "float16"
else:
    device, compute_type = "cpu", "int8"

model_size = "medium"

# Shared Whisper model instance imported by other modules.
model = WhisperModel(model_size, device=device, compute_type=compute_type)
modules/__pycache__/audio.cpython-312.pyc ADDED
Binary file (2.14 kB). View file
 
modules/__pycache__/r2.cpython-312.pyc ADDED
Binary file (1.55 kB). View file
 
modules/__pycache__/redis.cpython-312.pyc ADDED
Binary file (519 Bytes). View file
 
modules/__pycache__/register.cpython-312.pyc ADDED
Binary file (3.92 kB). View file
 
modules/audio.py ADDED
@@ -0,0 +1,44 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import re
2
+ import subprocess
3
+ from pydub.utils import mediainfo
4
+ import math
5
+ from pydub import AudioSegment
6
+
7
def get_audio_duration(file_path):
    """Return the duration of the audio file at ``file_path`` in seconds.

    Raises:
        ValueError: if the file cannot be loaded; the original exception is
            attached as ``__cause__``.
    """
    try:
        audio = AudioSegment.from_file(file_path)
        # len() of an AudioSegment is in milliseconds; convert to seconds.
        return len(audio) / 1000
    except Exception as e:
        raise ValueError(f"Error extracting duration: {e}") from e
13
+
14
def cut_audio(file_path, start_time, end_time, output_path="output_cut.wav"):
    """Cut the audio to the ``start_time``..``end_time`` interval with ffmpeg.

    Args:
        file_path: input audio path.
        start_time: value for ffmpeg's ``-ss`` option.
        end_time: value for ffmpeg's ``-to`` option.
        output_path: where the cut audio is written (overwritten if present).

    Returns:
        ``output_path``.

    Raises:
        RuntimeError: if ffmpeg exits with a non-zero status; the message
            includes ffmpeg's stderr output.
    """
    # Argument list + no shell: paths (which may derive from request data)
    # are never interpreted by a shell.
    command = [
        "ffmpeg", "-y",
        "-loglevel", "error",
        "-i", str(file_path),
        "-ss", str(start_time),
        "-to", str(end_time),
        "-acodec", "copy",
        str(output_path),
    ]
    try:
        # Capture stderr so the error raised below actually carries it.
        subprocess.run(
            command,
            check=True,
            stdout=subprocess.DEVNULL,
            stderr=subprocess.PIPE,
            text=True,
        )
    except subprocess.CalledProcessError as e:
        raise RuntimeError(f"Erro ao cortar o áudio: {e.stderr}") from e
    return output_path
24
+
25
def convert(file_path, format_extension):
    """Convert ``file_path`` to ``format_extension`` with ffmpeg.

    The result is written to the current working directory as
    ``out_<input stem>.<format_extension>``; ffmpeg picks the output format
    from that extension.  When ``format_extension`` is empty the input's own
    extension is kept.

    Returns:
        The output file name.  ffmpeg's exit status is not checked.
    """
    import os  # local import: this module has no file-level ``import os``

    # basename() works for any temp directory, not only "/tmp/".
    stem, source_ext = os.path.splitext(os.path.basename(file_path))
    if format_extension:
        target_ext = "." + format_extension.lstrip(".")
    else:
        target_ext = source_ext
    output_path = f"out_{stem}{target_ext}"

    ffmpeg_cmd = [
        "ffmpeg",
        "-y",
        "-i", file_path,
        output_path,
    ]

    subprocess.run(ffmpeg_cmd)
    return output_path
36
+
37
def add_silence_to_audio(file_path, start_silence=0, end_silence=0, output_path="output_with_silence.wav"):
    """Add silence to the start and end of an audio file with ffmpeg.

    Args:
        file_path: input audio path.
        start_silence: seconds of silence to prepend (applied via ``adelay``
            to the first two channels).
        end_silence: seconds of silence to append (via ``apad``).
        output_path: where the result is written (overwritten if present).

    Returns:
        ``output_path``.

    Raises:
        RuntimeError: if ffmpeg exits with a non-zero status.
    """
    delay_ms = start_silence * 1000
    audio_filter = f"adelay={delay_ms}|{delay_ms},apad=pad_dur={end_silence}"
    # Argument list + no shell: paths are never interpreted by a shell.
    command = [
        "ffmpeg", "-y",
        "-i", str(file_path),
        "-af", audio_filter,
        str(output_path),
    ]
    try:
        subprocess.run(command, check=True, stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL)
    except subprocess.CalledProcessError as e:
        raise RuntimeError(f"Erro ao adicionar silêncio no áudio: {e}") from e
    return output_path
modules/hf.py ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ from huggingface_hub import hf_hub_download
3
+
4
# Token read from the HF_TOKEN environment variable (None when unset).
token = os.environ.get("HF_TOKEN")

# Downloaded at import time into the current directory.
model_path = hf_hub_download(
    repo_id='Alertpix/new_audio_model',
    filename='model_1200000.safetensors',
    token=token,
    local_dir="./",
)


def get_model_path():
    """Return the local path of the downloaded model file."""
    return model_path
modules/r2.py ADDED
@@ -0,0 +1,38 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import boto3
2
+ from botocore.config import Config
3
+ import os
4
+ import io
5
+
6
# Connection settings, all read from the environment.
endpoint = os.environ.get("AWS_ENDPOINT")
account_id = os.environ.get("AWS_ACCOUNT_ID")
aws_access_key_id = os.environ.get("AWS_ACCESS_KEY")
aws_secret_access_key = os.environ.get("AWS_SECRET_KEY")
bucket = os.environ.get("AWS_BUCKET")

# SigV4 signing with path-style addressing and unsigned payloads.
config = Config(
    signature_version='s3v4',
    s3={'addressing_style': 'path', 'payload_signing_enabled': False},
)

# Shared client used by the upload / URL helpers in this module.
s3 = boto3.client(
    's3',
    endpoint_url=endpoint,
    aws_access_key_id=aws_access_key_id,
    aws_secret_access_key=aws_secret_access_key,
    config=config,
)
22
+
23
def upload_to_s3(path, name, extension):
    """Upload the file at ``path`` to the bucket under key ``name``.

    The object is stored with content type ``audio/<extension>`` and a
    ``public-read`` ACL.

    Returns:
        A presigned GET URL for the uploaded object, as produced by
        ``get_url``.
    """
    upload_options = {'ContentType': f'audio/{extension}', 'ACL': 'public-read'}
    s3.upload_file(path, bucket, name, ExtraArgs=upload_options)
    # Reuse the shared presigning helper instead of duplicating its arguments.
    return get_url(name)
31
+
32
def get_url(name):
    """Return a presigned GET URL for key ``name``, valid for 604800 seconds."""
    presign_params = {'Bucket': bucket, 'Key': name}
    return s3.generate_presigned_url(
        'get_object',
        Params=presign_params,
        ExpiresIn=604800,
    )
modules/redis.py ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import redis
3
+
4
# Redis connection settings.  ``or`` (rather than a getenv default) also
# covers variables that are present but empty, as in an unfilled .env file.
redis_host = os.getenv('REDIS_HOST') or 'localhost'
# No password unless one is configured; the previous default sent the literal
# string 'localhost' as the password.
redis_pass = os.getenv('REDIS_PASS') or None
# Environment values are strings; the port must be an integer.
redis_port = int(os.getenv('REDIS_PORT') or 6379)
# NOTE(review): .env.example also defines REDIS_DB, which is not read here —
# confirm whether a non-default database is intended.

cache = redis.Redis(host=redis_host, port=redis_port, password=redis_pass)
modules/register.py ADDED
@@ -0,0 +1,111 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from models.whisper import model
2
+ import modules.audio as audio
3
+ import os
4
+ from modules.r2 import upload_to_s3,get_url
5
+ import json
6
+ import os
7
+ import redis
8
+
9
# Redis connection settings.  ``or`` (rather than a getenv default) also
# covers variables that are present but empty, as in an unfilled .env file.
redis_host = os.getenv('REDIS_HOST') or 'localhost'
# No password unless one is configured; the previous default sent the literal
# string 'localhost' as the password.
redis_pass = os.getenv('REDIS_PASS') or None
# Environment values are strings; the port must be an integer.
redis_port = int(os.getenv('REDIS_PORT') or 6379)

cache = redis.Redis(host=redis_host, port=redis_port, password=redis_pass)

# Kept as the strings "True"/"False" because the rest of the module (and the
# dict returned by get_audio) compares against the string "True".
isOnline = "True" if os.environ.get("IS_ONLINE") == "True" else "False"
17
+
18
def prepare_audio(audio_path, key):
    """Validate, trim and pad a reference recording.

    Recordings longer than 15 seconds are cut to their first 12 seconds;
    0.5 s of silence is then added at the start and 0.8 s at the end.

    Args:
        audio_path: path of the uploaded recording.
        key: identifier used to name the output file ``<key>.wav``.

    Returns:
        The path returned by ``audio.add_silence_to_audio`` (``<key>.wav``).

    Raises:
        ValueError: "audio_too_short" when the recording is under 8 seconds.
    """
    audio_duration = round(audio.get_audio_duration(audio_path), 2)

    # Reject early, before any ffmpeg work is done.
    if audio_duration < 8:
        raise ValueError("audio_too_short")

    cut_path = None
    if audio_duration > 15:
        cut_path = audio.cut_audio(audio_path, start_time="0", end_time="12", output_path=f"{key}_cut.wav")
        audio_path = cut_path

    try:
        return audio.add_silence_to_audio(audio_path, start_silence=0.5, end_silence=0.8, output_path=f"{key}.wav")
    finally:
        # Remove the intermediate cut even when adding silence fails.
        if cut_path is not None and os.path.exists(cut_path):
            os.remove(cut_path)
35
+
36
def get_audio_transcription(key):
    """Return the stored transcription for ``key``.

    Online: the value of the Redis key ``mimic:audio:translation:<key>``
    exactly as ``cache.get`` returns it.  Offline: the contents of
    ``<key>.txt``.
    """
    if isOnline != "True":
        with open(f"{key}.txt", "r") as transcript_file:
            return transcript_file.read()

    return cache.get(f"mimic:audio:translation:{key}")
45
+
46
def get_audio_path(key):
    """Return where the reference audio for ``key`` can be fetched.

    Offline: the local file name ``<key>.wav``.  Online: the URL cached in
    Redis under ``mimic:audio:url:<key>``, or a freshly presigned URL (which
    is then cached for 600000 seconds).

    Returns:
        A ``str`` in every case; cached byte values are decoded as UTF-8.
    """
    if isOnline != "True":
        return f"{key}.wav"

    cache_key = f"mimic:audio:url:{key}"
    cached_url = cache.get(cache_key)
    if cached_url:
        # Decode so cached and fresh URLs have the same type.
        return cached_url.decode("utf-8") if isinstance(cached_url, bytes) else cached_url

    new_url = get_url(f"{key}.wav")
    # Single SET with expiry: the key can never be left without a TTL.
    cache.set(cache_key, new_url, ex=600000)
    return new_url
61
+
62
def get_audio(key):
    """Return the transcription, audio location and online flag for ``key``."""
    return {
        "transcription": get_audio_transcription(key),
        "audio_path": get_audio_path(key),
        "isOnline": isOnline,
    }
71
+
72
def _transcribe_to_text(audio_path):
    """Transcribe ``audio_path`` with Whisper; each segment's text is prefixed by a space."""
    segments, _info = model.transcribe(audio_path, beam_size=5)
    return "".join(f" {segment.text}" for segment in segments)


def process_audio(audio_path, key):
    """Register a reference recording under ``key`` and transcribe it.

    If the key is already registered the stored data is returned via
    ``get_audio``.  Otherwise the recording is prepared with
    ``prepare_audio`` and transcribed, then:

    * online: the audio is uploaded and the transcription, URL (with a
      600000-second expiry) and an existence flag are stored in Redis;
    * offline: the transcription is written to ``<key>.txt``.

    Returns:
        A dict with "transcription", "audio_path" and "isOnline".
    """
    if isOnline == "True":
        if cache.exists(f"mimic:audio_exists:{key}"):
            return get_audio(key)

        audio_path = prepare_audio(audio_path, key)
        content = _transcribe_to_text(audio_path)
        url = upload_to_s3(audio_path, f"{key}.wav", "wav")

        cache.set(f"mimic:audio:translation:{key}", content)
        # Single SET with expiry: the key can never be left without a TTL.
        cache.set(f"mimic:audio:url:{key}", url, ex=600000)
        cache.set(f"mimic:audio_exists:{key}", "true")
        return {"transcription": content, "audio_path": url, "isOnline": isOnline}

    # Offline: the transcript file doubles as the "already registered" marker.
    if os.path.exists(f"{key}.txt"):
        return get_audio(key)

    # Drop a stale audio file left behind without a transcript.
    if os.path.exists(f"{key}.wav"):
        os.remove(f"{key}.wav")

    audio_path = prepare_audio(audio_path, key)
    content = _transcribe_to_text(audio_path)

    with open(f"{key}.txt", "w") as f:
        f.write(content)

    return {"transcription": content, "audio_path": audio_path, "isOnline": isOnline}
packages.txt ADDED
@@ -0,0 +1,2 @@
 
 
 
1
+ ffmpeg
2
+ espeak
processor.py ADDED
@@ -0,0 +1,62 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # download model
2
+ import modules.hf as hf
3
+
4
+ # load models
5
+ import models.voice as voice
6
+ import models.whisper as whisper
7
+
8
+ voice.load()
9
+ voice.loadVoc()
10
+
11
+ #libs
12
+ import modules.register as register
13
+ from models.censor import Wash
14
+ import requests
15
+ import os
16
+
17
def download_audio(url, output_file):
    """
    Downloads an audio file from the given URL and saves it locally.
    If the file already exists, it returns the path without downloading again.

    The download is streamed to ``<output_file>.part`` and renamed only when
    complete, so an interrupted transfer never leaves a truncated
    ``output_file`` that later calls would mistake for a finished download.

    :param url: URL of the audio file
    :param output_file: Path where the audio will be saved
    :return: Path to the audio file, or None if the request failed
    """
    if os.path.exists(output_file):
        print(f"File already exists: {output_file}")
        return output_file

    partial_file = f"{output_file}.part"
    try:
        # (connect, read) timeouts so a stalled server cannot hang the worker.
        with requests.get(url, stream=True, timeout=(10, 60)) as response:
            response.raise_for_status()  # Raise an HTTPError for bad responses (4xx and 5xx)

            with open(partial_file, 'wb') as file:
                for chunk in response.iter_content(chunk_size=8192):
                    file.write(chunk)

        os.replace(partial_file, output_file)
        print(f"Audio downloaded successfully: {output_file}")
        return output_file
    except requests.exceptions.RequestException as e:
        print(f"Error downloading audio: {e}")
        if os.path.exists(partial_file):
            os.remove(partial_file)
        return None
43
+
44
+ # generate audio function
45
+ censorModel = Wash()
46
+
47
def generate_audio(key, text, censor=False, offset=0, speed=0.9, crossfade=0.1):
    """Generate speech for ``text`` in the voice registered under ``key``.

    Args:
        key: identifier of the registered reference recording.
        text: text to synthesize.
        censor: when true, run the result through ``censorModel.process_audio``.
        offset: forwarded to the censor as its timing offset.
        speed: forwarded to ``voice.infer``.
        crossfade: forwarded to ``voice.infer`` as ``cross_fade_duration``.

    Returns:
        Path of the generated (and possibly censored) audio file.

    Raises:
        ValueError: in online mode, when no transcription is stored for
            ``key`` or the reference audio cannot be downloaded.
    """
    data = register.get_audio(key)
    reference_audio = data["audio_path"]
    transcription = data["transcription"]

    if data["isOnline"] == "True":
        if transcription is None:
            raise ValueError(f"no transcription registered for key: {key}")
        reference_audio = download_audio(reference_audio, f'{key}.wav')
        if reference_audio is None:
            raise ValueError(f"could not download reference audio for key: {key}")
        # Redis returns bytes unless the client decodes responses.
        if isinstance(transcription, bytes):
            transcription = transcription.decode('utf-8')
        print(transcription)

    # voice.infer names its cross-fade parameter ``cross_fade_duration``.
    audio, spectrogram = voice.infer(
        reference_audio,
        transcription,
        text,
        remove_silence=True,
        speed=speed,
        cross_fade_duration=crossfade,
    )

    # The spectrogram image is not used here; remove the temp file.
    try:
        os.remove(spectrogram)
    except OSError:
        pass

    if censor:
        audio = censorModel.process_audio(audio, offset)

    return audio
62
+
requirements.txt ADDED
@@ -0,0 +1,10 @@
 
 
 
 
 
 
 
 
 
 
 
1
+ ffmpeg
2
+ redis
3
+ faster-whisper
4
+ boto3==1.35.99
5
+ -e git+https://github.com/SWivid/F5-TTS.git#egg=F5-TTS
6
+ phonemizer
7
+ pydub
8
+ fastapi
9
+ uvicorn
10
+ uuid
swagger.json ADDED
@@ -0,0 +1,52 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "openapi": "3.0.0",
3
+ "info": {
4
+ "title": "Alert Pix Ai v2",
5
+ "version": "1.0.0",
6
+ "description": "This is a simple API for demonstration purposes."
7
+ },
8
+ "paths": {
9
+ "/api/generate": {
10
+ "post": {
11
+ "tags": ["Generate"],
12
+ "summary": "Generate text from prompt",
13
+ "operationId": "generateText",
14
+ "requestBody": {
15
+ "content": {
16
+ "application/json": {
17
+ "schema": {
18
+ "$ref": "#/components/schemas/Generate"
19
+ }
20
+ }
21
+ },
22
+ "required": true
23
+ },
24
+ "responses": {
25
+ "200": {
26
+ "description": "Generated text",
27
+ "content": {
28
+ "application/json": {
29
+ "schema": {
30
+ "$ref": "#/components/schemas/Generate"
31
+ }
32
+ }
33
+ }
34
+ }
35
+ }
36
+ }
37
+ }
38
+ },
39
+ "components": {
40
+ "schemas": {
41
+ "Generate": {
42
+ "type": "object",
43
+ "properties": {
44
+ "text": {
45
+ "type": "string",
46
+ "example": "This is a generated text."
47
+ }
48
+ }
49
+ }
50
+ }
51
+ }
52
+ }
utils/bad_words.py ADDED
@@ -0,0 +1,568 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ words = [
2
+ "goze",
3
+ "aidético",
4
+ "aidética",
5
+ "aleijado",
6
+ "aleijada",
7
+ "analfabeto",
8
+ "analfabeta",
9
+ "fuder",
10
+ "porra",
11
+ "anus",
12
+ "anão",
13
+ "anã",
14
+ "arrombado",
15
+ "apenado",
16
+ "apenada",
17
+ "baba-ovo",
18
+ "babaca",
19
+ "babaovo",
20
+ "bacura",
21
+ "bagos",
22
+ "baianada",
23
+ "baitola",
24
+ "barbeiro",
25
+ "barraco",
26
+ "beata",
27
+ "bebum",
28
+ "besta",
29
+ "bicha",
30
+ "bisca",
31
+ "bixa",
32
+ "boazuda",
33
+ "boceta",
34
+ "boco",
35
+ "boiola",
36
+ "bokete",
37
+ "bolagato",
38
+ "bolcat",
39
+ "boquete",
40
+ "bosseta",
41
+ "bosta",
42
+ "bostana",
43
+ "boçal",
44
+ "branquelo",
45
+ "brecha",
46
+ "brexa",
47
+ "brioco",
48
+ "bronha",
49
+ "buca",
50
+ "buceta",
51
+ "bugre",
52
+ "bunda",
53
+ "bunduda",
54
+ "burra",
55
+ "burro",
56
+ "busseta",
57
+ "bárbaro",
58
+ "bêbado",
59
+ "bêbedo",
60
+ "caceta",
61
+ "cacete",
62
+ "cachorra",
63
+ "cachorro",
64
+ "cadela",
65
+ "caga",
66
+ "cagado",
67
+ "cagao",
68
+ "cagão",
69
+ "cagona",
70
+ "caipira",
71
+ "canalha",
72
+ "canceroso",
73
+ "caralho",
74
+ "casseta",
75
+ "cassete",
76
+ "ceguinho",
77
+ "checheca",
78
+ "chereca",
79
+ "chibumba",
80
+ "chibumbo",
81
+ "chifruda",
82
+ "chifrudo",
83
+ "chochota",
84
+ "chota",
85
+ "chupada",
86
+ "chupado",
87
+ "ciganos",
88
+ "clitoris",
89
+ "clitóris",
90
+ "cocaina",
91
+ "cocaína",
92
+ "coco",
93
+ "cocô",
94
+ "comunista",
95
+ "corna",
96
+ "cornagem",
97
+ "cornisse",
98
+ "corno",
99
+ "cornuda",
100
+ "cornudo",
101
+ "cornão",
102
+ "corrupta",
103
+ "corrupto",
104
+ "coxo",
105
+ "cretina",
106
+ "cretino",
107
+ "criolo",
108
+ "crioulo",
109
+ "cruz-credo",
110
+ "cu",
111
+ "cú",
112
+ "culhao",
113
+ "culhão",
114
+ "curalho",
115
+ "cuzao",
116
+ "cuzão",
117
+ "cuzuda",
118
+ "cuzudo",
119
+ "debil",
120
+ "débil",
121
+ "debiloide",
122
+ "debilóide",
123
+ "deficiente",
124
+ "defunto",
125
+ "demonio",
126
+ "demônio",
127
+ "denegrir",
128
+ "denigrir",
129
+ "detento",
130
+ "difunto",
131
+ "doida",
132
+ "doido",
133
+ "egua",
134
+ "égua",
135
+ "elemento",
136
+ "encostado",
137
+ "esclerosado",
138
+ "escrota",
139
+ "escroto",
140
+ "esporrada",
141
+ "esporrado",
142
+ "esporro",
143
+ "estupida",
144
+ "estúpida",
145
+ "estupidez",
146
+ "estupido",
147
+ "estúpido",
148
+ "facista",
149
+ "fanatico",
150
+ "fanático",
151
+ "fascista",
152
+ "fedida",
153
+ "fedido",
154
+ "fedor",
155
+ "fedorenta",
156
+ "feia",
157
+ "feio",
158
+ "feiosa",
159
+ "feioso",
160
+ "feioza",
161
+ "feiozo",
162
+ "felacao",
163
+ "felação",
164
+ "fenda",
165
+ "foda",
166
+ "fodao",
167
+ "fodão",
168
+ "fode",
169
+ "fodi",
170
+ "fodida",
171
+ "fodido",
172
+ "fornica",
173
+ "fornição",
174
+ "fudendo",
175
+ "fudeção",
176
+ "fudida",
177
+ "fudido",
178
+ "furada",
179
+ "furado",
180
+ "furnica",
181
+ "furnicar",
182
+ "furo",
183
+ "furona",
184
+ "furão",
185
+ "gai",
186
+ "gaiata",
187
+ "gaiato",
188
+ "gay",
189
+ "gilete",
190
+ "goianada",
191
+ "gonorrea",
192
+ "gonorreia",
193
+ "gonorréia",
194
+ "gosmenta",
195
+ "gosmento",
196
+ "grelinho",
197
+ "grelo",
198
+ "gringo",
199
+ "homo-sexual",
200
+ "homosexual",
201
+ "homosexualismo",
202
+ "homossexual",
203
+ "homossexualismo",
204
+ "idiota",
205
+ "idiotice",
206
+ "imbecil",
207
+ "inculto",
208
+ "iscrota",
209
+ "iscroto",
210
+ "japa",
211
+ "judiar",
212
+ "ladra",
213
+ "ladrao",
214
+ "ladroeira",
215
+ "ladrona",
216
+ "ladrão",
217
+ "lalau",
218
+ "lazarento",
219
+ "leprosa",
220
+ "leproso",
221
+ "lesbica",
222
+ "lésbica",
223
+ "louco",
224
+ "macaca",
225
+ "macaco",
226
+ "machona",
227
+ "macumbeiro",
228
+ "malandro",
229
+ "maluco",
230
+ "maneta",
231
+ "marginal",
232
+ "masturba",
233
+ "meleca",
234
+ "meliante",
235
+ "merda",
236
+ "mija",
237
+ "mijada",
238
+ "mijado",
239
+ "mijo",
240
+ "minorias",
241
+ "mocrea",
242
+ "mocreia",
243
+ "mocréia",
244
+ "moleca",
245
+ "moleque",
246
+ "mondronga",
247
+ "mondrongo",
248
+ "mongol",
249
+ "mongoloide",
250
+ "mongolóide",
251
+ "mulata",
252
+ "mulato",
253
+ "naba",
254
+ "nadega",
255
+ "nádega",
256
+ "nazista",
257
+ "negro",
258
+ "nhaca",
259
+ "nojeira",
260
+ "nojenta",
261
+ "nojento",
262
+ "nojo",
263
+ "olhota",
264
+ "otaria",
265
+ "otario",
266
+ "otária",
267
+ "otário",
268
+ "paca",
269
+ "palhaco",
270
+ "palhaço",
271
+ "paspalha",
272
+ "paspalhao",
273
+ "paspalho",
274
+ "pau",
275
+ "peia",
276
+ "peido",
277
+ "pemba",
278
+ "pentelha",
279
+ "pentelho",
280
+ "perereca",
281
+ "perneta",
282
+ "peru",
283
+ "peão",
284
+ "pica",
285
+ "picao",
286
+ "picão",
287
+ "pilantra",
288
+ "pinel",
289
+ "pinto",
290
+ "pintudo",
291
+ "pintão",
292
+ "piranha",
293
+ "piroca",
294
+ "piroco",
295
+ "piru",
296
+ "pivete",
297
+ "porra",
298
+ "prega",
299
+ "preso",
300
+ "prequito",
301
+ "priquito",
302
+ "prostibulo",
303
+ "prostituta",
304
+ "prostituto",
305
+ "punheta",
306
+ "punhetao",
307
+ "punhetão",
308
+ "pus",
309
+ "pustula",
310
+ "puta",
311
+ "puto",
312
+ "puxa-saco",
313
+ "puxasaco",
314
+ "penis",
315
+ "pênis",
316
+ "rabao",
317
+ "rabão",
318
+ "rabo",
319
+ "rabuda",
320
+ "rabudao",
321
+ "rabudão",
322
+ "rabudo",
323
+ "rabudona",
324
+ "racha",
325
+ "rachada",
326
+ "rachadao",
327
+ "rachadinha",
328
+ "rachadinho",
329
+ "rachado",
330
+ "ramela",
331
+ "remela",
332
+ "retardada",
333
+ "retardado",
334
+ "ridícula",
335
+ "roceiro",
336
+ "rola",
337
+ "rolinha",
338
+ "rosca",
339
+ "sacana",
340
+ "safada",
341
+ "safado",
342
+ "sapatao",
343
+ "sapatão",
344
+ "sifilis",
345
+ "sífilis",
346
+ "siririca",
347
+ "tarada",
348
+ "tarado",
349
+ "testuda",
350
+ "tesuda",
351
+ "tesudo",
352
+ "tezao",
353
+ "tezuda",
354
+ "tezudo",
355
+ "traveco",
356
+ "trocha",
357
+ "trolha",
358
+ "troucha",
359
+ "trouxa",
360
+ "troxa",
361
+ "tuberculoso",
362
+ "tupiniquim",
363
+ "turco",
364
+ "vaca",
365
+ "vadia",
366
+ "vagal",
367
+ "vagabunda",
368
+ "vagabundo",
369
+ "vagina",
370
+ "veada",
371
+ "veadao",
372
+ "veado",
373
+ "viada",
374
+ "viadagem",
375
+ "viadao",
376
+ "viadão",
377
+ "viado",
378
+ "viadão",
379
+ "víado",
380
+ "xana",
381
+ "xaninha",
382
+ "xavasca",
383
+ "xerereca",
384
+ "xexeca",
385
+ "xibiu",
386
+ "xibumba",
387
+ "xiíta",
388
+ "xochota",
389
+ "xota",
390
+ "xoxota",
391
+ "animal de teta",
392
+ "animaldeteta",
393
+ "anormal",
394
+ "argentino",
395
+ "arregassado",
396
+ "arrombado",
397
+ "babaca",
398
+ "baitola",
399
+ "baleia",
400
+ "barril",
401
+ "benfiquista",
402
+ "biba",
403
+ "bicha",
404
+ "bios",
405
+ "biroska",
406
+ "bobo",
407
+ "bocal",
408
+ "bolagato",
409
+ "boqueteiro",
410
+ "bosta",
411
+ "buceta",
412
+ "bundao",
413
+ "burro",
414
+ "cabaco",
415
+ "cacete",
416
+ "cadelona",
417
+ "cafona",
418
+ "cambista",
419
+ "capiroto",
420
+ "caralho",
421
+ "catraia",
422
+ "cepo",
423
+ "cocodrilo",
424
+ "cocozento",
425
+ "cu",
426
+ "debilmental",
427
+ "demente",
428
+ "desciclope",
429
+ "desgracado",
430
+ "drogado",
431
+ "eguenorante",
432
+ "endemoniado",
433
+ "energumeno",
434
+ "enfianocu",
435
+ "engolerola",
436
+ "escroto",
437
+ "esdruxulo",
438
+ "esporrado",
439
+ "estigalhado",
440
+ "estrume",
441
+ "estrunxado",
442
+ "estupido",
443
+ "fdp",
444
+ "fidumaegua",
445
+ "filhodaputa",
446
+ "fiofo",
447
+ "foda",
448
+ "fuder",
449
+ "fudido",
450
+ "fulera",
451
+ "galinha",
452
+ "gambiarra",
453
+ "geisyarruda",
454
+ "gnu",
455
+ "gonorreia",
456
+ "gordoescroto",
457
+ "gozado",
458
+ "herege",
459
+ "idiota",
460
+ "ignorante",
461
+ "imbecil",
462
+ "imundo",
463
+ "inascivel",
464
+ "inseto",
465
+ "invertebrado",
466
+ "jacu",
467
+ "jegue",
468
+ "jumento",
469
+ "kct",
470
+ "komodo",
471
+ "ku",
472
+ "lazarento",
473
+ "lazaro!",
474
+ "leproso",
475
+ "lerdo",
476
+ "lesma",
477
+ "lezado",
478
+ "lico",
479
+ "limpezaanal",
480
+ "lixo",
481
+ "lombriga",
482
+ "macaco",
483
+ "marimoon",
484
+ "merda",
485
+ "meretriz",
486
+ "miolodecu",
487
+ "mocorongo",
488
+ "montedemerda",
489
+ "morfetico",
490
+ "mulambo",
491
+ "n00b",
492
+ "nazista",
493
+ "nerd",
494
+ "newbie",
495
+ "nhaca",
496
+ "nonsense",
497
+ "ogro",
498
+ "olhodocu",
499
+ "olhogordo",
500
+ "otario",
501
+ "palhaco",
502
+ "panaca",
503
+ "paraguaio",
504
+ "passaralho",
505
+ "paunocu",
506
+ "periquita",
507
+ "pimenteira",
508
+ "pipoca",
509
+ "piranha",
510
+ "piroca",
511
+ "pistoleira",
512
+ "porra",
513
+ "prostituta",
514
+ "punheta",
515
+ "puta",
516
+ "putaquepariu",
517
+ "quasimodo",
518
+ "quenga",
519
+ "quirguistao",
520
+ "rampero",
521
+ "rapariga",
522
+ "raspadinha",
523
+ "retardado",
524
+ "rusguento",
525
+ "sanguesuga",
526
+ "sujo",
527
+ "tapado",
528
+ "tarado",
529
+ "tesao",
530
+ "tetuda",
531
+ "tetudo",
532
+ "tosco",
533
+ "tragado",
534
+ "travesti",
535
+ "trepadeira",
536
+ "troglodita",
537
+ "urubu",
538
+ "vaca",
539
+ "vadia",
540
+ "vagabundo",
541
+ "vagaranha",
542
+ "vaiamerda",
543
+ "vaisefuder",
544
+ "vaitomarnocu",
545
+ "vascaino",
546
+ "verme",
547
+ "viado",
548
+ "xavasca",
549
+ "xereca",
550
+ "xixizento",
551
+ "xoxota",
552
+ "xupetinha",
553
+ "xupisco",
554
+ "xurupita",
555
+ "xuxexo",
556
+ "xxt",
557
+ "xxx",
558
+ "zebuceta",
559
+ "ziguizira",
560
+ "zina",
561
+ "zoado",
562
+ "zoiudo",
563
+ "zoneira",
564
+ "zuado",
565
+ "zuera",
566
+ "zulu",
567
+ "zureta",
568
+ ]