Spaces:

guardiancc
/

dubai

Sleeping

App Files Files Community

fountai committed on Jan 21

Commit

1fe2f2f

1 Parent(s): a235f02

mimic

Browse files

Files changed (24) hide show

.env.example +10 -0
.gitattributes copy +35 -0
.gitignore +4 -0
.gitmodules +3 -0
README copy.md +12 -0
app.py +16 -32
models/__pycache__/whisper.cpython-312.pyc +0 -0
models/censor.py +111 -0
models/voice.py +114 -0
models/whisper.py +9 -0
modules/__pycache__/audio.cpython-312.pyc +0 -0
modules/__pycache__/r2.cpython-312.pyc +0 -0
modules/__pycache__/redis.cpython-312.pyc +0 -0
modules/__pycache__/register.cpython-312.pyc +0 -0
modules/audio.py +44 -0
modules/hf.py +9 -0
modules/r2.py +38 -0
modules/redis.py +8 -0
modules/register.py +111 -0
packages.txt +2 -0
processor.py +62 -0
requirements.txt +10 -0
swagger.json +52 -0
utils/bad_words.py +568 -0

.env.example ADDED Viewed

	@@ -0,0 +1,10 @@

+HF_TOKEN=
+AWS_ENDPOINT=
+AWS_ACCOUNT_ID=
+AWS_ACCESS_KEY=
+AWS_SECRET_KEY=
+AWS_BUCKET=
+REDIS_HOST=
+REDIS_PORT=
+REDIS_DB=
+REDIS_PASS=

.gitattributes copy ADDED Viewed

	@@ -0,0 +1,35 @@

+*.7z filter=lfs diff=lfs merge=lfs -text
+*.arrow filter=lfs diff=lfs merge=lfs -text
+*.bin filter=lfs diff=lfs merge=lfs -text
+*.bz2 filter=lfs diff=lfs merge=lfs -text
+*.ckpt filter=lfs diff=lfs merge=lfs -text
+*.ftz filter=lfs diff=lfs merge=lfs -text
+*.gz filter=lfs diff=lfs merge=lfs -text
+*.h5 filter=lfs diff=lfs merge=lfs -text
+*.joblib filter=lfs diff=lfs merge=lfs -text
+*.lfs.* filter=lfs diff=lfs merge=lfs -text
+*.mlmodel filter=lfs diff=lfs merge=lfs -text
+*.model filter=lfs diff=lfs merge=lfs -text
+*.msgpack filter=lfs diff=lfs merge=lfs -text
+*.npy filter=lfs diff=lfs merge=lfs -text
+*.npz filter=lfs diff=lfs merge=lfs -text
+*.onnx filter=lfs diff=lfs merge=lfs -text
+*.ot filter=lfs diff=lfs merge=lfs -text
+*.parquet filter=lfs diff=lfs merge=lfs -text
+*.pb filter=lfs diff=lfs merge=lfs -text
+*.pickle filter=lfs diff=lfs merge=lfs -text
+*.pkl filter=lfs diff=lfs merge=lfs -text
+*.pt filter=lfs diff=lfs merge=lfs -text
+*.pth filter=lfs diff=lfs merge=lfs -text
+*.rar filter=lfs diff=lfs merge=lfs -text
+*.safetensors filter=lfs diff=lfs merge=lfs -text
+saved_model/**/* filter=lfs diff=lfs merge=lfs -text
+*.tar.* filter=lfs diff=lfs merge=lfs -text
+*.tar filter=lfs diff=lfs merge=lfs -text
+*.tflite filter=lfs diff=lfs merge=lfs -text
+*.tgz filter=lfs diff=lfs merge=lfs -text
+*.wasm filter=lfs diff=lfs merge=lfs -text
+*.xz filter=lfs diff=lfs merge=lfs -text
+*.zip filter=lfs diff=lfs merge=lfs -text
+*.zst filter=lfs diff=lfs merge=lfs -text
+*tfevents* filter=lfs diff=lfs merge=lfs -text

.gitignore ADDED Viewed

	@@ -0,0 +1,4 @@

+run.py
+audio.wav
+key.wav
+key.txt

.gitmodules ADDED Viewed

	@@ -0,0 +1,3 @@

+[submodule "f5"]
+	path = f5
+	url = https://github.com/eletroswing/f5

README copy.md ADDED Viewed

	@@ -0,0 +1,12 @@

+---
+title: Mimic
+emoji: 🔥
+colorFrom: yellow
+colorTo: purple
+sdk: gradio
+sdk_version: 5.9.0
+app_file: app.py
+pinned: false
+---
+Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference

app.py CHANGED Viewed

@@ -71,38 +71,8 @@ class ProcessRequest(BaseModel):
     offset: float = -0.3
     format: str = "wav"
     speed: float = 0.8
-    crossfade: float = 0.1
-q = queue.Queue()
-def process_queue(q):
-    while True:
-        try:
-            key, censor, offset, text, format, speed, crossfade, id, receiver, webhook = q.get(timeout=5)
-            audio = generate_audio(key, text, censor, offset, speed=speed, crossfade=crossfade)
-            convertedAudioPath = convert(audio, format)
-            duration = get_audio_duration(convertedAudioPath)
-            audioUrl = upload_to_s3(convertedAudioPath, f"{id}", format)
-            os.remove(audio)
-            os.remove(convertedAudioPath)
-            payload = {
-                    "id": id,
-                    "duration": duration,
-                    "receiver": receiver,
-                    "url": audioUrl
-                }
-            requests.post(webhook, json=payload)
-        except Exception as e:
-            print(e)
-        finally:
-            q.task_done()
-worker_thread = threading.Thread(target=process_queue, args=(q,))
-worker_thread.start()
 @app.post("/process")
 def process_audio(payload: ProcessRequest):
     key = payload.key
@@ -120,7 +90,21 @@ def process_audio(payload: ProcessRequest):
         raise HTTPException(status_code=500, detail=str(e))
     try:
-        q.put((key, censor, offset, text, format, speed, crossfade, id, receiver, webhook))
         return {"success": True, "err": ""}
     except ValueError as e:

     offset: float = -0.3
     format: str = "wav"
     speed: float = 0.8
+    crossfade: float = 0.06
 @app.post("/process")
 def process_audio(payload: ProcessRequest):
     key = payload.key
         raise HTTPException(status_code=500, detail=str(e))
     try:
+        audio = generate_audio(key, text, censor, offset, speed=speed, crossfade=crossfade)
+        convertedAudioPath = convert(audio, format)
+        duration = get_audio_duration(convertedAudioPath)
+        audioUrl = upload_to_s3(convertedAudioPath, f"{id}", format)
+        os.remove(audio)
+        os.remove(convertedAudioPath)
+        payload = {
+                "id": id,
+                "duration": duration,
+                "receiver": receiver,
+                "url": audioUrl
+            }
+        requests.post(webhook, json=payload)
         return {"success": True, "err": ""}
     except ValueError as e:

models/__pycache__/whisper.cpython-312.pyc ADDED Viewed

Binary file (520 Bytes). View file

models/censor.py ADDED Viewed

	@@ -0,0 +1,111 @@

+from utils.bad_words import words
+import re
+from models.whisper import model
+import random
+import string
+import subprocess
class Wash:
    """Profanity detector and audio censor.

    Looks tokens up in a bad-word collection and can overlay a 1 kHz beep on
    the time spans of an audio file where such words were transcribed.
    """

    def __init__(self, custom_db=None):
        """Use ``custom_db`` when it is truthy, otherwise the bundled ``words`` list."""
        self.db = custom_db if custom_db else words

    @staticmethod
    def clean(phrase):
        """Lower-case ``phrase`` and collapse each whitespace run into one space."""
        return re.sub(r'\s+', ' ', phrase.lower())

    @staticmethod
    def tokenize(phrase):
        """Split ``phrase`` on whitespace and return three token variants.

        The returned list is the concatenation of:
        1. the raw tokens (punctuation kept, so entries such as "lazaro!" or
           "baba-ovo" can match),
        2. the tokens with punctuation removed,
        3. the tokens reduced to letters only (digits/underscores removed).

        The patterns are Unicode-aware so accented letters are preserved.
        """
        with_punctuation = phrase.split()
        without_punctuation = re.sub(r'[^\w\s]', '', phrase).split()
        letters_only = re.sub(r'[^\w\s]|[\d_]', '', phrase).split()
        return with_punctuation + without_punctuation + letters_only

    def words(self):
        """Return the active bad-word collection."""
        return self.db

    def _has_bad_token(self, tokens):
        """Return True when any token is present in the bad-word collection."""
        bad_words = self.words()
        return any(token in bad_words for token in tokens)

    def check_word(self, word):
        """Return True when a single (possibly padded/capitalised) word is profane."""
        normalized = self.clean(word).replace(" ", "")
        return self._has_bad_token(self.tokenize(normalized))

    def generate_random_name(self, length):
        """Return ``length`` random lowercase ASCII letters."""
        letters = string.ascii_lowercase
        return ''.join(random.choice(letters) for _ in range(length))

    def _build_beep_filter(self, itensList, offset=0):
        """Build the ffmpeg ``-filter_complex`` string that mutes and beeps each span.

        Each item's window is ``[start + offset, end - offset]`` rounded to
        two decimals; a negative window start is clamped to 0 and windows
        that end up empty are dropped. Returns None when no window remains.
        """
        windows = []
        for item in itensList:
            begin = max(0.0, round(item["start"] + offset, 2))
            finish = round(item["end"] - offset, 2)
            if finish > begin:
                windows.append((begin, finish))
        if not windows:
            return None

        muted = "+".join(f"between(t,{begin},{finish})" for begin, finish in windows)
        parts = [f"[0]volume=0:enable='{muted}'"]
        for index, (begin, finish) in enumerate(windows):
            duration = round(finish - begin, 2)
            # adelay takes milliseconds by default; no unit suffix is added
            # because ffmpeg documents a trailing "s" as seconds.
            delay_ms = int(round(begin * 1000))
            # Index-based labels stay unique even if two spans share a start.
            parts.append(
                f"[main{index}];"
                f"sine=d={duration}:f=1000,adelay={delay_ms},pan=stereo|FL=c0|FR=c0[beep{index}];"
                f"[main{index}][beep{index}]amix=inputs=2:duration=longest"
            )
        return "".join(parts)

    def insert_beep(self, itensList, original_file, offset=0):
        """Write a censored copy of ``original_file`` and return its path.

        Returns None when ``itensList`` is empty or no usable window remains.
        Raises ``subprocess.CalledProcessError`` when ffmpeg fails.
        """
        if not itensList:
            return
        filter_str = self._build_beep_filter(itensList, offset)
        if filter_str is None:
            return None
        output = self.generate_random_name(8) + ".wav"
        ffmpeg_cmd = [
            "ffmpeg",
            "-y",
            "-i",
            original_file,
            "-filter_complex",
            filter_str,
            output,
        ]
        subprocess.run(ffmpeg_cmd, check=True)
        return output

    def process_audio(self, audio_path, offset):
        """Transcribe ``audio_path`` and beep every profane word.

        Returns the path of the censored file, or ``audio_path`` unchanged
        when nothing had to be censored.
        """
        bad_time = []
        segments, info = model.transcribe(audio_path, word_timestamps=True, beam_size=1, best_of=1)
        for segment in segments:
            for word in segment.words or []:
                if self.check_word(word.word):
                    bad_time.append({
                        "start": word.start,
                        "end": word.end,
                    })
        result = self.insert_beep(bad_time, audio_path, offset)
        if result:
            return result
        return audio_path

    def check(self, phrase):
        """Return True when ``phrase`` contains a profane word.

        Every word is tested individually, and the phrase with all spaces
        removed is tested too so multi-word entries stored without spaces
        (e.g. "filhodaputa") still match.
        """
        cleaned_phrase = self.clean(phrase)
        tokens = self.tokenize(cleaned_phrase) + self.tokenize(cleaned_phrase.replace(" ", ""))
        return self._has_bad_token(tokens)

models/voice.py ADDED Viewed

	@@ -0,0 +1,114 @@

+# ruff: noqa: E402
+# Above allows ruff to ignore E402: module level import not at top of file
+import re
+import tempfile
+import gradio as gr
+import soundfile as sf
+import torchaudio
+try:
+    import spaces
+    USING_SPACES = True
+except ImportError:
+    USING_SPACES = False
def gpu_decorator(func):
    """Wrap ``func`` with ``spaces.GPU`` when ``spaces`` was importable; otherwise return it as-is."""
    return spaces.GPU(func) if USING_SPACES else func
+from f5.src.f5_tts.model import DiT
+from f5.src.f5_tts.infer.utils_infer import (
+    load_vocoder,
+    load_model,
+    preprocess_ref_audio_text,
+    infer_process,
+    remove_silence_for_generated_wav,
+    save_spectrogram,
+)
# Model configuration passed to load_model() together with the DiT class.
F5TTS_model_cfg = {
    "dim": 1024,
    "depth": 22,
    "heads": 16,
    "ff_mult": 2,
    "text_dim": 512,
    "conv_layers": 4,
}
# Module-level singletons, filled in by load() and loadVoc().
F5TTS_ema_model = None
vocoder = None


def load(path = "./model_1200000.safetensors"):
    """Load the checkpoint at ``path`` and store the model in ``F5TTS_ema_model``."""
    global F5TTS_ema_model
    F5TTS_ema_model = load_model(DiT, F5TTS_model_cfg, path)


def loadVoc():
    """Create the vocoder with load_vocoder() and store it in ``vocoder``."""
    global vocoder
    vocoder = load_vocoder()
@gpu_decorator
def infer(ref_audio_orig, ref_text, gen_text, remove_silence, cross_fade_duration=0.10, speed=0.9):
    """Synthesize ``gen_text`` in the voice of the reference audio.

    Loads the model and vocoder on first use.

    Args:
        ref_audio_orig: reference audio handed to preprocess_ref_audio_text.
        ref_text: transcription of the reference audio.
        gen_text: text to synthesize.
        remove_silence: when truthy, run remove_silence_for_generated_wav on
            the generated audio before saving it.
        cross_fade_duration: forwarded to infer_process.
        speed: forwarded to infer_process.

    Returns:
        ``(audio_path, spectrogram_path)``: paths of a temporary ``.wav`` and
        ``.png`` file. Both are created with ``delete=False``, so the caller
        is responsible for removing them.
    """
    import os  # local import: this module does not import os at the top

    if F5TTS_ema_model is None:
        load()
    if vocoder is None:
        loadVoc()

    ref_audio, ref_text = preprocess_ref_audio_text(ref_audio_orig, ref_text)

    final_wave, final_sample_rate, combined_spectrogram = infer_process(
        ref_audio,
        ref_text,
        gen_text,
        F5TTS_ema_model,
        vocoder,
        cross_fade_duration=cross_fade_duration,
        speed=speed,
    )
    print("final_wave doe")

    if remove_silence:
        # Reserve a scratch file name, then close the handle before writing.
        with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as f:
            scratch_path = f.name
        try:
            sf.write(scratch_path, final_wave, final_sample_rate)
            remove_silence_for_generated_wav(scratch_path)
            final_wave, _ = torchaudio.load(scratch_path)
        finally:
            # The scratch file is only an intermediate; do not leak it.
            os.remove(scratch_path)
        final_wave = final_wave.squeeze().cpu().numpy()
    print('silence removed')

    with tempfile.NamedTemporaryFile(suffix=".png", delete=False) as tmp_spectrogram:
        spectrogram_path = tmp_spectrogram.name
        save_spectrogram(combined_spectrogram, spectrogram_path)

    with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as f_audio:
        audio_path = f_audio.name
        sf.write(audio_path, final_wave, final_sample_rate)

    return audio_path, spectrogram_path
def parse_speechtypes_text(gen_text):
    """Split ``gen_text`` into segments tagged with the active ``{emotion}`` marker.

    Text before the first marker is tagged "Regular". Each ``{name}`` marker
    sets the emotion for the text that follows it. Empty text pieces are
    skipped.

    Returns:
        A list of ``{"emotion": str, "text": str}`` dicts in document order.
    """
    # re.split with one capture group alternates: text, marker, text, marker, ...
    tokens = re.split(r"\{(.*?)\}", gen_text)

    segments = []
    current_emotion = "Regular"
    for index, token in enumerate(tokens):
        piece = token.strip()
        if index % 2:
            # Odd positions hold the captured marker contents.
            current_emotion = piece
        elif piece:
            segments.append({"emotion": current_emotion, "text": piece})
    return segments

models/whisper.py ADDED Viewed

	@@ -0,0 +1,9 @@

+from faster_whisper import WhisperModel
+import torch
# Use CUDA with float16 when a GPU is available, otherwise the CPU with int8.
device, compute_type = (
    ("cuda", "float16") if torch.cuda.is_available() else ("cpu", "int8")
)
model_size = "medium"
# Shared Whisper instance imported by the other modules.
model = WhisperModel(model_size, device=device, compute_type=compute_type)

modules/__pycache__/audio.cpython-312.pyc ADDED Viewed

Binary file (2.14 kB). View file

modules/__pycache__/r2.cpython-312.pyc ADDED Viewed

Binary file (1.55 kB). View file

modules/__pycache__/redis.cpython-312.pyc ADDED Viewed

Binary file (519 Bytes). View file

modules/__pycache__/register.cpython-312.pyc ADDED Viewed

Binary file (3.92 kB). View file

modules/audio.py ADDED Viewed

	@@ -0,0 +1,44 @@

+import re
+import subprocess
+from pydub.utils import mediainfo
+import math
+from pydub import AudioSegment
def get_audio_duration(file_path):
    """Return the duration of the audio file at ``file_path`` in seconds.

    Raises:
        ValueError: when the file cannot be loaded; the original exception
            is chained as the cause.
    """
    try:
        audio = AudioSegment.from_file(file_path)
        return len(audio) / 1000  # Duration in seconds
    except Exception as e:
        raise ValueError(f"Error extracting duration: {e}") from e
def cut_audio(file_path, start_time, end_time, output_path="output_cut.wav"):
    """Cut the audio to the ``start_time``..``end_time`` interval using ffmpeg.

    The command is passed as an argument list (no shell), so paths containing
    spaces or shell metacharacters are handled safely.

    Returns:
        ``output_path``.

    Raises:
        RuntimeError: when ffmpeg cannot be started or exits with an error;
            the message carries ffmpeg's stderr when available.
    """
    command = [
        "ffmpeg", "-y", "-loglevel", "error",
        "-i", file_path,
        "-ss", str(start_time),
        "-to", str(end_time),
        "-acodec", "copy",
        output_path,
    ]
    try:
        subprocess.run(
            command,
            check=True,
            stdout=subprocess.DEVNULL,
            stderr=subprocess.PIPE,  # captured so the error message is not empty
            text=True,
        )
    except subprocess.CalledProcessError as e:
        raise RuntimeError(f"Erro ao cortar o áudio: {e.stderr}") from e
    except OSError as e:
        # e.g. the ffmpeg executable is missing
        raise RuntimeError(f"Erro ao cortar o áudio: {e}") from e
    return output_path
def convert(file_path, format_extension):
    """Convert ``file_path`` to ``format_extension`` with ffmpeg.

    The output is written to the current directory as
    ``out_<input stem>.<format_extension>``. When ``format_extension`` is
    empty the input's own extension is kept.

    Returns:
        The output file name. ffmpeg's exit status is not checked, so the
        file may be missing if the conversion failed.
    """
    import os  # local import: this module does not import os at the top

    base_name = os.path.basename(file_path)
    if format_extension:
        stem, _ = os.path.splitext(base_name)
        base_name = f"{stem}.{str(format_extension).lstrip('.')}"
    output_path = f"out_{base_name}"

    ffmpeg_cmd = [
        "ffmpeg",
        "-y",
        "-i", file_path,
        output_path,
    ]
    subprocess.run(ffmpeg_cmd)
    return output_path
def add_silence_to_audio(file_path, start_silence=0, end_silence=0, output_path="output_with_silence.wav"):
    """Add silence to the start and end of the audio using ffmpeg.

    ``start_silence`` and ``end_silence`` are given in seconds. The command
    is passed as an argument list (no shell), so paths are not interpreted
    by a shell.

    Returns:
        ``output_path``.

    Raises:
        RuntimeError: when ffmpeg cannot be started or exits with an error.
    """
    delay_ms = start_silence * 1000  # adelay expects milliseconds
    audio_filter = f"adelay={delay_ms}|{delay_ms},apad=pad_dur={end_silence}"
    command = ["ffmpeg", "-y", "-i", file_path, "-af", audio_filter, output_path]
    try:
        subprocess.run(command, check=True, stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL)
    except (subprocess.CalledProcessError, OSError) as e:
        raise RuntimeError(f"Erro ao adicionar silêncio no áudio: {e}") from e
    return output_path

modules/hf.py ADDED Viewed

	@@ -0,0 +1,9 @@

+import os
+from huggingface_hub import hf_hub_download
# Hub token from the environment; None when HF_TOKEN is unset.
token = os.environ.get("HF_TOKEN")

# Downloaded when this module is imported, into the current directory.
model_path = hf_hub_download(
    repo_id='Alertpix/new_audio_model',
    filename='model_1200000.safetensors',
    token=token,
    local_dir="./",
)


def get_model_path():
    """Return the local path produced by the import-time download."""
    return model_path

modules/r2.py ADDED Viewed

	@@ -0,0 +1,38 @@

+import boto3
+from botocore.config import Config
+import os
+import io
# Connection settings are read from the environment; unset variables give None.
endpoint = os.getenv("AWS_ENDPOINT")
account_id = os.getenv("AWS_ACCOUNT_ID")
aws_access_key_id = os.getenv("AWS_ACCESS_KEY")
aws_secret_access_key = os.getenv("AWS_SECRET_KEY")
bucket = os.getenv("AWS_BUCKET")

# SigV4 signing with path-style addressing and unsigned payloads.
config = Config(
    signature_version='s3v4',
    s3={'addressing_style': 'path', 'payload_signing_enabled': False},
)

# Shared S3 client used by the helpers in this module.
s3 = boto3.client(
    's3',
    endpoint_url=endpoint,
    aws_access_key_id=aws_access_key_id,
    aws_secret_access_key=aws_secret_access_key,
    config=config,
)
def upload_to_s3(path, name, extension):
    """Upload the file at ``path`` as object ``name`` and return a presigned URL.

    The object is stored with content type ``audio/<extension>`` and a
    ``public-read`` ACL. The URL comes from get_url(), so both helpers share
    one presigning configuration.
    """
    s3.upload_file(
        path,
        bucket,
        name,
        ExtraArgs={'ContentType': f'audio/{extension}', 'ACL': 'public-read'},
    )
    return get_url(name)
def get_url(name):
    """Return a presigned ``get_object`` URL for ``name``, valid for 604800 seconds."""
    request_params = {'Bucket': bucket, 'Key': name}
    return s3.generate_presigned_url(
        'get_object',
        Params=request_params,
        ExpiresIn=604800,
    )

modules/redis.py ADDED Viewed

	@@ -0,0 +1,8 @@

+import os
+import redis
redis_host = os.getenv('REDIS_HOST', 'localhost')
# An unset or empty REDIS_PASS means "no password". The previous default of
# 'localhost' was sent to the server as a real password.
redis_pass = os.getenv('REDIS_PASS') or None
# An empty REDIS_PORT (as in .env.example) falls back to the default port.
redis_port = int(os.getenv('REDIS_PORT') or 6379)
cache = redis.Redis(host=redis_host, port=redis_port, password=redis_pass)

modules/register.py ADDED Viewed

	@@ -0,0 +1,111 @@

+from models.whisper import model
+import modules.audio as audio
+import os
+from modules.r2 import upload_to_s3,get_url
+import json
+import os
+import redis
redis_host = os.getenv('REDIS_HOST', 'localhost')
# An unset or empty REDIS_PASS means "no password". The previous default of
# 'localhost' was sent to the server as a real password.
redis_pass = os.getenv('REDIS_PASS') or None
# An empty REDIS_PORT (as in .env.example) falls back to the default port.
redis_port = int(os.getenv('REDIS_PORT') or 6379)
cache = redis.Redis(host=redis_host, port=redis_port, password=redis_pass)

# Kept as the strings "True"/"False" because the rest of the module (and the
# dicts it returns) compare against the string "True".
isOnline = "True" if os.environ.get("IS_ONLINE") == "True" else "False"
def prepare_audio(audio_path, key):
    """Validate, trim and pad a reference recording.

    Audio shorter than 8 seconds is rejected before any file is written.
    Audio longer than 15 seconds is cut to its first 12 seconds. The result
    is padded with 0.5 s of leading and 0.8 s of trailing silence and written
    to ``{key}.wav``.

    Returns:
        The path returned by add_silence_to_audio (``{key}.wav``).

    Raises:
        ValueError: ``"audio_too_short"`` when the audio is under 8 seconds.
    """
    audio_duration = round(audio.get_audio_duration(audio_path), 2)
    if audio_duration < 8:
        raise ValueError("audio_too_short")

    cut_path = None
    if audio_duration > 15:
        cut_path = audio.cut_audio(audio_path, start_time="0", end_time="12", output_path=f"{key}_cut.wav")
        audio_path = cut_path

    try:
        return audio.add_silence_to_audio(audio_path, start_silence=0.5, end_silence=0.8, output_path=f"{key}.wav")
    finally:
        # Remove the intermediate cut even when padding fails.
        if cut_path is not None and os.path.exists(cut_path):
            os.remove(cut_path)
def get_audio_transcription(key):
    """Return the stored transcription for ``key``.

    In online mode this is the value Redis holds under
    ``mimic:audio:translation:{key}``; otherwise it is the content of
    ``{key}.txt``.
    """
    if isOnline != "True":
        with open(f"{key}.txt", "r") as handle:
            return handle.read()
    return cache.get(f"mimic:audio:translation:{key}")
def get_audio_path(key):
    """Return where the reference audio for ``key`` can be fetched, as a str.

    Offline this is the local file name ``{key}.wav``. Online it is a
    presigned URL, served from Redis when cached and otherwise freshly
    generated and cached for 600000 seconds.
    """
    if isOnline != "True":
        return f"{key}.wav"

    cache_key = f"mimic:audio:url:{key}"
    cached = cache.get(cache_key)
    if cached:
        # Redis may hand back bytes; return str on both paths.
        return cached.decode("utf-8") if isinstance(cached, bytes) else cached

    new_url = get_url(f"{key}.wav")
    # One SET with an expiry avoids a key left without a TTL if a separate
    # EXPIRE call were to fail.
    cache.set(cache_key, new_url, ex=600000)
    return new_url
def get_audio(key):
    """Return the stored transcription, audio location and online flag for ``key``."""
    result = {"transcription": get_audio_transcription(key)}
    result["audio_path"] = get_audio_path(key)
    result["isOnline"] = isOnline
    return result
def _transcribe_text(path):
    """Transcribe ``path`` with Whisper and join the segments, each prefixed by a space."""
    segments, _info = model.transcribe(path, beam_size=5)
    return "".join(f" {segment.text}" for segment in segments)


def process_audio(audio_path, key):
    """Register a reference recording under ``key`` using Whisper.

    If ``key`` is already registered the stored data is returned. Otherwise
    the audio is prepared and transcribed, and the result is persisted:
    online to S3 and Redis, offline to ``{key}.wav`` and ``{key}.txt``.

    Returns:
        A dict with ``transcription``, ``audio_path`` and ``isOnline``.
    """
    if isOnline == "True":
        if cache.exists(f"mimic:audio_exists:{key}"):
            return get_audio(key)

        audio_path = prepare_audio(audio_path, key)
        content = _transcribe_text(audio_path)
        url = upload_to_s3(audio_path, f"{key}.wav", "wav")

        cache.set(f"mimic:audio:translation:{key}", content)
        # Set the value and its expiry in one command.
        cache.set(f"mimic:audio:url:{key}", url, ex=600000)
        cache.set(f"mimic:audio_exists:{key}", "true")
        return {"transcription": content, "audio_path": url, "isOnline": isOnline}

    if os.path.exists(f"{key}.txt"):
        return get_audio(key)

    # A wav without its transcription is treated as stale and regenerated.
    if os.path.exists(f"{key}.wav"):
        os.remove(f"{key}.wav")

    audio_path = prepare_audio(audio_path, key)
    content = _transcribe_text(audio_path)
    with open(f"{key}.txt", "w") as f:
        f.write(content)
    return {"transcription": content, "audio_path": audio_path, "isOnline": isOnline}

packages.txt ADDED Viewed

	@@ -0,0 +1,2 @@


1	+ ffmpeg
2	+ espeak

processor.py ADDED Viewed

	@@ -0,0 +1,62 @@

+# download model
+import modules.hf as hf
+# load models
+import models.voice as voice
+import models.whisper as whisper
+voice.load()
+voice.loadVoc()
+#libs
+import modules.register as register
+from models.censor import Wash
+import requests
+import os
def download_audio(url, output_file):
    """
    Downloads an audio file from the given URL and saves it locally.
    If the file already exists, it returns the path without downloading again.

    The data is first written to ``<output_file>.part`` and only renamed to
    ``output_file`` after the download completed, so an interrupted download
    is never mistaken for a finished file on a later call.

    :param url: URL of the audio file
    :param output_file: Path where the audio will be saved
    :return: Path to the audio file, or None when the request failed
    """
    if os.path.exists(output_file):
        print(f"File already exists: {output_file}")
        return output_file

    partial_file = f"{output_file}.part"
    try:
        # The timeout keeps a stalled server from blocking the caller forever.
        with requests.get(url, stream=True, timeout=30) as response:
            response.raise_for_status()  # Raise an HTTPError for bad responses (4xx and 5xx)
            with open(partial_file, 'wb') as file:
                for chunk in response.iter_content(chunk_size=8192):
                    file.write(chunk)
        os.replace(partial_file, output_file)
        print(f"Audio downloaded successfully: {output_file}")
        return output_file
    except requests.exceptions.RequestException as e:
        print(f"Error downloading audio: {e}")
        return None
    finally:
        # Still present only when the download did not complete.
        if os.path.exists(partial_file):
            os.remove(partial_file)
+# generate audio function
censorModel = Wash()


def generate_audio(key, text, censor=False, offset=0, speed=0.9, crossfade=0.1):
    """Generate speech for ``text`` in the voice registered under ``key``.

    Args:
        key: identifier of the registered reference audio.
        text: text to synthesize.
        censor: when truthy, beep profane words in the generated audio.
        offset: forwarded to the censor's process_audio.
        speed: forwarded to voice.infer.
        crossfade: forwarded to voice.infer as ``cross_fade_duration``.

    Returns:
        Path of the generated (and possibly censored) audio file.

    Raises:
        ValueError: in online mode, when the reference audio cannot be
            downloaded or no transcription is stored for ``key``.
    """
    data = register.get_audio(key)

    if data["isOnline"] == "True":
        reference_audio = download_audio(data["audio_path"], f'{key}.wav')
        if reference_audio is None:
            raise ValueError("reference_audio_unavailable")
        txt = data["transcription"]
        if txt is None:
            raise ValueError("reference_transcription_missing")
        if isinstance(txt, bytes):
            txt = txt.decode('utf-8')
        print(txt)
    else:
        reference_audio = data["audio_path"]
        txt = data["transcription"]

    # voice.infer names the parameter cross_fade_duration; both modes now
    # honour the caller's speed and crossfade.
    audio, spectogram = voice.infer(
        reference_audio,
        txt,
        text,
        remove_silence=True,
        cross_fade_duration=crossfade,
        speed=speed,
    )

    # The spectrogram image is not used here; drop the temp file.
    if spectogram and os.path.exists(spectogram):
        os.remove(spectogram)

    if censor:
        audio = censorModel.process_audio(audio, offset)
    return audio

requirements.txt ADDED Viewed

	@@ -0,0 +1,10 @@

+ffmpeg
+redis
+faster-whisper
+boto3==1.35.99
+-e git+https://github.com/SWivid/F5-TTS.git#egg=F5-TTS
+phonemizer
+pydub
+fastapi
+uvicorn
+uuid

swagger.json ADDED Viewed

	@@ -0,0 +1,52 @@

+{
+    "openapi": "3.0.0",
+    "info": {
+      "title": "Alert Pix Ai v2",
+      "version": "1.0.0",
+      "description": "This is a simple API for demonstration purposes."
+    },
+    "paths": {
+      "/api/generate": {
+        "post": {
+          "tags": ["Generate"],
+          "summary": "Generate text from prompt",
+          "operationId": "generateText",
+          "requestBody": {
+            "content": {
+              "application/json": {
+                "schema": {
+                  "$ref": "#/components/schemas/Generate"
+                }
+              }
+            },
+            "required": true
+          },
+          "responses": {
+            "200": {
+              "description": "Generated text",
+              "content": {
+                "application/json": {
+                  "schema": {
+                    "$ref": "#/components/schemas/Generate"
+                  }
+                }
+              }
+            }
+          }
+        }
+      }
+    },
+    "components": {
+      "schemas": {
+        "Generate": {
+          "type": "object",
+          "properties": {
+            "text": {
+              "type": "string",
+              "example": "This is a generated text."
+            }
+          }
+        }
+      }
+    }
+  }

utils/bad_words.py ADDED Viewed

	@@ -0,0 +1,568 @@

+words = [
+  "goze",
+  "aidético",
+  "aidética",
+  "aleijado",
+  "aleijada",
+  "analfabeto",
+  "analfabeta",
+  "fuder",
+  "porra",
+  "anus",
+  "anão",
+  "anã",
+  "arrombado",
+  "apenado",
+  "apenada",
+  "baba-ovo",
+  "babaca",
+  "babaovo",
+  "bacura",
+  "bagos",
+  "baianada",
+  "baitola",
+  "barbeiro",
+  "barraco",
+  "beata",
+  "bebum",
+  "besta",
+  "bicha",
+  "bisca",
+  "bixa",
+  "boazuda",
+  "boceta",
+  "boco",
+  "boiola",
+  "bokete",
+  "bolagato",
+  "bolcat",
+  "boquete",
+  "bosseta",
+  "bosta",
+  "bostana",
+  "boçal",
+  "branquelo",
+  "brecha",
+  "brexa",
+  "brioco",
+  "bronha",
+  "buca",
+  "buceta",
+  "bugre",
+  "bunda",
+  "bunduda",
+  "burra",
+  "burro",
+  "busseta",
+  "bárbaro",
+  "bêbado",
+  "bêbedo",
+  "caceta",
+  "cacete",
+  "cachorra",
+  "cachorro",
+  "cadela",
+  "caga",
+  "cagado",
+  "cagao",
+  "cagão",
+  "cagona",
+  "caipira",
+  "canalha",
+  "canceroso",
+  "caralho",
+  "casseta",
+  "cassete",
+  "ceguinho",
+  "checheca",
+  "chereca",
+  "chibumba",
+  "chibumbo",
+  "chifruda",
+  "chifrudo",
+  "chochota",
+  "chota",
+  "chupada",
+  "chupado",
+  "ciganos",
+  "clitoris",
+  "clitóris",
+  "cocaina",
+  "cocaína",
+  "coco",
+  "cocô",
+  "comunista",
+  "corna",
+  "cornagem",
+  "cornisse",
+  "corno",
+  "cornuda",
+  "cornudo",
+  "cornão",
+  "corrupta",
+  "corrupto",
+  "coxo",
+  "cretina",
+  "cretino",
+  "criolo",
+  "crioulo",
+  "cruz-credo",
+  "cu",
+  "cú",
+  "culhao",
+  "culhão",
+  "curalho",
+  "cuzao",
+  "cuzão",
+  "cuzuda",
+  "cuzudo",
+  "debil",
+  "débil",
+  "debiloide",
+  "debilóide",
+  "deficiente",
+  "defunto",
+  "demonio",
+  "demônio",
+  "denegrir",
+  "denigrir",
+  "detento",
+  "difunto",
+  "doida",
+  "doido",
+  "egua",
+  "égua",
+  "elemento",
+  "encostado",
+  "esclerosado",
+  "escrota",
+  "escroto",
+  "esporrada",
+  "esporrado",
+  "esporro",
+  "estupida",
+  "estúpida",
+  "estupidez",
+  "estupido",
+  "estúpido",
+  "facista",
+  "fanatico",
+  "fanático",
+  "fascista",
+  "fedida",
+  "fedido",
+  "fedor",
+  "fedorenta",
+  "feia",
+  "feio",
+  "feiosa",
+  "feioso",
+  "feioza",
+  "feiozo",
+  "felacao",
+  "felação",
+  "fenda",
+  "foda",
+  "fodao",
+  "fodão",
+  "fode",
+  "fodi",
+  "fodida",
+  "fodido",
+  "fornica",
+  "fornição",
+  "fudendo",
+  "fudeção",
+  "fudida",
+  "fudido",
+  "furada",
+  "furado",
+  "furnica",
+  "furnicar",
+  "furo",
+  "furona",
+  "furão",
+  "gai",
+  "gaiata",
+  "gaiato",
+  "gay",
+  "gilete",
+  "goianada",
+  "gonorrea",
+  "gonorreia",
+  "gonorréia",
+  "gosmenta",
+  "gosmento",
+  "grelinho",
+  "grelo",
+  "gringo",
+  "homo-sexual",
+  "homosexual",
+  "homosexualismo",
+  "homossexual",
+  "homossexualismo",
+  "idiota",
+  "idiotice",
+  "imbecil",
+  "inculto",
+  "iscrota",
+  "iscroto",
+  "japa",
+  "judiar",
+  "ladra",
+  "ladrao",
+  "ladroeira",
+  "ladrona",
+  "ladrão",
+  "lalau",
+  "lazarento",
+  "leprosa",
+  "leproso",
+  "lesbica",
+  "lésbica",
+  "louco",
+  "macaca",
+  "macaco",
+  "machona",
+  "macumbeiro",
+  "malandro",
+  "maluco",
+  "maneta",
+  "marginal",
+  "masturba",
+  "meleca",
+  "meliante",
+  "merda",
+  "mija",
+  "mijada",
+  "mijado",
+  "mijo",
+  "minorias",
+  "mocrea",
+  "mocreia",
+  "mocréia",
+  "moleca",
+  "moleque",
+  "mondronga",
+  "mondrongo",
+  "mongol",
+  "mongoloide",
+  "mongolóide",
+  "mulata",
+  "mulato",
+  "naba",
+  "nadega",
+  "nádega",
+  "nazista",
+  "negro",
+  "nhaca",
+  "nojeira",
+  "nojenta",
+  "nojento",
+  "nojo",
+  "olhota",
+  "otaria",
+  "otario",
+  "otária",
+  "otário",
+  "paca",
+  "palhaco",
+  "palhaço",
+  "paspalha",
+  "paspalhao",
+  "paspalho",
+  "pau",
+  "peia",
+  "peido",
+  "pemba",
+  "pentelha",
+  "pentelho",
+  "perereca",
+  "perneta",
+  "peru",
+  "peão",
+  "pica",
+  "picao",
+  "picão",
+  "pilantra",
+  "pinel",
+  "pinto",
+  "pintudo",
+  "pintão",
+  "piranha",
+  "piroca",
+  "piroco",
+  "piru",
+  "pivete",
+  "porra",
+  "prega",
+  "preso",
+  "prequito",
+  "priquito",
+  "prostibulo",
+  "prostituta",
+  "prostituto",
+  "punheta",
+  "punhetao",
+  "punhetão",
+  "pus",
+  "pustula",
+  "puta",
+  "puto",
+  "puxa-saco",
+  "puxasaco",
+  "penis",
+  "pênis",
+  "rabao",
+  "rabão",
+  "rabo",
+  "rabuda",
+  "rabudao",
+  "rabudão",
+  "rabudo",
+  "rabudona",
+  "racha",
+  "rachada",
+  "rachadao",
+  "rachadinha",
+  "rachadinho",
+  "rachado",
+  "ramela",
+  "remela",
+  "retardada",
+  "retardado",
+  "ridícula",
+  "roceiro",
+  "rola",
+  "rolinha",
+  "rosca",
+  "sacana",
+  "safada",
+  "safado",
+  "sapatao",
+  "sapatão",
+  "sifilis",
+  "sífilis",
+  "siririca",
+  "tarada",
+  "tarado",
+  "testuda",
+  "tesuda",
+  "tesudo",
+  "tezao",
+  "tezuda",
+  "tezudo",
+  "traveco",
+  "trocha",
+  "trolha",
+  "troucha",
+  "trouxa",
+  "troxa",
+  "tuberculoso",
+  "tupiniquim",
+  "turco",
+  "vaca",
+  "vadia",
+  "vagal",
+  "vagabunda",
+  "vagabundo",
+  "vagina",
+  "veada",
+  "veadao",
+  "veado",
+  "viada",
+  "viadagem",
+  "viadao",
+  "viadão",
+  "viado",
+  "viadão",
+  "víado",
+  "xana",
+  "xaninha",
+  "xavasca",
+  "xerereca",
+  "xexeca",
+  "xibiu",
+  "xibumba",
+  "xiíta",
+  "xochota",
+  "xota",
+  "xoxota",
+  "animal de teta",
+  "animaldeteta",
+  "anormal",
+  "argentino",
+  "arregassado",
+  "arrombado",
+  "babaca",
+  "baitola",
+  "baleia",
+  "barril",
+  "benfiquista",
+  "biba",
+  "bicha",
+  "bios",
+  "biroska",
+  "bobo",
+  "bocal",
+  "bolagato",
+  "boqueteiro",
+  "bosta",
+  "buceta",
+  "bundao",
+  "burro",
+  "cabaco",
+  "cacete",
+  "cadelona",
+  "cafona",
+  "cambista",
+  "capiroto",
+  "caralho",
+  "catraia",
+  "cepo",
+  "cocodrilo",
+  "cocozento",
+  "cu",
+  "debilmental",
+  "demente",
+  "desciclope",
+  "desgracado",
+  "drogado",
+  "eguenorante",
+  "endemoniado",
+  "energumeno",
+  "enfianocu",
+  "engolerola",
+  "escroto",
+  "esdruxulo",
+  "esporrado",
+  "estigalhado",
+  "estrume",
+  "estrunxado",
+  "estupido",
+  "fdp",
+  "fidumaegua",
+  "filhodaputa",
+  "fiofo",
+  "foda",
+  "fuder",
+  "fudido",
+  "fulera",
+  "galinha",
+  "gambiarra",
+  "geisyarruda",
+  "gnu",
+  "gonorreia",
+  "gordoescroto",
+  "gozado",
+  "herege",
+  "idiota",
+  "ignorante",
+  "imbecil",
+  "imundo",
+  "inascivel",
+  "inseto",
+  "invertebrado",
+  "jacu",
+  "jegue",
+  "jumento",
+  "kct",
+  "komodo",
+  "ku",
+  "lazarento",
+  "lazaro!",
+  "leproso",
+  "lerdo",
+  "lesma",
+  "lezado",
+  "lico",
+  "limpezaanal",
+  "lixo",
+  "lombriga",
+  "macaco",
+  "marimoon",
+  "merda",
+  "meretriz",
+  "miolodecu",
+  "mocorongo",
+  "montedemerda",
+  "morfetico",
+  "mulambo",
+  "n00b",
+  "nazista",
+  "nerd",
+  "newbie",
+  "nhaca",
+  "nonsense",
+  "ogro",
+  "olhodocu",
+  "olhogordo",
+  "otario",
+  "palhaco",
+  "panaca",
+  "paraguaio",
+  "passaralho",
+  "paunocu",
+  "periquita",
+  "pimenteira",
+  "pipoca",
+  "piranha",
+  "piroca",
+  "pistoleira",
+  "porra",
+  "prostituta",
+  "punheta",
+  "puta",
+  "putaquepariu",
+  "quasimodo",
+  "quenga",
+  "quirguistao",
+  "rampero",
+  "rapariga",
+  "raspadinha",
+  "retardado",
+  "rusguento",
+  "sanguesuga",
+  "sujo",
+  "tapado",
+  "tarado",
+  "tesao",
+  "tetuda",
+  "tetudo",
+  "tosco",
+  "tragado",
+  "travesti",
+  "trepadeira",
+  "troglodita",
+  "urubu",
+  "vaca",
+  "vadia",
+  "vagabundo",
+  "vagaranha",
+  "vaiamerda",
+  "vaisefuder",
+  "vaitomarnocu",
+  "vascaino",
+  "verme",
+  "viado",
+  "xavasca",
+  "xereca",
+  "xixizento",
+  "xoxota",
+  "xupetinha",
+  "xupisco",
+  "xurupita",
+  "xuxexo",
+  "xxt",
+  "xxx",
+  "zebuceta",
+  "ziguizira",
+  "zina",
+  "zoado",
+  "zoiudo",
+  "zoneira",
+  "zuado",
+  "zuera",
+  "zulu",
+  "zureta",
+  ]