# bgtts / app.py
# (Hugging Face page residue kept for reference: "englissi's picture",
#  commit "Update app.py", 5ecf50f verified, raw / history / blame, 1.36 kB)
import gradio as gr
from transformers import AutoModelForCausalLM, AutoTokenizer
from TTS.api import TTS  # Coqui TTS library
# Load the Bulgarian text-generation model.
# NOTE(review): the previous id, "mradermacher/SambaLingo-Bulgarian-Base-i1-GGUF",
# is a GGUF quantization repository. transformers only loads GGUF checkpoints
# when `gguf_file=<name>.gguf` is passed to `from_pretrained` (and the `gguf`
# package is installed), so the plain calls below could not resolve a config or
# tokenizer from it. Load the upstream checkpoint instead -- confirm the repo id
# and that the host has enough memory for the full-precision weights.
model_name = "sambanovasystems/SambaLingo-Bulgarian-Base"
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForCausalLM.from_pretrained(model_name)

# Load the Bulgarian TTS model (Coqui TTS); the progress bar is disabled.
tts = TTS(model_name="tts_models/bg/cv/vits", progress_bar=False)
# Text generation and TTS conversion function.
def generate_audio(input_text):
    """Continue ``input_text`` with the language model and speak the result.

    Args:
        input_text: Prompt text (the Gradio textbox value).

    Returns:
        Path of a WAV file containing the synthesized speech for the
        generated text (prompt plus continuation, special tokens removed).
    """
    import tempfile  # local import so the block does not rely on file-level changes

    # Generate text. Calling the tokenizer directly (rather than ``encode``)
    # also returns the attention mask, so ``generate`` does not have to guess it.
    encoded = tokenizer(input_text, return_tensors="pt")
    outputs = model.generate(
        **encoded,
        # ``max_length`` counts the prompt tokens too, so a long prompt left no
        # room for any continuation; ``max_new_tokens`` bounds only the new text.
        max_new_tokens=100,
        num_return_sequences=1,
    )
    generated_text = tokenizer.decode(outputs[0], skip_special_tokens=True)

    # Convert to speech. A unique file per call keeps concurrent requests from
    # overwriting each other's audio (the old fixed "output.wav" was shared).
    # The file is intentionally not deleted here: it must outlive this call so
    # the caller can read it.
    with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as tmp:
        audio_path = tmp.name
    tts.tts_to_file(text=generated_text, file_path=audio_path)
    return audio_path
# Create the Gradio interface.
# Build the UI: one multi-line text input, one audio output.
# The user-facing Korean strings were mis-encoded (UTF-8 bytes decoded with a
# single-byte codepage); they are restored here. English meaning, in order:
#   input label  - "Bulgarian text input"
#   output label - "Generated speech"
#   title        - "Bulgarian text generation and speech conversion"
#   description  - "Enter Bulgarian text and the model generates text and
#                   converts it to speech."
interface = gr.Interface(
    fn=generate_audio,
    inputs=gr.Textbox(lines=5, label="불가리아어 텍스트 입력"),
    outputs=gr.Audio(label="생성된 음성"),
    title="불가리아어 텍스트 생성 및 음성 변환",
    description="불가리아어 텍스트를 입력하면 모델이 텍스트를 생성하고 이를 음성으로 변환합니다.",
)

# Start the web app.
interface.launch()