File size: 1,362 Bytes
6a81069
5ecf50f
 
dac6347
5ecf50f
 
 
 
dac6347
5ecf50f
 
 
 
 
 
 
 
 
 
 
 
 
 
dac6347
 
 
 
 
 
5ecf50f
 
8d444a7
6a81069
dac6347
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
import tempfile

import gradio as gr
from transformers import AutoModelForCausalLM, AutoTokenizer
from TTS.api import TTS  # Coqui TTS library

# Load the Bulgarian text-generation model and its tokenizer.
# NOTE(review): the repo id ends in "-GGUF"; if that repo only ships .gguf
# weights, from_pretrained() presumably needs a gguf_file=... argument (or the
# original non-GGUF checkpoint should be used) — confirm against the repo.
model_name = "mradermacher/SambaLingo-Bulgarian-Base-i1-GGUF"
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForCausalLM.from_pretrained(model_name)

# Load the Bulgarian TTS model (Coqui TTS) with its progress bar turned off.
tts = TTS(model_name="tts_models/bg/cv/vits", progress_bar=False)

# Text generation followed by text-to-speech conversion
def generate_audio(input_text):
    """Continue *input_text* with the language model and speak the result.

    Args:
        input_text: Prompt text entered in the UI.

    Returns:
        Path of a WAV file holding the synthesized speech for the generated
        text, or ``None`` when the model produced nothing to speak.
    """
    # Text generation.
    # Calling the tokenizer (instead of .encode) also yields the attention
    # mask, which generate() should receive alongside the input ids.
    encoded = tokenizer(input_text, return_tensors="pt")
    # max_new_tokens bounds only the continuation. max_length also counted the
    # prompt tokens, so a prompt near 100 tokens left no room to generate.
    outputs = model.generate(
        encoded["input_ids"],
        attention_mask=encoded["attention_mask"],
        max_new_tokens=100,
        num_return_sequences=1,
    )
    generated_text = tokenizer.decode(outputs[0], skip_special_tokens=True)

    # Nothing to synthesize: skip TTS rather than feed it an empty string.
    if not generated_text.strip():
        return None

    # TTS conversion.
    # Each call writes to its own file; a single shared "output.wav" would be
    # overwritten when two requests run at the same time. The file is kept
    # (delete=False) because the caller reads it after this function returns.
    with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as wav_file:
        audio_path = wav_file.name
    tts.tts_to_file(text=generated_text, file_path=audio_path)
    return audio_path

# Build the Gradio interface.
# The label/title/description strings are Korean UI text; they were stored as
# mis-decoded UTF-8 (mojibake) and are restored here so the page is readable.
interface = gr.Interface(
    fn=generate_audio,
    inputs=gr.Textbox(lines=5, label="불가리아어 텍스트 입력"),
    outputs=gr.Audio(label="생성된 음성"),
    title="불가리아어 텍스트 생성 및 음성 변환",
    description="불가리아어 텍스트를 입력하면 모델이 텍스트를 생성하고 이를 음성으로 변환합니다.",
)

# Start the web app
interface.launch()