Create app.py
Browse files
app.py
ADDED
@@ -0,0 +1,71 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import gradio as gr
|
2 |
+
from openai import OpenAI
|
3 |
+
import re
|
4 |
+
from konlpy.tag import Kkma
|
5 |
+
from TextUtil.digit2text import digit2txt, NNGdigit2txt, CSign2txt
|
6 |
+
|
7 |
+
def process_txt(text):
    """Rewrite numbers and currency symbols in Korean text as words.

    The text is scanned for runs of Hangul syllables, Latin letters (and
    periods), numbers, currency symbols and whitespace. Hangul, Latin and
    whitespace runs are copied through unchanged. Numbers are converted with
    ``NNGdigit2txt`` when the morpheme that follows is tagged ``NNG`` by Kkma
    and is not a currency unit, otherwise with ``digit2txt``. Currency
    symbols are converted with ``CSign2txt``.

    Characters that belong to none of these classes (for example most
    punctuation) are not copied to the output.

    Args:
        text: The input string.

    Returns:
        The converted string. An empty input yields an empty string.
    """
    # Groups: 1 = Hangul syllables, 2 = Latin letters and '.',
    # 3 = number (may contain ',' and '.'), 4 = currency symbol
    # ($, euro, pound, yen, full-width won), 5 = whitespace.
    pattern = re.compile(
        r'([가-힣]+)|([a-zA-Z.]+)|(\d[\d,.]*)|(\$|€|£|¥|￦)|(\s+)'
    )

    # Nouns after which the number keeps the digit2txt reading even though
    # Kkma tags them NNG: dollar, euro, pound, yen, won.
    # NOTE(review): '유료' is carried over from the original list; '유로'
    # (euro) was added because the rest of the list mirrors the currency
    # symbols above — confirm which spelling CSign2txt / callers rely on.
    currency_units = ('달러', '유로', '유료', '파운드', '엔', '원')

    kkma = None  # created lazily: only needed when the text contains a number
    pieces = []
    for match in pattern.finditer(text):
        if match.group(1):  # Korean part
            pieces.append(match.group(1))
        elif match.group(2):  # Latin part
            pieces.append(match.group(2))
        elif match.group(3):  # Number part
            if kkma is None:
                kkma = Kkma()
            digits = match.group(3).replace(',', '')
            remainder = text[match.end(3):]
            # A number at the very end of the text (or followed only by
            # whitespace) has no next morpheme; indexing [0] would fail.
            tokens = kkma.pos(remainder) if remainder.strip() else []
            next_word = tokens[0] if tokens else None
            if (
                next_word is not None
                and next_word[1] == "NNG"
                and next_word[0] not in currency_units
            ):
                pieces.append(NNGdigit2txt(digits))
            else:
                pieces.append(digit2txt(digits))
        elif match.group(4):  # Currency symbol part
            pieces.append(CSign2txt(match.group(4)))
        elif match.group(5):  # Space part
            pieces.append(match.group(5))
    return "".join(pieces)
|
32 |
+
|
33 |
+
|
34 |
+
def generate_audio(api_key, file, model, voice):
    """Convert an uploaded text file to an MP3 with OpenAI text-to-speech.

    Args:
        api_key: OpenAI API key entered by the user.
        file: Raw bytes of the uploaded text file; must be UTF-8 encoded.
        model: TTS model name passed to the API.
        voice: Voice name passed to the API.

    Returns:
        Path of the generated MP3 file (named ``generated_audio.mp3``).

    Raises:
        gr.Error: If the API key, file, model or voice is missing, if the
            file is not valid UTF-8, or if no text remains after processing.
    """
    import os
    import tempfile

    # Validate user input up front so the UI shows a readable message
    # instead of an AttributeError or an opaque API failure.
    if not api_key or not api_key.strip():
        raise gr.Error("Please enter your OpenAI API key.")
    if file is None:
        raise gr.Error("Please upload a text file.")
    if not model or not voice:
        raise gr.Error("Please select a model and a voice.")

    # Read the file
    try:
        text = file.decode("utf-8")
    except UnicodeDecodeError as err:
        raise gr.Error("The uploaded file must be UTF-8 encoded text.") from err

    # Process the text
    text = process_txt(text)
    print(text)
    if not text.strip():
        raise gr.Error("The uploaded file contains no readable text.")

    # Initialise the OpenAI client with the user-supplied API key
    client = OpenAI(api_key=api_key.strip())

    # TTS request
    response = client.audio.speech.create(
        model=model,
        voice=voice,
        input=text,
    )

    # Save as MP3. Each request gets its own temporary directory so that
    # concurrent requests do not overwrite one another's output file.
    f_name = "generated_audio"
    out_dir = tempfile.mkdtemp(prefix="tts_")
    speech_file_path = os.path.join(out_dir, f"{f_name}.mp3")
    response.stream_to_file(speech_file_path)

    return speech_file_path
|
58 |
+
|
59 |
+
# Gradio μΈν°νμ΄μ€ μ μ
|
60 |
+
iface = gr.Interface(
|
61 |
+
fn=generate_audio,
|
62 |
+
inputs=[
|
63 |
+
gr.Text(label="Enter OpenAI API Key"),
|
64 |
+
gr.File(label="Upload Text File", type="binary"),
|
65 |
+
gr.Radio(choices=["tts-1", "tts-1-hd"], label="Model"),
|
66 |
+
gr.Radio(choices=["alloy", "echo", "fable", "onyx", "nova", "shimmer"], label="Voice")
|
67 |
+
],
|
68 |
+
outputs=gr.File(label="Download MP3 File"),
|
69 |
+
title="Text-to-Speech Converter (Korean Digit2Text)",
|
70 |
+
description="Upload a text file and enter your OpenAI API key to convert it into speech using OpenAI's Text-to-Speech models.<br>*ν΄λΉ μλΉμ€λ νκ΅μ΄μ λ§μΆ€νλμ΄ μμ΅λλ€. <br>*νκ΅μ΄ μ«μ λ°μ λ³νμ ν΅ν΄ λ μ νν μ«μ TTSλ₯Ό κ°λ₯νκ² ν©λλ€.<br>*μμ: 50,000$ -> μ€λ§λ¬λ¬, 5κ°μ§ -> λ€μ―κ°μ§, 99κΆ -> μνμνκΆ"
|
71 |
+
)
|