Upload 3 files
- app.py +17 -25
- requirements.txt +180 -0
- utils.py +77 -41
app.py
CHANGED
@@ -1,23 +1,7 @@
-# ---
-# jupyter:
-#   jupytext:
-#     formats: ipynb,py:light
-#     text_representation:
-#       extension: .py
-#       format_name: light
-#       format_version: '1.5'
-#       jupytext_version: 1.14.1
-#   kernelspec:
-#     display_name: Python 3 (ipykernel)
-#     language: python
-#     name: python3
-# ---
-
-# +
 import gradio as gr
 import requests
 import json
-from utils import get_story, get_voice_filename,
+from utils import get_story, get_voice_filename, get_musicgen_music, get_mubert_music
 
 with gr.Blocks() as performance:
     with gr.Tab("Story Generation"):
@@ -33,21 +17,29 @@ with gr.Blocks() as performance:
     with gr.Tab("Voice Generation"):
         input_text = gr.Textbox(lines=10, label="문장")
         input_gender = gr.Radio(choices=["남성", "여성"], value="남성", label="성별")
-        input_age = gr.Radio(choices=["
-
+        input_age = gr.Radio(choices=["청년", "중년"], value='청년', label="연령대")
+        input_speed = gr.Slider(minimum=0, maximum=5, step=1, value=1, label="속도")
+        input_pitch = gr.Slider(minimum=0, maximum=5, step=1, value=1, label="음색")
+        input_alpha = gr.Slider(minimum=-5, maximum=0, step=1, value=-1, label="높낮이")
+        with gr.Row():
             with gr.Column():
                 b2 = gr.Button().style(full_width=True)
-                b2.click(get_voice_filename, [input_text, input_gender, input_age], [gr.Audio(label="결과 음성 파일", type="filepath")])
+                b2.click(get_voice_filename, [input_text, input_gender, input_age, input_speed, input_pitch, input_alpha], [gr.Audio(label="결과 음성 파일", type="filepath")])
 
-    with gr.Tab("Music Generation"):
+    with gr.Tab("Music Generation - MusicGen"):
         input_text = gr.Textbox(lines=10, label="문장")
         input_duration = gr.Number(label="음악 시간(s)", value=30)
        with gr.Row():
             with gr.Column():
                 b3 = gr.Button().style(full_width=True)
-                b3.click(
-
+                b3.click(get_musicgen_music, [input_text, input_duration], gr.Audio(label="결과 음악 파일", type='filepath'))
 
-performance.queue(max_size=5).launch()
-# -
+    # with gr.Tab("Music Generation - Mubert"):
+    #     input_text = gr.Textbox(lines=10, label="문장")
+    #     input_duration = gr.Number(label="음악 시간(s)", value=30)
+    #     with gr.Row():
+    #         with gr.Column():
+    #             b3 = gr.Button().style(full_width=True)
+    #             b3.click(get_mubert_music, [input_text, input_duration], gr.Audio(label="결과 음악 파일", type='filepath'))
+
+performance.queue().launch(share=True, auth=('soojinchoi', 'dance1234'))
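Note on the wiring pattern used throughout app.py: in Gradio, Button.click(fn, inputs, outputs) gathers the current values of the listed input components, calls fn with them positionally, and routes the return value to the output component. The sketch below is a minimal, self-contained illustration of that pattern with a stub handler standing in for get_voice_filename (so it runs without the Clova credentials); it is not part of the Space's code, and the names demo/fake_voice_filename are made up for the example.

import gradio as gr

# Stub in place of utils.get_voice_filename; a real handler would return a path to an .mp3 file.
def fake_voice_filename(text, gender, age, speed, pitch, alpha):
    print(text, gender, age, speed, pitch, alpha)
    return None

with gr.Blocks() as demo:
    text = gr.Textbox(lines=3, label="Text")
    gender = gr.Radio(choices=["male", "female"], value="male", label="Gender")
    age = gr.Radio(choices=["youth", "middle_aged"], value="youth", label="Age group")
    speed = gr.Slider(0, 5, step=1, value=1, label="Speed")
    pitch = gr.Slider(0, 5, step=1, value=1, label="Pitch")
    alpha = gr.Slider(-5, 0, step=1, value=-1, label="Alpha")
    out = gr.Audio(label="Result", type="filepath")  # declare the output component up front
    btn = gr.Button("Generate")
    # One click event: collect the six inputs, call the handler, write its return value to `out`.
    btn.click(fake_voice_filename, [text, gender, age, speed, pitch, alpha], out)

demo.queue(max_size=5).launch()

The diff instead constructs gr.Audio inline inside the click() call; that also works, because instantiating a component inside a gr.Blocks context registers it with the layout, but declaring the output first (as above) is the more common style.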
requirements.txt
ADDED
@@ -0,0 +1,180 @@
+aiofiles==23.2.1
+aiohttp==3.8.5
+aiosignal==1.3.1
+altair==5.0.1
+annotated-types==0.5.0
+antlr4-python3-runtime==4.9.3
+anyio==3.7.1
+appdirs==1.4.4
+asttokens==2.2.1
+async-timeout==4.0.3
+attrs==23.1.0
+audiocraft==0.0.2
+audioread==3.0.0
+av==10.0.0
+backcall==0.2.0
+blis==0.7.10
+catalogue==2.0.9
+certifi==2023.7.22
+cffi==1.15.1
+charset-normalizer==3.2.0
+click==8.1.6
+cloudpickle==2.2.1
+cmake==3.27.1
+colorlog==6.7.0
+comm==0.1.4
+confection==0.1.1
+contourpy==1.1.0
+cycler==0.11.0
+cymem==2.0.7
+Cython==3.0.0
+debugpy==1.6.7.post1
+decorator==5.1.1
+demucs==4.0.0
+diffq==0.2.4
+docopt==0.6.2
+dora-search==0.1.12
+einops==0.6.1
+exceptiongroup==1.1.2
+executing==1.2.0
+fastapi==0.101.0
+ffmpy==0.3.1
+filelock==3.12.2
+flashy==0.0.2
+fonttools==4.42.0
+frozenlist==1.4.0
+fsspec==2023.6.0
+gradio==3.40.1
+gradio_client==0.4.0
+h11==0.14.0
+httpcore==0.17.3
+httpx==0.24.1
+huggingface-hub==0.16.4
+hydra-colorlog==1.2.0
+hydra-core==1.3.2
+idna==3.4
+importlib-metadata==6.8.0
+importlib-resources==6.0.1
+ipykernel==6.25.1
+ipython==8.14.0
+jedi==0.19.0
+Jinja2==3.1.2
+joblib==1.3.2
+jsonschema==4.19.0
+jsonschema-specifications==2023.7.1
+julius==0.2.7
+jupyter_client==8.3.0
+jupyter_core==5.3.1
+kiwisolver==1.4.4
+lameenc==1.5.1
+langcodes==3.3.0
+lazy_loader==0.3
+librosa==0.10.0.post2
+linkify-it-py==2.0.2
+lit==16.0.6
+llvmlite==0.40.1
+markdown-it-py==2.2.0
+MarkupSafe==2.1.3
+matplotlib==3.7.2
+matplotlib-inline==0.1.6
+mdit-py-plugins==0.3.3
+mdurl==0.1.2
+mpmath==1.3.0
+msgpack==1.0.5
+multidict==6.0.4
+murmurhash==1.0.9
+mypy-extensions==1.0.0
+nest-asyncio==1.5.7
+networkx==3.1
+num2words==0.5.12
+numba==0.57.1
+numpy==1.24.4
+nvidia-cublas-cu11==11.10.3.66
+nvidia-cuda-cupti-cu11==11.7.101
+nvidia-cuda-nvrtc-cu11==11.7.99
+nvidia-cuda-runtime-cu11==11.7.99
+nvidia-cudnn-cu11==8.5.0.96
+nvidia-cufft-cu11==10.9.0.58
+nvidia-curand-cu11==10.2.10.91
+nvidia-cusolver-cu11==11.4.0.1
+nvidia-cusparse-cu11==11.7.4.91
+nvidia-nccl-cu11==2.14.3
+nvidia-nvtx-cu11==11.7.91
+omegaconf==2.3.0
+openunmix==1.2.1
+orjson==3.9.4
+packaging==23.1
+pandas==2.0.3
+parso==0.8.3
+pathy==0.10.2
+pexpect==4.8.0
+pickleshare==0.7.5
+Pillow==10.0.0
+platformdirs==3.10.0
+pooch==1.6.0
+preshed==3.0.8
+prompt-toolkit==3.0.39
+psutil==5.9.5
+ptyprocess==0.7.0
+pure-eval==0.2.2
+pycparser==2.21
+pydantic==1.10.12
+pydantic_core==2.4.0
+pydub==0.25.1
+Pygments==2.16.1
+pyparsing==3.0.9
+pyre-extensions==0.0.29
+python-dateutil==2.8.2
+python-dotenv==1.0.0
+python-multipart==0.0.6
+pytz==2023.3
+PyYAML==6.0.1
+pyzmq==25.1.1
+referencing==0.30.2
+regex==2023.8.8
+requests==2.31.0
+retrying==1.3.4
+rpds-py==0.9.2
+safetensors==0.3.2
+scikit-learn==1.3.0
+scipy==1.11.1
+semantic-version==2.10.0
+sentencepiece==0.1.99
+six==1.16.0
+smart-open==6.3.0
+sniffio==1.3.0
+soundfile==0.12.1
+soxr==0.3.5
+spacy==3.5.2
+spacy-legacy==3.0.12
+spacy-loggers==1.0.4
+srsly==2.4.7
+stack-data==0.6.2
+starlette==0.27.0
+submitit==1.4.5
+sympy==1.12
+thinc==8.1.12
+threadpoolctl==3.2.0
+tokenizers==0.13.3
+toolz==0.12.0
+torch==2.0.1
+torchaudio==2.0.2
+tornado==6.3.2
+tqdm==4.66.1
+traitlets==5.9.0
+transformers==4.31.0
+treetable==0.2.5
+triton==2.0.0
+typer==0.7.0
+typing-inspect==0.9.0
+typing_extensions==4.7.1
+tzdata==2023.3
+uc-micro-py==1.0.2
+urllib3==2.0.4
+uvicorn==0.23.2
+wasabi==1.1.2
+wcwidth==0.2.6
+websockets==11.0.3
+xformers==0.0.20
+yarl==1.9.2
+zipp==3.16.2
utils.py
CHANGED
@@ -1,19 +1,3 @@
-# ---
-# jupyter:
-#   jupytext:
-#     formats: ipynb,py:light
-#     text_representation:
-#       extension: .py
-#       format_name: light
-#       format_version: '1.5'
-#       jupytext_version: 1.14.1
-#   kernelspec:
-#     display_name: Python 3 (ipykernel)
-#     language: python
-#     name: python3
-# ---
-
-# +
 import os
 import sys
 import time
@@ -22,7 +6,8 @@ import json
 import random
 import requests
 from voice import voice_dict
-
+from dotenv import load_dotenv
+load_dotenv('credentials.env')
 OPENAPI_KEY = os.getenv('OPENAPI_KEY')
 CLOVA_VOICE_Client_ID = os.getenv('CLOVA_VOICE_Client_ID')
 CLOVA_VOICE_Client_Secret = os.getenv('CLOVA_VOICE_Client_Secret')
@@ -32,25 +17,31 @@ mubert_pat = os.getenv('mubert_pat')
 SUMMARY_Client_ID = os.getenv('SUMMARY_Client_ID')
 SUMMARY_Client_Secret = os.getenv('SUMMARY_Client_Secret')
 
+import time
+import os
+import subprocess
+from tempfile import NamedTemporaryFile
+
+import torch
+from audiocraft.data.audio import audio_write
+from audiocraft.models import MusicGen
+
+# Using small model, better results would be obtained with `medium` or `large`.
+model = MusicGen.get_pretrained('melody')
+model.set_generation_params(
+    use_sampling=True,
+    top_k=250,
+    duration=30
+)
 
-def get_story(first_sentence:str, num_sentences:int):
-    response = requests.post("https://api.openai.com/v1/chat/completions",
-                             headers={"Content-Type": "application/json", "Authorization": f"Bearer {OPENAPI_KEY}"},
-                             data=json.dumps({
-                                 "model": "gpt-3.5-turbo",
-                                 "messages": [{"role": "system", "content": "You are a helpful assistant."},
-                                              {"role": "user", "content": f"""I will provide the first sentence of the novel, and please write {num_sentences} sentences continuing the story in a first-person protagonist's perspective in Korean. Don't number the sentences.
-                                              \n\nFirst sentence: {first_sentence}"""}]
-                             }))
-    return response.json()['choices'][0]['message']['content']
 
-def get_voice(input_text:str, gender:str="female", age_group:str="youth", filename="voice.mp3"):
+def get_voice(input_text:str, gender:str="female", age_group:str="youth", speed:int=1, pitch:int=1, alpha:int=-1, filename="voice.mp3"):
     """
     gender: female or male
     age_group: child, teenager, youth, middle_aged
     """
     speaker = random.choice(voice_dict[gender][age_group])
-    data = {"speaker":speaker, "text":input_text}
+    data = {"speaker":speaker, "text":input_text, 'speed':speed, 'pitch':pitch, 'alpha':alpha}
     url = "https://naveropenapi.apigw.ntruss.com/tts-premium/v1/tts"
     headers = {
         "X-NCP-APIGW-API-KEY-ID": CLOVA_VOICE_Client_ID,
@@ -112,12 +103,13 @@ def get_summary(input_text:str, summary_count:int = 5):
     response = requests.post(url, headers=headers, data=json.dumps(data))
     if response.status_code == 200:
         return ' '.join(response.json()['summary'].split('\n'))
+    elif response.status_code == 400 and response.json()['error']['errorCode'] == 'E100':
+        return input_text
     else:
         print("Error Code: " + str(response.status_code))
         print("Error Message: " + str(response.json()))
 
-
-def get_music(text, duration=300):
+def get_mubert_music(text, duration=300):
     print('original text length: ', len(text))
     summary = get_summary(text, 3)
     print('summary text length: ', len(summary))
@@ -186,6 +178,50 @@ def get_music(text, duration=300):
     print(f"{local_filename} 파일이 저장되었습니다.")
     return local_filename
 
+def get_musicgen_music(text, duration=300):
+    file_name = 'musicgen_output.wav'
+    print('original text length: ', len(text))
+    summary = get_summary(text, 3)
+    print('summary text length: ', len(summary))
+    translated_text = translate_text(summary)
+    print('translated_text length: ', len(translated_text))
+    if len(translated_text) > 200:
+        translated_text = translated_text[:200]
+    print(translated_text)
+    start = time.time()
+    overlap = 5
+    music_length = 30
+    target_length = duration
+    desc = [translated_text]
+    print(model.sample_rate)
+    output = model.generate(descriptions=desc, progress=True)
+    while music_length < target_length:
+        last_sec = output[:, :, int(-overlap*model.sample_rate):]
+        cont = model.generate_continuation(last_sec, model.sample_rate, descriptions=desc, progress=True)
+        output = torch.cat([output[:, :, :int(-overlap*model.sample_rate)], cont], 2)
+        music_length = output.shape[2] / model.sample_rate
+    if music_length > target_length:
+        output = output[:, :, :int(target_length*model.sample_rate)]
+
+    output = output.detach().cpu().float()[0]
+    audio_write(
+        file_name, output, model.sample_rate, strategy="loudness",
+        loudness_headroom_db=16, loudness_compressor=True, add_suffix=False)
+
+    print(f'Elapsed time: {time.time() - start}')
+    return file_name
+
+# def get_story(first_sentence:str, history, num_sentences:int):
+#     response = requests.post("https://api.openai.com/v1/chat/completions",
+#                              headers={"Content-Type": "application/json", "Authorization": f"Bearer {OPENAPI_KEY}"},
+#                              data=json.dumps({
+#                                  "model": "gpt-3.5-turbo",
+#                                  "messages": [{"role": "system", "content": "You are a helpful assistant."},
+#                                               {"role": "user", "content": f"""I will provide the first sentence of the novel, and please write {num_sentences} sentences continuing the story in a first-person protagonist's perspective in Korean. Don't number the sentences.
+#                                               \n\nStory: {first_sentence}"""}]
+#                              }))
+#     print(response.json())
+#     return response.json()['choices'][0]['message']['content']
 
 def get_story(first_sentence:str, num_sentences:int, chatbot=[], history=[]):
     history.append(first_sentence)
@@ -224,24 +260,24 @@ def get_story(first_sentence:str, num_sentences:int, chatbot=[], history=[]):
         yield chat, history, response
 
 
-def get_voice_filename(text, gender, age):
+def get_voice_filename(text, gender, age, speed, pitch, alpha):
     filename = None
     if gender == '남성':
         if age == "어린이":
-            filename = get_voice(text, gender="male", age_group="child", filename="voice.mp3")
+            filename = get_voice(text, gender="male", age_group="child", speed=speed, pitch=pitch, alpha=alpha, filename="voice.mp3")
         elif age == "청소년":
-            filename = get_voice(text, gender="male", age_group="teenager", filename="voice.mp3")
+            filename = get_voice(text, gender="male", age_group="teenager", speed=speed, pitch=pitch, alpha=alpha, filename="voice.mp3")
         elif age == "청년":
-            filename = get_voice(text, gender="male", age_group="youth", filename="voice.mp3")
+            filename = get_voice(text, gender="male", age_group="youth", speed=speed, pitch=pitch, alpha=alpha, filename="voice.mp3")
         elif age == "중년":
-            filename = get_voice(text, gender="male", age_group="middle_aged", filename="voice.mp3")
+            filename = get_voice(text, gender="male", age_group="middle_aged", speed=speed, pitch=pitch, alpha=alpha, filename="voice.mp3")
     else:
         if age == "어린이":
-            filename = get_voice(text, gender="female", age_group="child", filename="voice.mp3")
+            filename = get_voice(text, gender="female", age_group="child", speed=speed, pitch=pitch, alpha=alpha, filename="voice.mp3")
         elif age == "청소년":
-            filename = get_voice(text, gender="female", age_group="teenager", filename="voice.mp3")
+            filename = get_voice(text, gender="female", age_group="teenager", speed=speed, pitch=pitch, alpha=alpha, filename="voice.mp3")
         elif age == "청년":
-            filename = get_voice(text, gender="female", age_group="youth", filename="voice.mp3")
+            filename = get_voice(text, gender="female", age_group="youth", speed=speed, pitch=pitch, alpha=alpha, filename="voice.mp3")
         elif age == "중년":
-            filename = get_voice(text, gender="female", age_group="middle_aged", filename="voice.mp3")
-    return filename
+            filename = get_voice(text, gender="female", age_group="middle_aged", speed=speed, pitch=pitch, alpha=alpha, filename="voice.mp3")
+    return filename
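A note on the core trick in get_musicgen_music above: MusicGen only generates up to the configured duration (30 s here), so the function grows the clip by feeding the last `overlap` seconds back in as an audio prompt via generate_continuation, dropping that overlap from the existing audio, and appending the newly returned 30 s clip (which begins with a re-rendering of the prompt), gaining roughly 25 s per iteration. The sketch below restates that loop as a standalone helper against the same audiocraft 0.0.2 API; it is an illustration, not the Space's exact code, and generate_long, its parameters, and the output stem name are invented for the example.

import torch
from audiocraft.data.audio import audio_write
from audiocraft.models import MusicGen

model = MusicGen.get_pretrained('melody')
model.set_generation_params(use_sampling=True, top_k=250, duration=30)

def generate_long(prompt: str, target_s: float = 90.0, overlap_s: float = 5.0) -> str:
    sr = model.sample_rate
    # First window: a [1, channels, 30*sr] tensor conditioned on the text prompt.
    wav = model.generate(descriptions=[prompt], progress=True)
    while wav.shape[-1] / sr < target_s:
        tail = wav[:, :, -int(overlap_s * sr):]  # last overlap_s seconds, used as the audio prompt
        # generate_continuation returns the prompt followed by new audio (30 s total),
        # so the old tail is dropped before concatenating to avoid doubling it.
        cont = model.generate_continuation(tail, sr, descriptions=[prompt], progress=True)
        wav = torch.cat([wav[:, :, :-int(overlap_s * sr)], cont], dim=-1)
    wav = wav[:, :, :int(target_s * sr)]  # trim the final window to the exact target length
    audio_write('long_clip', wav[0].detach().cpu().float(), sr,
                strategy="loudness", loudness_compressor=True)
    return 'long_clip.wav'

The Space's version does the same thing but takes the description from the summarized, translated story text, caps it at 200 characters, and writes musicgen_output.wav with add_suffix=False.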