Upload 3 files
- app.py +17 -25
- requirements.txt +180 -0
- utils.py +77 -41
app.py
CHANGED
@@ -1,23 +1,7 @@
-# ---
-# jupyter:
-#   jupytext:
-#     formats: ipynb,py:light
-#     text_representation:
-#       extension: .py
-#       format_name: light
-#       format_version: '1.5'
-#       jupytext_version: 1.14.1
-#   kernelspec:
-#     display_name: Python 3 (ipykernel)
-#     language: python
-#     name: python3
-# ---
-
-# +
 import gradio as gr
 import requests
 import json
-from utils import get_story, get_voice_filename,
+from utils import get_story, get_voice_filename, get_musicgen_music, get_mubert_music
 
 with gr.Blocks() as performance:
     with gr.Tab("Story Generation"):
@@ -33,21 +17,29 @@ with gr.Blocks() as performance:
     with gr.Tab("Voice Generation"):
         input_text = gr.Textbox(lines=10, label="문장")
         input_gender = gr.Radio(choices=["남성", "여성"], value="남성", label="성별")
-        input_age = gr.Radio(choices=["
-
+        input_age = gr.Radio(choices=["청년", "중년"], value='청년', label="연령대")
+        input_speed = gr.Slider(minimum=0, maximum=5, step=1, value=1, label="속도")
+        input_pitch = gr.Slider(minimum=0, maximum=5, step=1, value=1, label="음색")
+        input_alpha = gr.Slider(minimum=-5, maximum=0, step=1, value=-1, label="높낮이")
+        with gr.Row():
             with gr.Column():
                 b2 = gr.Button().style(full_width=True)
-                b2.click(get_voice_filename, [input_text, input_gender, input_age], [gr.Audio(label="결과 음성 파일", type="filepath")])
+                b2.click(get_voice_filename, [input_text, input_gender, input_age, input_speed, input_pitch, input_alpha], [gr.Audio(label="결과 음성 파일", type="filepath")])
 
-    with gr.Tab("Music Generation"):
+    with gr.Tab("Music Generation - MusicGen"):
         input_text = gr.Textbox(lines=10, label="문장")
         input_duration = gr.Number(label="음악 시간(s)", value=30)
        with gr.Row():
             with gr.Column():
                 b3 = gr.Button().style(full_width=True)
-                b3.click(
-
+                b3.click(get_musicgen_music, [input_text, input_duration], gr.Audio(label="결과 음악 파일", type='filepath'))
 
-performance.queue(max_size=5).launch()
-# -
+    # with gr.Tab("Music Generation - Mubert"):
+    #     input_text = gr.Textbox(lines=10, label="문장")
+    #     input_duration = gr.Number(label="음악 시간(s)", value=30)
+    #     with gr.Row():
+    #         with gr.Column():
+    #             b3 = gr.Button().style(full_width=True)
+    #             b3.click(get_mubert_music, [input_text, input_duration], gr.Audio(label="결과 음악 파일", type='filepath'))
+
+performance.queue().launch(share=True, auth=('soojinchoi', 'dance1234'))
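Note on the wiring pattern used throughout app.py: in Gradio, Button.click(fn, inputs, outputs) gathers the current values of the listed input components, calls fn with them positionally, and routes the return value to the output component. The sketch below is a minimal, self-contained illustration of that pattern with a stub handler standing in for get_voice_filename (so it runs without the Clova credentials); it is not part of the Space's code, and the names demo/fake_voice_filename are made up for the example.

import gradio as gr

# Stub in place of utils.get_voice_filename; a real handler would return a path to an .mp3 file.
def fake_voice_filename(text, gender, age, speed, pitch, alpha):
    print(text, gender, age, speed, pitch, alpha)
    return None

with gr.Blocks() as demo:
    text = gr.Textbox(lines=3, label="Text")
    gender = gr.Radio(choices=["male", "female"], value="male", label="Gender")
    age = gr.Radio(choices=["youth", "middle_aged"], value="youth", label="Age group")
    speed = gr.Slider(0, 5, step=1, value=1, label="Speed")
    pitch = gr.Slider(0, 5, step=1, value=1, label="Pitch")
    alpha = gr.Slider(-5, 0, step=1, value=-1, label="Alpha")
    out = gr.Audio(label="Result", type="filepath")  # declare the output component up front
    btn = gr.Button("Generate")
    # One click event: collect the six inputs, call the handler, write its return value to `out`.
    btn.click(fake_voice_filename, [text, gender, age, speed, pitch, alpha], out)

demo.queue(max_size=5).launch()

The diff instead constructs gr.Audio inline inside the click() call; that also works, because instantiating a component inside a gr.Blocks context registers it with the layout, but declaring the output first (as above) is the more common style.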
requirements.txt
ADDED
@@ -0,0 +1,180 @@
+aiofiles==23.2.1
+aiohttp==3.8.5
+aiosignal==1.3.1
+altair==5.0.1
+annotated-types==0.5.0
+antlr4-python3-runtime==4.9.3
+anyio==3.7.1
+appdirs==1.4.4
+asttokens==2.2.1
+async-timeout==4.0.3
+attrs==23.1.0
+audiocraft==0.0.2
+audioread==3.0.0
+av==10.0.0
+backcall==0.2.0
+blis==0.7.10
+catalogue==2.0.9
+certifi==2023.7.22
+cffi==1.15.1
+charset-normalizer==3.2.0
+click==8.1.6
+cloudpickle==2.2.1
+cmake==3.27.1
+colorlog==6.7.0
+comm==0.1.4
+confection==0.1.1
+contourpy==1.1.0
+cycler==0.11.0
+cymem==2.0.7
+Cython==3.0.0
+debugpy==1.6.7.post1
+decorator==5.1.1
+demucs==4.0.0
+diffq==0.2.4
+docopt==0.6.2
+dora-search==0.1.12
+einops==0.6.1
+exceptiongroup==1.1.2
+executing==1.2.0
+fastapi==0.101.0
+ffmpy==0.3.1
+filelock==3.12.2
+flashy==0.0.2
+fonttools==4.42.0
+frozenlist==1.4.0
+fsspec==2023.6.0
+gradio==3.40.1
+gradio_client==0.4.0
+h11==0.14.0
+httpcore==0.17.3
+httpx==0.24.1
+huggingface-hub==0.16.4
+hydra-colorlog==1.2.0
+hydra-core==1.3.2
+idna==3.4
+importlib-metadata==6.8.0
+importlib-resources==6.0.1
+ipykernel==6.25.1
+ipython==8.14.0
+jedi==0.19.0
+Jinja2==3.1.2
+joblib==1.3.2
+jsonschema==4.19.0
+jsonschema-specifications==2023.7.1
+julius==0.2.7
+jupyter_client==8.3.0
+jupyter_core==5.3.1
+kiwisolver==1.4.4
+lameenc==1.5.1
+langcodes==3.3.0
+lazy_loader==0.3
+librosa==0.10.0.post2
+linkify-it-py==2.0.2
+lit==16.0.6
+llvmlite==0.40.1
+markdown-it-py==2.2.0
+MarkupSafe==2.1.3
+matplotlib==3.7.2
+matplotlib-inline==0.1.6
+mdit-py-plugins==0.3.3
+mdurl==0.1.2
+mpmath==1.3.0
+msgpack==1.0.5
+multidict==6.0.4
+murmurhash==1.0.9
+mypy-extensions==1.0.0
+nest-asyncio==1.5.7
+networkx==3.1
+num2words==0.5.12
+numba==0.57.1
+numpy==1.24.4
+nvidia-cublas-cu11==11.10.3.66
+nvidia-cuda-cupti-cu11==11.7.101
+nvidia-cuda-nvrtc-cu11==11.7.99
+nvidia-cuda-runtime-cu11==11.7.99
+nvidia-cudnn-cu11==8.5.0.96
+nvidia-cufft-cu11==10.9.0.58
+nvidia-curand-cu11==10.2.10.91
+nvidia-cusolver-cu11==11.4.0.1
+nvidia-cusparse-cu11==11.7.4.91
+nvidia-nccl-cu11==2.14.3
+nvidia-nvtx-cu11==11.7.91
+omegaconf==2.3.0
+openunmix==1.2.1
+orjson==3.9.4
+packaging==23.1
+pandas==2.0.3
+parso==0.8.3
+pathy==0.10.2
+pexpect==4.8.0
+pickleshare==0.7.5
+Pillow==10.0.0
+platformdirs==3.10.0
+pooch==1.6.0
+preshed==3.0.8
+prompt-toolkit==3.0.39
+psutil==5.9.5
+ptyprocess==0.7.0
+pure-eval==0.2.2
+pycparser==2.21
+pydantic==1.10.12
+pydantic_core==2.4.0
+pydub==0.25.1
+Pygments==2.16.1
+pyparsing==3.0.9
+pyre-extensions==0.0.29
+python-dateutil==2.8.2
+python-dotenv==1.0.0
+python-multipart==0.0.6
+pytz==2023.3
+PyYAML==6.0.1
+pyzmq==25.1.1
+referencing==0.30.2
+regex==2023.8.8
+requests==2.31.0
+retrying==1.3.4
+rpds-py==0.9.2
+safetensors==0.3.2
+scikit-learn==1.3.0
+scipy==1.11.1
+semantic-version==2.10.0
+sentencepiece==0.1.99
+six==1.16.0
+smart-open==6.3.0
+sniffio==1.3.0
+soundfile==0.12.1
+soxr==0.3.5
+spacy==3.5.2
+spacy-legacy==3.0.12
+spacy-loggers==1.0.4
+srsly==2.4.7
+stack-data==0.6.2
+starlette==0.27.0
+submitit==1.4.5
+sympy==1.12
+thinc==8.1.12
+threadpoolctl==3.2.0
+tokenizers==0.13.3
+toolz==0.12.0
+torch==2.0.1
+torchaudio==2.0.2
+tornado==6.3.2
+tqdm==4.66.1
+traitlets==5.9.0
+transformers==4.31.0
+treetable==0.2.5
+triton==2.0.0
+typer==0.7.0
+typing-inspect==0.9.0
+typing_extensions==4.7.1
+tzdata==2023.3
+uc-micro-py==1.0.2
+urllib3==2.0.4
+uvicorn==0.23.2
+wasabi==1.1.2
+wcwidth==0.2.6
+websockets==11.0.3
+xformers==0.0.20
+yarl==1.9.2
+zipp==3.16.2
utils.py
CHANGED
@@ -1,19 +1,3 @@
-# ---
-# jupyter:
-#   jupytext:
-#     formats: ipynb,py:light
-#     text_representation:
-#       extension: .py
-#       format_name: light
-#       format_version: '1.5'
-#       jupytext_version: 1.14.1
-#   kernelspec:
-#     display_name: Python 3 (ipykernel)
-#     language: python
-#     name: python3
-# ---
-
-# +
 import os
 import sys
 import time
@@ -22,7 +6,8 @@ import json
 import random
 import requests
 from voice import voice_dict
-
+from dotenv import load_dotenv
+load_dotenv('credentials.env')
 OPENAPI_KEY = os.getenv('OPENAPI_KEY')
 CLOVA_VOICE_Client_ID = os.getenv('CLOVA_VOICE_Client_ID')
 CLOVA_VOICE_Client_Secret = os.getenv('CLOVA_VOICE_Client_Secret')
@@ -32,25 +17,31 @@ mubert_pat = os.getenv('mubert_pat')
 SUMMARY_Client_ID = os.getenv('SUMMARY_Client_ID')
 SUMMARY_Client_Secret = os.getenv('SUMMARY_Client_Secret')
 
+import time
+import os
+import subprocess
+from tempfile import NamedTemporaryFile
+
+import torch
+from audiocraft.data.audio import audio_write
+from audiocraft.models import MusicGen
+
+# Using small model, better results would be obtained with `medium` or `large`.
+model = MusicGen.get_pretrained('melody')
+model.set_generation_params(
+    use_sampling=True,
+    top_k=250,
+    duration=30
+)
 
-def get_story(first_sentence:str, num_sentences:int):
-    response = requests.post("https://api.openai.com/v1/chat/completions",
-                             headers={"Content-Type": "application/json", "Authorization": f"Bearer {OPENAPI_KEY}"},
-                             data=json.dumps({
-                                 "model": "gpt-3.5-turbo",
-                                 "messages": [{"role": "system", "content": "You are a helpful assistant."},
-                                              {"role": "user", "content": f"""I will provide the first sentence of the novel, and please write {num_sentences} sentences continuing the story in a first-person protagonist's perspective in Korean. Don't number the sentences.
-                                              \n\nFirst sentence: {first_sentence}"""}]
-                             }))
-    return response.json()['choices'][0]['message']['content']
 
-def get_voice(input_text:str, gender:str="female", age_group:str="youth", filename="voice.mp3"):
+def get_voice(input_text:str, gender:str="female", age_group:str="youth", speed:int=1, pitch:int=1, alpha:int=-1, filename="voice.mp3"):
     """
     gender: female or male
     age_group: child, teenager, youth, middle_aged
     """
     speaker = random.choice(voice_dict[gender][age_group])
-    data = {"speaker":speaker, "text":input_text}
+    data = {"speaker":speaker, "text":input_text, 'speed':speed, 'pitch':pitch, 'alpha':alpha}
     url = "https://naveropenapi.apigw.ntruss.com/tts-premium/v1/tts"
     headers = {
         "X-NCP-APIGW-API-KEY-ID": CLOVA_VOICE_Client_ID,
@@ -112,12 +103,13 @@ def get_summary(input_text:str, summary_count:int = 5):
     response = requests.post(url, headers=headers, data=json.dumps(data))
     if response.status_code == 200:
         return ' '.join(response.json()['summary'].split('\n'))
+    elif response.status_code == 400 and response.json()['error']['errorCode'] == 'E100':
+        return input_text
     else:
         print("Error Code: " + str(response.status_code))
         print("Error Message: " + str(response.json()))
 
-
-def get_music(text, duration=300):
+def get_mubert_music(text, duration=300):
     print('original text length: ', len(text))
     summary = get_summary(text, 3)
     print('summary text length: ', len(summary))
@@ -186,6 +178,50 @@ def get_music(text, duration=300):
     print(f"{local_filename} 파일이 저장되었습니다.")
     return local_filename
 
+def get_musicgen_music(text, duration=300):
+    file_name = 'musicgen_output.wav'
+    print('original text length: ', len(text))
+    summary = get_summary(text, 3)
+    print('summary text length: ', len(summary))
+    translated_text = translate_text(summary)
+    print('translated_text length: ', len(translated_text))
+    if len(translated_text) > 200:
+        translated_text = translated_text[:200]
+    print(translated_text)
+    start = time.time()
+    overlap = 5
+    music_length = 30
+    target_length = duration
+    desc = [translated_text]
+    print(model.sample_rate)
+    output = model.generate(descriptions=desc, progress=True)
+    while music_length < target_length:
+        last_sec = output[:, :, int(-overlap*model.sample_rate):]
+        cont = model.generate_continuation(last_sec, model.sample_rate, descriptions=desc, progress=True)
+        output = torch.cat([output[:, :, :int(-overlap*model.sample_rate)], cont], 2)
+        music_length = output.shape[2] / model.sample_rate
+    if music_length > target_length:
+        output = output[:, :, :int(target_length*model.sample_rate)]
+
+    output = output.detach().cpu().float()[0]
+    audio_write(
+        file_name, output, model.sample_rate, strategy="loudness",
+        loudness_headroom_db=16, loudness_compressor=True, add_suffix=False)
+
+    print(f'Elapsed time: {time.time() - start}')
+    return file_name
+
+# def get_story(first_sentence:str, history, num_sentences:int):
+#     response = requests.post("https://api.openai.com/v1/chat/completions",
+#                              headers={"Content-Type": "application/json", "Authorization": f"Bearer {OPENAPI_KEY}"},
+#                              data=json.dumps({
+#                                  "model": "gpt-3.5-turbo",
+#                                  "messages": [{"role": "system", "content": "You are a helpful assistant."},
+#                                               {"role": "user", "content": f"""I will provide the first sentence of the novel, and please write {num_sentences} sentences continuing the story in a first-person protagonist's perspective in Korean. Don't number the sentences.
+#                                               \n\nStory: {first_sentence}"""}]
+#                              }))
+#     print(response.json())
+#     return response.json()['choices'][0]['message']['content']
 
 def get_story(first_sentence:str, num_sentences:int, chatbot=[], history=[]):
     history.append(first_sentence)
@@ -224,24 +260,24 @@ def get_story(first_sentence:str, num_sentences:int, chatbot=[], history=[]):
         yield chat, history, response
 
 
-def get_voice_filename(text, gender, age):
+def get_voice_filename(text, gender, age, speed, pitch, alpha):
     filename = None
     if gender == '남성':
         if age == "어린이":
-            filename = get_voice(text, gender="male", age_group="child", filename="voice.mp3")
+            filename = get_voice(text, gender="male", age_group="child", speed=speed, pitch=pitch, alpha=alpha, filename="voice.mp3")
         elif age == "청소년":
-            filename = get_voice(text, gender="male", age_group="teenager", filename="voice.mp3")
+            filename = get_voice(text, gender="male", age_group="teenager", speed=speed, pitch=pitch, alpha=alpha, filename="voice.mp3")
         elif age == "청년":
-            filename = get_voice(text, gender="male", age_group="youth", filename="voice.mp3")
+            filename = get_voice(text, gender="male", age_group="youth", speed=speed, pitch=pitch, alpha=alpha, filename="voice.mp3")
         elif age == "중년":
-            filename = get_voice(text, gender="male", age_group="middle_aged", filename="voice.mp3")
+            filename = get_voice(text, gender="male", age_group="middle_aged", speed=speed, pitch=pitch, alpha=alpha, filename="voice.mp3")
     else:
         if age == "어린이":
-            filename = get_voice(text, gender="female", age_group="child", filename="voice.mp3")
+            filename = get_voice(text, gender="female", age_group="child", speed=speed, pitch=pitch, alpha=alpha, filename="voice.mp3")
         elif age == "청소년":
-            filename = get_voice(text, gender="female", age_group="teenager", filename="voice.mp3")
+            filename = get_voice(text, gender="female", age_group="teenager", speed=speed, pitch=pitch, alpha=alpha, filename="voice.mp3")
         elif age == "청년":
-            filename = get_voice(text, gender="female", age_group="youth", filename="voice.mp3")
+            filename = get_voice(text, gender="female", age_group="youth", speed=speed, pitch=pitch, alpha=alpha, filename="voice.mp3")
         elif age == "중년":
-            filename = get_voice(text, gender="female", age_group="middle_aged", filename="voice.mp3")
-    return filename
+            filename = get_voice(text, gender="female", age_group="middle_aged", speed=speed, pitch=pitch, alpha=alpha, filename="voice.mp3")
+    return filename
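A note on the core trick in get_musicgen_music above: MusicGen only generates up to the configured duration (30 s here), so the function grows the clip by feeding the last `overlap` seconds back in as an audio prompt via generate_continuation, dropping that overlap from the existing audio, and appending the newly returned 30 s clip (which begins with a re-rendering of the prompt), gaining roughly 25 s per iteration. The sketch below restates that loop as a standalone helper against the same audiocraft 0.0.2 API; it is an illustration, not the Space's exact code, and generate_long, its parameters, and the output stem name are invented for the example.

import torch
from audiocraft.data.audio import audio_write
from audiocraft.models import MusicGen

model = MusicGen.get_pretrained('melody')
model.set_generation_params(use_sampling=True, top_k=250, duration=30)

def generate_long(prompt: str, target_s: float = 90.0, overlap_s: float = 5.0) -> str:
    sr = model.sample_rate
    # First window: a [1, channels, 30*sr] tensor conditioned on the text prompt.
    wav = model.generate(descriptions=[prompt], progress=True)
    while wav.shape[-1] / sr < target_s:
        tail = wav[:, :, -int(overlap_s * sr):]  # last overlap_s seconds, used as the audio prompt
        # generate_continuation returns the prompt followed by new audio (30 s total),
        # so the old tail is dropped before concatenating to avoid doubling it.
        cont = model.generate_continuation(tail, sr, descriptions=[prompt], progress=True)
        wav = torch.cat([wav[:, :, :-int(overlap_s * sr)], cont], dim=-1)
    wav = wav[:, :, :int(target_s * sr)]  # trim the final window to the exact target length
    audio_write('long_clip', wav[0].detach().cpu().float(), sr,
                strategy="loudness", loudness_compressor=True)
    return 'long_clip.wav'

The Space's version does the same thing but takes the description from the summarized, translated story text, caps it at 200 characters, and writes musicgen_output.wav with add_suffix=False.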