RVC_RULE1

Sleeping

App Files Community

sjufan84 committed on Oct 21, 2023

Commit

999d347

1 Parent(s): a623780

cleaned up UI

Browse files

Files changed (1) hide show

app.py +112 -115

app.py CHANGED Viewed

@@ -1553,16 +1553,38 @@ def zip_downloader(model):
     else:
         return f'./weights/{model}.pth', "Could not find Index file."
-with gr.Blocks(title='RVC RULE1 v1', theme='step-3-profit/Midnight-Deep@=0.0.2') as app:
     global person
     with gr.Tabs():
         with gr.TabItem("Inference"):
-            gr.HTML("<h1>  RVC_RULE1 -- Humans First  </h1>")
-            gr.HTML('<h4> Make sure you hit "Refresh" to load all of the models.\
-                    Once you have chosen "joel.pth" for Joel or "jenny.pth" for Jenny,\
-                    make sure the index setting to the right lines up with the chosen model.')
-            gr.HTML("<h10>   Huggingface version v1 -- DT   </h10>")
-           # gr.HTML("<h4>  If you want to use this space privately, I recommend you duplicate the space.  </h4>")
             # Inference Preset Row
             # with gr.Row():
@@ -1574,111 +1596,78 @@ with gr.Blocks(title='RVC RULE1 v1', theme='step-3-profit/Midnight-Deep@=0.0.2')
             # Other RVC stuff
             with gr.Row():
-                sid0 = gr.Dropdown(label="Choose your Model.", choices=sorted(names), value="joel.pth")
-                refresh_button = gr.Button("Refresh", variant="primary")
-                if check_for_name() != '':
-                    get_vc(sorted(names)[0])
-                vc_transform0 = gr.Number(label="Optional: You can change the pitch here or leave it at 0.\
                                 For male to female conversions, or vice versa, swap the voice first and then\
                                 adjust the pitch after you get a baseline.", value=0)
-                #clean_button = gr.Button(i18n("Uninstall the sound saving video memory"), variant="primary")
-                spk_item = gr.Slider(
-                    minimum=0,
-                    maximum=2333,
-                    step=1,
-                    label=i18n("To uninstall please select Speaker ID Timbre to save the video memory"),
-                    value=0,
-                    visible=False,
-                    interactive=True,
-                )
-                #clean_button.click(fn=clean, inputs=[], outputs=[sid0])
-                sid0.change(
-                    fn=get_vc,
-                    inputs=[sid0],
-                    outputs=[spk_item],
-                )
-                but0 = gr.Button("Convert", variant="primary")
             with gr.Row():
                 with gr.Column():
-                    with gr.Row():
-                        dropbox = gr.File(label="Drop your audio here & hit the Reload button.")
-                    with gr.Row():
-                        record_button=gr.Audio(source="microphone", label="OR Record audio.", type="filepath")
-                    with gr.Row():
-                        input_audio0 = gr.Dropdown(
-                            label="2.Choose your audio.  Hit refresh if you do not see all of your\
-                            clips.  Recorded audio will be saved as a wav file with the timestamp of\
-                            when you recorded it.",
-                            value="./audios/someguy.mp3",
-                            choices=audio_files
-                            )
-                        dropbox.upload(fn=save_to_wav2, inputs=[dropbox], outputs=[input_audio0])
-                        dropbox.upload(fn=change_choices2, inputs=[], outputs=[input_audio0])
-                        refresh_button2 = gr.Button("Refresh", variant="primary", size='sm')
-                        record_button.change(fn=save_to_wav, inputs=[record_button], outputs=[input_audio0])
-                        record_button.change(fn=change_choices2, inputs=[], outputs=[input_audio0])
-                    #with gr.Row():
-                    #    with gr.Accordion('Text To Speech', open=False):
-                    #        with gr.Column():
-                    #            lang = gr.Radio(label='Chinese & Japanese do not work with ElevenLabs currently.',choices=['en','es','fr','pt','zh-CN','de','hi','ja'], value='en')
-                    #            api_box = gr.Textbox(label="Enter your API Key for ElevenLabs, or leave empty to use GoogleTTS", value='')
-                    #            elevenid=gr.Dropdown(label="Voice:", choices=eleven_voices)
-                    #        with gr.Column():
-                    #        tfs = gr.Textbox(label="Input your Text", interactive=True, value="This is a test.")
-                    #            tts_button = gr.Button(value="Speak")
-                    #            tts_button.click(fn=elevenTTS, inputs=[api_box,tfs, elevenid, lang], outputs=[record_button, input_audio0])
-                    with gr.Row():
-                    #    with gr.Accordion('Wav2Lip', open=False):
-                    #        with gr.Row():
-                    #            size = gr.Radio(label='Resolution:',choices=['Half','Full'])
-                    #            face = gr.UploadButton("Upload A Character",type='file')
-                    #            faces = gr.Dropdown(label="OR Choose one:", choices=['None','Ben Shapiro','Andrew Tate'])
-                    #        with gr.Row():
-                    #            preview = gr.Textbox(label="Status:",interactive=False)
-                    #            face.upload(fn=success_message,inputs=[face], outputs=[preview, faces])
-                        with gr.Row():
-                            animation = gr.Video(type='filepath')
-                            refresh_button2.click(fn=change_choices2, inputs=[], outputs=[input_audio0, animation])                    #        with gr.Row():
-                            animate_button = gr.Button('Animate')
                 with gr.Column():
-                    with gr.Accordion("Index Settings", open=False):
-                        file_index1 = gr.Dropdown(
-                            label="3. Path to your added.index file (if it didn't automatically find it.)",
-                            choices=[match_index(sid0)] if file_index else match_index("joel.pth"),
-                            value=match_index("joel.pth"),
-                            interactive=True,
-                            )
-                        sid0.change(fn=match_index, inputs=[sid0],outputs=[file_index1])
-                        refresh_button.click(
-                            fn=change_choices, inputs=[], outputs=[sid0, file_index1]
-                            )
-                        # file_big_npy1 = gr.Textbox(
-                        #     label=i18n("特征文件路径"),
-                        #     value="E:\\codes\py39\\vits_vc_gpu_train\\logs\\mi-test-1key\\total_fea.npy",
-                        #     interactive=True,
-                        # )
-                        index_rate1 = gr.Slider(
-                            minimum=0,
-                            maximum=1,
-                            label=i18n("The proportion of features retrieved"),
-                            value=0.66,
-                            interactive=True,
-                            )
-                    with gr.Row():
-                        vc_output2 = gr.Audio(
-                            label="Output Audio (Click on the Three Dots in the Right Corner to Download)",
-                            type='filepath',
-                            interactive=False,
                         )
-                    with gr.Row():
-                        # Create a new button to calculate the similarity score
-                        similarity_button = gr.Button("Calculate Similarity Score", variant="primary")
-                    with gr.Row():
-                        similarity_score = gr.Textbox(label="Similarity Score", type="text", interactive=False)
-                        similarity_button.click(fn=calculate_similarity_score, inputs=[input_audio0, file_index1, sid0], outputs=[similarity_score])
-                        print(file_index1)
-                        #animate_button.click(fn=mouth, inputs=[size, face, vc_output2, faces], outputs=[animation, preview])
                     with gr.Accordion("Advanced Settings", open=False):
                         f0method0 = gr.Radio(
                             label="Optional: Change the Pitch Extraction Algorithm.\nExtraction methods are sorted from 'worst quality' to 'best quality'.\nmangio-crepe may or may not be better than rmvpe in cases where 'smoothness' is more important, but rmvpe is the best overall.",
@@ -1686,12 +1675,13 @@ with gr.Blocks(title='RVC RULE1 v1', theme='step-3-profit/Midnight-Deep@=0.0.2')
                             value="rmvpe",
                             interactive=True,
                         )
                         crepe_hop_length = gr.Slider(
                             minimum=1,
                             maximum=512,
                             step=1,
-                            label="Mangio-Crepe Hop Length. Higher numbers will reduce the chance of extreme pitch changes but lower numbers will increase accuracy. 64-192 is a good range to experiment with.",
                             value=120,
                             interactive=True,
                             visible=False,
@@ -1782,12 +1772,24 @@ with gr.Blocks(title='RVC RULE1 v1', theme='step-3-profit/Midnight-Deep@=0.0.2')
                         formanting.change(fn=formant_enabled,inputs=[formanting,qfrency,tmbre,frmntbut,formant_preset,formant_refresh_button],outputs=[formanting,qfrency,tmbre,frmntbut,formant_preset,formant_refresh_button])
                         frmntbut.click(fn=formant_apply,inputs=[qfrency, tmbre], outputs=[qfrency, tmbre])
                         formant_refresh_button.click(fn=update_fshift_presets,inputs=[formant_preset, qfrency, tmbre],outputs=[formant_preset, qfrency, tmbre])
             with gr.Row():
-                vc_output1 = gr.Textbox("")
                 f0_file = gr.File(label=i18n("Retrieve feature occupancy F0 curve files, optionally,\
                                             one pitch per line, instead of the default F0 and the upward\
                                             and downward adjustment stop of the small white copy path with\
                                             spaces at the beginning and end and carriage return ratio."), visible=False)
                 but0.click(
                     vc_single,
@@ -1810,7 +1812,7 @@ with gr.Blocks(title='RVC RULE1 v1', theme='step-3-profit/Midnight-Deep@=0.0.2')
                     [vc_output1, vc_output2],
                 )
-            with gr.Accordion("Batch Conversion",open=False):
                 with gr.Row():
                     with gr.Column():
                         vc_transform1 = gr.Number(
@@ -1858,11 +1860,6 @@ with gr.Blocks(title='RVC RULE1 v1', theme='step-3-profit/Midnight-Deep@=0.0.2')
                             inputs=[],
                             outputs=file_index4,
                         )
-                        # file_big_npy2 = gr.Textbox(
-                        #     label=i18n("特征文件路径"),
-                        #     value="E:\\codes\\py39\\vits_vc_gpu_train\\logs\\mi-test-1key\\total_fea.npy",
-                        #     interactive=True,
-                        # )
                         index_rate2 = gr.Slider(
                             minimum=0,
                             maximum=1,

     else:
         return f'./weights/{model}.pth', "Could not find Index file."
+theme = gr.Theme(primary_hue="blue", secondary_hue="gray", neutral_hue="gray")
+with gr.Blocks(title='RVC First Rule v1', theme=theme) as app:
     global person
+    spk_item = gr.Slider(
+                minimum=0,
+                maximum=2333,
+                step=1,
+                label=i18n("To uninstall please select Speaker ID Timbre to save the video memory"),
+                value=0,
+                visible=False,
+                interactive=True,
+            )
     with gr.Tabs():
         with gr.TabItem("Inference"):
+            gr.HTML("<h1>  First Rule -- Humananity First  </h1>")
+            gr.HTML("""<h3> A few notes on the flow of the vocal cloning features:
+                    First, an audio clip needs to be either recorded or uploaded.  These should
+                    be vocals only, preferable under 30 seconds in length.  Longer clips
+                    can be cloned, but will take longer.  Second, a model needs to be chosen --
+                    either Jenny's ("jenny.pth") or Joel's ("joel.pth").  If you do not see them as options,
+                    make sure to "Refresh" the model choices.  Initially, simply
+                    clone the clip to set a baseline.  Then, you can play around with the adjustment settings.
+                    This is especially useful for male to female or female to male conversions
+                    adjustment settings.  This is especially useful for male to female or female
+                    to male conversions.  There are other more advanced settings, including proportion
+                    of features retrieved, which can be used to adjust how closely the algorithm with
+                    match features from one voice to another.  The default settings usually work well,
+                    but you can certainly play around with this to get different results.  Most of all,
+                    have fun! </h3>
+                    """)
+            gr.HTML("<h10>   Huggingface version v1 -- DT   </h10>")
+            # gr.HTML("<h4>  If you want to use this space privately, I recommend you duplicate the space.  </h4>")
             # Inference Preset Row
             # with gr.Row():
             # Other RVC stuff
             with gr.Row():
+                with gr.Column():
+                    dropbox=gr.File(label="Drop your audio here & hit the Reload button.")
+                with gr.Column():
+                    vc_transform0 = gr.Number(label="Optional: You can change the pitch here or leave it at 0.\
                                 For male to female conversions, or vice versa, swap the voice first and then\
                                 adjust the pitch after you get a baseline.", value=0)
+                    sid0 = gr.Dropdown(label="Choose your Model.", choices=sorted(names), value="joel.pth")
+                    sid0.change(
+                        fn=get_vc,
+                        inputs=[sid0],
+                        outputs=[spk_item],
+                    )
+                    refresh_button = gr.Button("Refresh Model List", variant="primary")
+                    if check_for_name() != '':
+                        get_vc(sorted(names)[0])
+                    file_index1 = gr.Dropdown(
+                        label="3. Path to your added.index file (if it didn't automatically find it.)",
+                        choices=[match_index(sid0)] if file_index else match_index("joel.pth"),
+                        value=match_index("joel.pth"),
+                        interactive=True,
+                        visible=False
+                        )
+                    sid0.change(fn=match_index, inputs=[sid0],outputs=[file_index1])
+                    refresh_button.click(
+                        fn=change_choices, inputs=[], outputs=[sid0, file_index1]
+                        )
+                    # file_big_npy1 = gr.Textbox(
+                    #     label=i18n("特征文件路径"),
+                    #     value="E:\\codes\py39\\vits_vc_gpu_train\\logs\\mi-test-1key\\total_fea.npy",
+                    #     interactive=True,
+                    # )
             with gr.Row():
                 with gr.Column():
+                    record_button=gr.Audio(source="microphone", label="OR Record audio.", type="filepath")
                 with gr.Column():
+                    index_rate1 = gr.Slider(
+                        minimum=0,
+                        maximum=1,
+                        label=i18n("The proportion of features retrieved"),
+                        value=0.66,
+                        interactive=True,
+                    )
+            with gr.Row():
+                with gr.Column():
+                    input_audio0 = gr.Dropdown(
+                        label="2.Choose your audio.  Hit refresh if you do not see all of your\
+                        clips.  Recorded audio will be saved as a wav file with the timestamp of\
+                        when you recorded it.",
+                        value="./audios/someguy.mp3",
+                        choices=audio_files
                         )
+                    dropbox.upload(fn=save_to_wav2, inputs=[dropbox], outputs=[input_audio0])
+                    dropbox.upload(fn=change_choices2, inputs=[], outputs=[input_audio0])
+                with gr.Column():
+                    vc_output2 = gr.Audio(
+                        label="Output Audio (Click on the Three Dots in the Right Corner to Download)",
+                        type='filepath',
+                        interactive=False,
+                    )
+            with gr.Row():
+                with gr.Column():
+                    refresh_button2 = gr.Button("Refresh Audio Files", variant="primary")
+                    record_button.change(fn=save_to_wav, inputs=[record_button], outputs=[input_audio0])
+                    record_button.change(fn=change_choices2, inputs=[], outputs=[input_audio0])
+                with gr.Column():
+                    but0 = gr.Button("Clone the clip", variant="primary")
+                    #clean_button = gr.Button(i18n("Uninstall the sound saving video memory"), variant="primary")
+                    #clean_button.click(fn=clean, inputs=[], outputs=[sid0])
+            with gr.Row(equal_height=True):
+                with gr.Column():
+                    gr.Textbox(label="", value="Coming Soon... Real Time Text to Speech!")
+                with gr.Column():
                     with gr.Accordion("Advanced Settings", open=False):
                         f0method0 = gr.Radio(
                             label="Optional: Change the Pitch Extraction Algorithm.\nExtraction methods are sorted from 'worst quality' to 'best quality'.\nmangio-crepe may or may not be better than rmvpe in cases where 'smoothness' is more important, but rmvpe is the best overall.",
                             value="rmvpe",
                             interactive=True,
                         )
                         crepe_hop_length = gr.Slider(
                             minimum=1,
                             maximum=512,
                             step=1,
+                            label="Mangio-Crepe Hop Length. Higher numbers will reduce the\
+                            chance of extreme pitch changes but lower numbers will increase\
+                            accuracy. 64-192 is a good range to experiment with.",
                             value=120,
                             interactive=True,
                             visible=False,
                         formanting.change(fn=formant_enabled,inputs=[formanting,qfrency,tmbre,frmntbut,formant_preset,formant_refresh_button],outputs=[formanting,qfrency,tmbre,frmntbut,formant_preset,formant_refresh_button])
                         frmntbut.click(fn=formant_apply,inputs=[qfrency, tmbre], outputs=[qfrency, tmbre])
                         formant_refresh_button.click(fn=update_fshift_presets,inputs=[formant_preset, qfrency, tmbre],outputs=[formant_preset, qfrency, tmbre])
+                        animation = gr.Video(type='filepath', visible=False)
+                        refresh_button2.click(fn=change_choices2, inputs=[], outputs=[input_audio0, animation])                    #        with gr.Row():
+                        animate_button = gr.Button('Animate', visible=False)
             with gr.Row():
                 f0_file = gr.File(label=i18n("Retrieve feature occupancy F0 curve files, optionally,\
                                             one pitch per line, instead of the default F0 and the upward\
                                             and downward adjustment stop of the small white copy path with\
                                             spaces at the beginning and end and carriage return ratio."), visible=False)
+                vc_output1 = gr.Textbox("", visible=False)
+                tfs = gr.Textbox(label="Input your Text", interactive=True, value="This is a test.", visible=False)
+                tts_button = gr.Button(value="Speak", visible=False)
+                lang = gr.Radio(label='Chinese & Japanese do not work with ElevenLabs currently.',
+                            choices=['en','es','fr','pt','zh-CN','de','hi','ja'], value='en', visible=False)
+                api_box = gr.Textbox(label="Enter your API Key for ElevenLabs, or leave empty to use GoogleTTS", value='', visible=False)
+                elevenid=gr.Dropdown(label="Voice:", choices=eleven_voices, visible=False)
+                tts_button.click(fn=elevenTTS, inputs=[api_box, tfs, elevenid, lang], outputs=[record_button, input_audio0])
+                with gr.Accordion('Text To Speech', open=False, visible=False):
+                    gr.HTML("<h3>  Text To Speech  </h3>")
                 but0.click(
                     vc_single,
                     [vc_output1, vc_output2],
                 )
+            with gr.Accordion("Batch Conversion",open=False, visible=False):
                 with gr.Row():
                     with gr.Column():
                         vc_transform1 = gr.Number(
                             inputs=[],
                             outputs=file_index4,
                         )
                         index_rate2 = gr.Slider(
                             minimum=0,
                             maximum=1,