David Thomas
committed on
Commit
·
999d347
1
Parent(s):
a623780
cleaned up UI
Browse files
app.py
CHANGED
@@ -1553,16 +1553,38 @@ def zip_downloader(model):
|
|
1553 |
else:
|
1554 |
return f'./weights/{model}.pth', "Could not find Index file."
|
1555 |
|
1556 |
-
|
|
|
1557 |
global person
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1558 |
with gr.Tabs():
|
1559 |
with gr.TabItem("Inference"):
|
1560 |
-
gr.HTML("<h1>
|
1561 |
-
gr.HTML(
|
1562 |
-
|
1563 |
-
|
1564 |
-
|
1565 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1566 |
|
1567 |
# Inference Preset Row
|
1568 |
# with gr.Row():
|
@@ -1574,111 +1596,78 @@ with gr.Blocks(title='RVC RULE1 v1', theme='step-3-profit/Midnight-Deep@=0.0.2')
|
|
1574 |
|
1575 |
# Other RVC stuff
|
1576 |
with gr.Row():
|
1577 |
-
|
1578 |
-
|
1579 |
-
|
1580 |
-
|
1581 |
-
vc_transform0 = gr.Number(label="Optional: You can change the pitch here or leave it at 0.\
|
1582 |
For male to female conversions, or vice versa, swap the voice first and then\
|
1583 |
adjust the pitch after you get a baseline.", value=0)
|
1584 |
-
|
1585 |
-
|
1586 |
-
|
1587 |
-
|
1588 |
-
|
1589 |
-
|
1590 |
-
|
1591 |
-
|
1592 |
-
|
1593 |
-
|
1594 |
-
|
1595 |
-
|
1596 |
-
|
1597 |
-
|
1598 |
-
|
1599 |
-
|
1600 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1601 |
with gr.Row():
|
1602 |
with gr.Column():
|
1603 |
-
|
1604 |
-
dropbox = gr.File(label="Drop your audio here & hit the Reload button.")
|
1605 |
-
with gr.Row():
|
1606 |
-
record_button=gr.Audio(source="microphone", label="OR Record audio.", type="filepath")
|
1607 |
-
with gr.Row():
|
1608 |
-
input_audio0 = gr.Dropdown(
|
1609 |
-
label="2.Choose your audio. Hit refresh if you do not see all of your\
|
1610 |
-
clips. Recorded audio will be saved as a wav file with the timestamp of\
|
1611 |
-
when you recorded it.",
|
1612 |
-
value="./audios/someguy.mp3",
|
1613 |
-
choices=audio_files
|
1614 |
-
)
|
1615 |
-
dropbox.upload(fn=save_to_wav2, inputs=[dropbox], outputs=[input_audio0])
|
1616 |
-
dropbox.upload(fn=change_choices2, inputs=[], outputs=[input_audio0])
|
1617 |
-
refresh_button2 = gr.Button("Refresh", variant="primary", size='sm')
|
1618 |
-
record_button.change(fn=save_to_wav, inputs=[record_button], outputs=[input_audio0])
|
1619 |
-
record_button.change(fn=change_choices2, inputs=[], outputs=[input_audio0])
|
1620 |
-
#with gr.Row():
|
1621 |
-
# with gr.Accordion('Text To Speech', open=False):
|
1622 |
-
# with gr.Column():
|
1623 |
-
# lang = gr.Radio(label='Chinese & Japanese do not work with ElevenLabs currently.',choices=['en','es','fr','pt','zh-CN','de','hi','ja'], value='en')
|
1624 |
-
# api_box = gr.Textbox(label="Enter your API Key for ElevenLabs, or leave empty to use GoogleTTS", value='')
|
1625 |
-
# elevenid=gr.Dropdown(label="Voice:", choices=eleven_voices)
|
1626 |
-
# with gr.Column():
|
1627 |
-
# tfs = gr.Textbox(label="Input your Text", interactive=True, value="This is a test.")
|
1628 |
-
# tts_button = gr.Button(value="Speak")
|
1629 |
-
# tts_button.click(fn=elevenTTS, inputs=[api_box,tfs, elevenid, lang], outputs=[record_button, input_audio0])
|
1630 |
-
with gr.Row():
|
1631 |
-
# with gr.Accordion('Wav2Lip', open=False):
|
1632 |
-
# with gr.Row():
|
1633 |
-
# size = gr.Radio(label='Resolution:',choices=['Half','Full'])
|
1634 |
-
# face = gr.UploadButton("Upload A Character",type='file')
|
1635 |
-
# faces = gr.Dropdown(label="OR Choose one:", choices=['None','Ben Shapiro','Andrew Tate'])
|
1636 |
-
# with gr.Row():
|
1637 |
-
# preview = gr.Textbox(label="Status:",interactive=False)
|
1638 |
-
# face.upload(fn=success_message,inputs=[face], outputs=[preview, faces])
|
1639 |
-
with gr.Row():
|
1640 |
-
animation = gr.Video(type='filepath')
|
1641 |
-
refresh_button2.click(fn=change_choices2, inputs=[], outputs=[input_audio0, animation]) # with gr.Row():
|
1642 |
-
animate_button = gr.Button('Animate')
|
1643 |
-
|
1644 |
with gr.Column():
|
1645 |
-
|
1646 |
-
|
1647 |
-
|
1648 |
-
|
1649 |
-
|
1650 |
-
|
1651 |
-
|
1652 |
-
|
1653 |
-
|
1654 |
-
|
1655 |
-
|
1656 |
-
|
1657 |
-
|
1658 |
-
|
1659 |
-
|
1660 |
-
# )
|
1661 |
-
index_rate1 = gr.Slider(
|
1662 |
-
minimum=0,
|
1663 |
-
maximum=1,
|
1664 |
-
label=i18n("The proportion of features retrieved"),
|
1665 |
-
value=0.66,
|
1666 |
-
interactive=True,
|
1667 |
-
)
|
1668 |
-
with gr.Row():
|
1669 |
-
vc_output2 = gr.Audio(
|
1670 |
-
label="Output Audio (Click on the Three Dots in the Right Corner to Download)",
|
1671 |
-
type='filepath',
|
1672 |
-
interactive=False,
|
1673 |
)
|
1674 |
-
|
1675 |
-
|
1676 |
-
|
1677 |
-
|
1678 |
-
|
1679 |
-
|
1680 |
-
|
1681 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1682 |
with gr.Accordion("Advanced Settings", open=False):
|
1683 |
f0method0 = gr.Radio(
|
1684 |
label="Optional: Change the Pitch Extraction Algorithm.\nExtraction methods are sorted from 'worst quality' to 'best quality'.\nmangio-crepe may or may not be better than rmvpe in cases where 'smoothness' is more important, but rmvpe is the best overall.",
|
@@ -1686,12 +1675,13 @@ with gr.Blocks(title='RVC RULE1 v1', theme='step-3-profit/Midnight-Deep@=0.0.2')
|
|
1686 |
value="rmvpe",
|
1687 |
interactive=True,
|
1688 |
)
|
1689 |
-
|
1690 |
crepe_hop_length = gr.Slider(
|
1691 |
minimum=1,
|
1692 |
maximum=512,
|
1693 |
step=1,
|
1694 |
-
label="Mangio-Crepe Hop Length. Higher numbers will reduce the
|
|
|
|
|
1695 |
value=120,
|
1696 |
interactive=True,
|
1697 |
visible=False,
|
@@ -1782,12 +1772,24 @@ with gr.Blocks(title='RVC RULE1 v1', theme='step-3-profit/Midnight-Deep@=0.0.2')
|
|
1782 |
formanting.change(fn=formant_enabled,inputs=[formanting,qfrency,tmbre,frmntbut,formant_preset,formant_refresh_button],outputs=[formanting,qfrency,tmbre,frmntbut,formant_preset,formant_refresh_button])
|
1783 |
frmntbut.click(fn=formant_apply,inputs=[qfrency, tmbre], outputs=[qfrency, tmbre])
|
1784 |
formant_refresh_button.click(fn=update_fshift_presets,inputs=[formant_preset, qfrency, tmbre],outputs=[formant_preset, qfrency, tmbre])
|
|
|
|
|
|
|
1785 |
with gr.Row():
|
1786 |
-
vc_output1 = gr.Textbox("")
|
1787 |
f0_file = gr.File(label=i18n("Retrieve feature occupancy F0 curve files, optionally,\
|
1788 |
one pitch per line, instead of the default F0 and the upward\
|
1789 |
and downward adjustment stop of the small white copy path with\
|
1790 |
spaces at the beginning and end and carriage return ratio."), visible=False)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1791 |
|
1792 |
but0.click(
|
1793 |
vc_single,
|
@@ -1810,7 +1812,7 @@ with gr.Blocks(title='RVC RULE1 v1', theme='step-3-profit/Midnight-Deep@=0.0.2')
|
|
1810 |
[vc_output1, vc_output2],
|
1811 |
)
|
1812 |
|
1813 |
-
with gr.Accordion("Batch Conversion",open=False):
|
1814 |
with gr.Row():
|
1815 |
with gr.Column():
|
1816 |
vc_transform1 = gr.Number(
|
@@ -1858,11 +1860,6 @@ with gr.Blocks(title='RVC RULE1 v1', theme='step-3-profit/Midnight-Deep@=0.0.2')
|
|
1858 |
inputs=[],
|
1859 |
outputs=file_index4,
|
1860 |
)
|
1861 |
-
# file_big_npy2 = gr.Textbox(
|
1862 |
-
# label=i18n("特征文件路径"),
|
1863 |
-
# value="E:\\codes\\py39\\vits_vc_gpu_train\\logs\\mi-test-1key\\total_fea.npy",
|
1864 |
-
# interactive=True,
|
1865 |
-
# )
|
1866 |
index_rate2 = gr.Slider(
|
1867 |
minimum=0,
|
1868 |
maximum=1,
|
|
|
1553 |
else:
|
1554 |
return f'./weights/{model}.pth', "Could not find Index file."
|
1555 |
|
1556 |
+
theme = gr.Theme(primary_hue="blue", secondary_hue="gray", neutral_hue="gray")
|
1557 |
+
with gr.Blocks(title='RVC First Rule v1', theme=theme) as app:
|
1558 |
global person
|
1559 |
+
spk_item = gr.Slider(
|
1560 |
+
minimum=0,
|
1561 |
+
maximum=2333,
|
1562 |
+
step=1,
|
1563 |
+
label=i18n("To uninstall please select Speaker ID Timbre to save the video memory"),
|
1564 |
+
value=0,
|
1565 |
+
visible=False,
|
1566 |
+
interactive=True,
|
1567 |
+
)
|
1568 |
with gr.Tabs():
|
1569 |
with gr.TabItem("Inference"):
|
1570 |
+
gr.HTML("<h1> First Rule -- Humananity First </h1>")
|
1571 |
+
gr.HTML("""<h3> A few notes on the flow of the vocal cloning features:
|
1572 |
+
First, an audio clip needs to be either recorded or uploaded. These should
|
1573 |
+
be vocals only, preferable under 30 seconds in length. Longer clips
|
1574 |
+
can be cloned, but will take longer. Second, a model needs to be chosen --
|
1575 |
+
either Jenny's ("jenny.pth") or Joel's ("joel.pth"). If you do not see them as options,
|
1576 |
+
make sure to "Refresh" the model choices. Initially, simply
|
1577 |
+
clone the clip to set a baseline. Then, you can play around with the adjustment settings.
|
1578 |
+
This is especially useful for male to female or female to male conversions
|
1579 |
+
adjustment settings. This is especially useful for male to female or female
|
1580 |
+
to male conversions. There are other more advanced settings, including proportion
|
1581 |
+
of features retrieved, which can be used to adjust how closely the algorithm with
|
1582 |
+
match features from one voice to another. The default settings usually work well,
|
1583 |
+
but you can certainly play around with this to get different results. Most of all,
|
1584 |
+
have fun! </h3>
|
1585 |
+
""")
|
1586 |
+
gr.HTML("<h10> Huggingface version v1 -- DT </h10>")
|
1587 |
+
# gr.HTML("<h4> If you want to use this space privately, I recommend you duplicate the space. </h4>")
|
1588 |
|
1589 |
# Inference Preset Row
|
1590 |
# with gr.Row():
|
|
|
1596 |
|
1597 |
# Other RVC stuff
|
1598 |
with gr.Row():
|
1599 |
+
with gr.Column():
|
1600 |
+
dropbox=gr.File(label="Drop your audio here & hit the Reload button.")
|
1601 |
+
with gr.Column():
|
1602 |
+
vc_transform0 = gr.Number(label="Optional: You can change the pitch here or leave it at 0.\
|
|
|
1603 |
For male to female conversions, or vice versa, swap the voice first and then\
|
1604 |
adjust the pitch after you get a baseline.", value=0)
|
1605 |
+
sid0 = gr.Dropdown(label="Choose your Model.", choices=sorted(names), value="joel.pth")
|
1606 |
+
sid0.change(
|
1607 |
+
fn=get_vc,
|
1608 |
+
inputs=[sid0],
|
1609 |
+
outputs=[spk_item],
|
1610 |
+
)
|
1611 |
+
refresh_button = gr.Button("Refresh Model List", variant="primary")
|
1612 |
+
if check_for_name() != '':
|
1613 |
+
get_vc(sorted(names)[0])
|
1614 |
+
file_index1 = gr.Dropdown(
|
1615 |
+
label="3. Path to your added.index file (if it didn't automatically find it.)",
|
1616 |
+
choices=[match_index(sid0)] if file_index else match_index("joel.pth"),
|
1617 |
+
value=match_index("joel.pth"),
|
1618 |
+
interactive=True,
|
1619 |
+
visible=False
|
1620 |
+
)
|
1621 |
+
sid0.change(fn=match_index, inputs=[sid0],outputs=[file_index1])
|
1622 |
+
refresh_button.click(
|
1623 |
+
fn=change_choices, inputs=[], outputs=[sid0, file_index1]
|
1624 |
+
)
|
1625 |
+
# file_big_npy1 = gr.Textbox(
|
1626 |
+
# label=i18n("特征文件路径"),
|
1627 |
+
# value="E:\\codes\py39\\vits_vc_gpu_train\\logs\\mi-test-1key\\total_fea.npy",
|
1628 |
+
# interactive=True,
|
1629 |
+
# )
|
1630 |
with gr.Row():
|
1631 |
with gr.Column():
|
1632 |
+
record_button=gr.Audio(source="microphone", label="OR Record audio.", type="filepath")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1633 |
with gr.Column():
|
1634 |
+
index_rate1 = gr.Slider(
|
1635 |
+
minimum=0,
|
1636 |
+
maximum=1,
|
1637 |
+
label=i18n("The proportion of features retrieved"),
|
1638 |
+
value=0.66,
|
1639 |
+
interactive=True,
|
1640 |
+
)
|
1641 |
+
with gr.Row():
|
1642 |
+
with gr.Column():
|
1643 |
+
input_audio0 = gr.Dropdown(
|
1644 |
+
label="2.Choose your audio. Hit refresh if you do not see all of your\
|
1645 |
+
clips. Recorded audio will be saved as a wav file with the timestamp of\
|
1646 |
+
when you recorded it.",
|
1647 |
+
value="./audios/someguy.mp3",
|
1648 |
+
choices=audio_files
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1649 |
)
|
1650 |
+
dropbox.upload(fn=save_to_wav2, inputs=[dropbox], outputs=[input_audio0])
|
1651 |
+
dropbox.upload(fn=change_choices2, inputs=[], outputs=[input_audio0])
|
1652 |
+
with gr.Column():
|
1653 |
+
vc_output2 = gr.Audio(
|
1654 |
+
label="Output Audio (Click on the Three Dots in the Right Corner to Download)",
|
1655 |
+
type='filepath',
|
1656 |
+
interactive=False,
|
1657 |
+
)
|
1658 |
+
with gr.Row():
|
1659 |
+
with gr.Column():
|
1660 |
+
refresh_button2 = gr.Button("Refresh Audio Files", variant="primary")
|
1661 |
+
record_button.change(fn=save_to_wav, inputs=[record_button], outputs=[input_audio0])
|
1662 |
+
record_button.change(fn=change_choices2, inputs=[], outputs=[input_audio0])
|
1663 |
+
with gr.Column():
|
1664 |
+
but0 = gr.Button("Clone the clip", variant="primary")
|
1665 |
+
#clean_button = gr.Button(i18n("Uninstall the sound saving video memory"), variant="primary")
|
1666 |
+
#clean_button.click(fn=clean, inputs=[], outputs=[sid0])
|
1667 |
+
with gr.Row(equal_height=True):
|
1668 |
+
with gr.Column():
|
1669 |
+
gr.Textbox(label="", value="Coming Soon... Real Time Text to Speech!")
|
1670 |
+
with gr.Column():
|
1671 |
with gr.Accordion("Advanced Settings", open=False):
|
1672 |
f0method0 = gr.Radio(
|
1673 |
label="Optional: Change the Pitch Extraction Algorithm.\nExtraction methods are sorted from 'worst quality' to 'best quality'.\nmangio-crepe may or may not be better than rmvpe in cases where 'smoothness' is more important, but rmvpe is the best overall.",
|
|
|
1675 |
value="rmvpe",
|
1676 |
interactive=True,
|
1677 |
)
|
|
|
1678 |
crepe_hop_length = gr.Slider(
|
1679 |
minimum=1,
|
1680 |
maximum=512,
|
1681 |
step=1,
|
1682 |
+
label="Mangio-Crepe Hop Length. Higher numbers will reduce the\
|
1683 |
+
chance of extreme pitch changes but lower numbers will increase\
|
1684 |
+
accuracy. 64-192 is a good range to experiment with.",
|
1685 |
value=120,
|
1686 |
interactive=True,
|
1687 |
visible=False,
|
|
|
1772 |
formanting.change(fn=formant_enabled,inputs=[formanting,qfrency,tmbre,frmntbut,formant_preset,formant_refresh_button],outputs=[formanting,qfrency,tmbre,frmntbut,formant_preset,formant_refresh_button])
|
1773 |
frmntbut.click(fn=formant_apply,inputs=[qfrency, tmbre], outputs=[qfrency, tmbre])
|
1774 |
formant_refresh_button.click(fn=update_fshift_presets,inputs=[formant_preset, qfrency, tmbre],outputs=[formant_preset, qfrency, tmbre])
|
1775 |
+
animation = gr.Video(type='filepath', visible=False)
|
1776 |
+
refresh_button2.click(fn=change_choices2, inputs=[], outputs=[input_audio0, animation]) # with gr.Row():
|
1777 |
+
animate_button = gr.Button('Animate', visible=False)
|
1778 |
with gr.Row():
|
|
|
1779 |
f0_file = gr.File(label=i18n("Retrieve feature occupancy F0 curve files, optionally,\
|
1780 |
one pitch per line, instead of the default F0 and the upward\
|
1781 |
and downward adjustment stop of the small white copy path with\
|
1782 |
spaces at the beginning and end and carriage return ratio."), visible=False)
|
1783 |
+
vc_output1 = gr.Textbox("", visible=False)
|
1784 |
+
tfs = gr.Textbox(label="Input your Text", interactive=True, value="This is a test.", visible=False)
|
1785 |
+
tts_button = gr.Button(value="Speak", visible=False)
|
1786 |
+
lang = gr.Radio(label='Chinese & Japanese do not work with ElevenLabs currently.',
|
1787 |
+
choices=['en','es','fr','pt','zh-CN','de','hi','ja'], value='en', visible=False)
|
1788 |
+
api_box = gr.Textbox(label="Enter your API Key for ElevenLabs, or leave empty to use GoogleTTS", value='', visible=False)
|
1789 |
+
elevenid=gr.Dropdown(label="Voice:", choices=eleven_voices, visible=False)
|
1790 |
+
tts_button.click(fn=elevenTTS, inputs=[api_box, tfs, elevenid, lang], outputs=[record_button, input_audio0])
|
1791 |
+
with gr.Accordion('Text To Speech', open=False, visible=False):
|
1792 |
+
gr.HTML("<h3> Text To Speech </h3>")
|
1793 |
|
1794 |
but0.click(
|
1795 |
vc_single,
|
|
|
1812 |
[vc_output1, vc_output2],
|
1813 |
)
|
1814 |
|
1815 |
+
with gr.Accordion("Batch Conversion",open=False, visible=False):
|
1816 |
with gr.Row():
|
1817 |
with gr.Column():
|
1818 |
vc_transform1 = gr.Number(
|
|
|
1860 |
inputs=[],
|
1861 |
outputs=file_index4,
|
1862 |
)
|
|
|
|
|
|
|
|
|
|
|
1863 |
index_rate2 = gr.Slider(
|
1864 |
minimum=0,
|
1865 |
maximum=1,
|