David Thomas committed on
Commit
999d347
·
1 Parent(s): a623780

cleaned up UI

Browse files
Files changed (1) hide show
  1. app.py +112 -115
app.py CHANGED
@@ -1553,16 +1553,38 @@ def zip_downloader(model):
1553
  else:
1554
  return f'./weights/{model}.pth', "Could not find Index file."
1555
 
1556
- with gr.Blocks(title='RVC RULE1 v1', theme='step-3-profit/Midnight-Deep@=0.0.2') as app:
 
1557
  global person
 
 
 
 
 
 
 
 
 
1558
  with gr.Tabs():
1559
  with gr.TabItem("Inference"):
1560
- gr.HTML("<h1> RVC_RULE1 -- Humans First </h1>")
1561
- gr.HTML('<h4> Make sure you hit "Refresh" to load all of the models.\
1562
- Once you have chosen "joel.pth" for Joel or "jenny.pth" for Jenny,\
1563
- make sure the index setting to the right lines up with the chosen model.')
1564
- gr.HTML("<h10> Huggingface version v1 -- DT </h10>")
1565
- # gr.HTML("<h4> If you want to use this space privately, I recommend you duplicate the space. </h4>")
 
 
 
 
 
 
 
 
 
 
 
 
1566
 
1567
  # Inference Preset Row
1568
  # with gr.Row():
@@ -1574,111 +1596,78 @@ with gr.Blocks(title='RVC RULE1 v1', theme='step-3-profit/Midnight-Deep@=0.0.2')
1574
 
1575
  # Other RVC stuff
1576
  with gr.Row():
1577
- sid0 = gr.Dropdown(label="Choose your Model.", choices=sorted(names), value="joel.pth")
1578
- refresh_button = gr.Button("Refresh", variant="primary")
1579
- if check_for_name() != '':
1580
- get_vc(sorted(names)[0])
1581
- vc_transform0 = gr.Number(label="Optional: You can change the pitch here or leave it at 0.\
1582
  For male to female conversions, or vice versa, swap the voice first and then\
1583
  adjust the pitch after you get a baseline.", value=0)
1584
- #clean_button = gr.Button(i18n("Uninstall the sound saving video memory"), variant="primary")
1585
- spk_item = gr.Slider(
1586
- minimum=0,
1587
- maximum=2333,
1588
- step=1,
1589
- label=i18n("To uninstall please select Speaker ID Timbre to save the video memory"),
1590
- value=0,
1591
- visible=False,
1592
- interactive=True,
1593
- )
1594
- #clean_button.click(fn=clean, inputs=[], outputs=[sid0])
1595
- sid0.change(
1596
- fn=get_vc,
1597
- inputs=[sid0],
1598
- outputs=[spk_item],
1599
- )
1600
- but0 = gr.Button("Convert", variant="primary")
 
 
 
 
 
 
 
 
1601
  with gr.Row():
1602
  with gr.Column():
1603
- with gr.Row():
1604
- dropbox = gr.File(label="Drop your audio here & hit the Reload button.")
1605
- with gr.Row():
1606
- record_button=gr.Audio(source="microphone", label="OR Record audio.", type="filepath")
1607
- with gr.Row():
1608
- input_audio0 = gr.Dropdown(
1609
- label="2.Choose your audio. Hit refresh if you do not see all of your\
1610
- clips. Recorded audio will be saved as a wav file with the timestamp of\
1611
- when you recorded it.",
1612
- value="./audios/someguy.mp3",
1613
- choices=audio_files
1614
- )
1615
- dropbox.upload(fn=save_to_wav2, inputs=[dropbox], outputs=[input_audio0])
1616
- dropbox.upload(fn=change_choices2, inputs=[], outputs=[input_audio0])
1617
- refresh_button2 = gr.Button("Refresh", variant="primary", size='sm')
1618
- record_button.change(fn=save_to_wav, inputs=[record_button], outputs=[input_audio0])
1619
- record_button.change(fn=change_choices2, inputs=[], outputs=[input_audio0])
1620
- #with gr.Row():
1621
- # with gr.Accordion('Text To Speech', open=False):
1622
- # with gr.Column():
1623
- # lang = gr.Radio(label='Chinese & Japanese do not work with ElevenLabs currently.',choices=['en','es','fr','pt','zh-CN','de','hi','ja'], value='en')
1624
- # api_box = gr.Textbox(label="Enter your API Key for ElevenLabs, or leave empty to use GoogleTTS", value='')
1625
- # elevenid=gr.Dropdown(label="Voice:", choices=eleven_voices)
1626
- # with gr.Column():
1627
- # tfs = gr.Textbox(label="Input your Text", interactive=True, value="This is a test.")
1628
- # tts_button = gr.Button(value="Speak")
1629
- # tts_button.click(fn=elevenTTS, inputs=[api_box,tfs, elevenid, lang], outputs=[record_button, input_audio0])
1630
- with gr.Row():
1631
- # with gr.Accordion('Wav2Lip', open=False):
1632
- # with gr.Row():
1633
- # size = gr.Radio(label='Resolution:',choices=['Half','Full'])
1634
- # face = gr.UploadButton("Upload A Character",type='file')
1635
- # faces = gr.Dropdown(label="OR Choose one:", choices=['None','Ben Shapiro','Andrew Tate'])
1636
- # with gr.Row():
1637
- # preview = gr.Textbox(label="Status:",interactive=False)
1638
- # face.upload(fn=success_message,inputs=[face], outputs=[preview, faces])
1639
- with gr.Row():
1640
- animation = gr.Video(type='filepath')
1641
- refresh_button2.click(fn=change_choices2, inputs=[], outputs=[input_audio0, animation]) # with gr.Row():
1642
- animate_button = gr.Button('Animate')
1643
-
1644
  with gr.Column():
1645
- with gr.Accordion("Index Settings", open=False):
1646
- file_index1 = gr.Dropdown(
1647
- label="3. Path to your added.index file (if it didn't automatically find it.)",
1648
- choices=[match_index(sid0)] if file_index else match_index("joel.pth"),
1649
- value=match_index("joel.pth"),
1650
- interactive=True,
1651
- )
1652
- sid0.change(fn=match_index, inputs=[sid0],outputs=[file_index1])
1653
- refresh_button.click(
1654
- fn=change_choices, inputs=[], outputs=[sid0, file_index1]
1655
- )
1656
- # file_big_npy1 = gr.Textbox(
1657
- # label=i18n("特征文件路径"),
1658
- # value="E:\\codes\py39\\vits_vc_gpu_train\\logs\\mi-test-1key\\total_fea.npy",
1659
- # interactive=True,
1660
- # )
1661
- index_rate1 = gr.Slider(
1662
- minimum=0,
1663
- maximum=1,
1664
- label=i18n("The proportion of features retrieved"),
1665
- value=0.66,
1666
- interactive=True,
1667
- )
1668
- with gr.Row():
1669
- vc_output2 = gr.Audio(
1670
- label="Output Audio (Click on the Three Dots in the Right Corner to Download)",
1671
- type='filepath',
1672
- interactive=False,
1673
  )
1674
- with gr.Row():
1675
- # Create a new button to calculate the similarity score
1676
- similarity_button = gr.Button("Calculate Similarity Score", variant="primary")
1677
- with gr.Row():
1678
- similarity_score = gr.Textbox(label="Similarity Score", type="text", interactive=False)
1679
- similarity_button.click(fn=calculate_similarity_score, inputs=[input_audio0, file_index1, sid0], outputs=[similarity_score])
1680
- print(file_index1)
1681
- #animate_button.click(fn=mouth, inputs=[size, face, vc_output2, faces], outputs=[animation, preview])
 
 
 
 
 
 
 
 
 
 
 
 
 
1682
  with gr.Accordion("Advanced Settings", open=False):
1683
  f0method0 = gr.Radio(
1684
  label="Optional: Change the Pitch Extraction Algorithm.\nExtraction methods are sorted from 'worst quality' to 'best quality'.\nmangio-crepe may or may not be better than rmvpe in cases where 'smoothness' is more important, but rmvpe is the best overall.",
@@ -1686,12 +1675,13 @@ with gr.Blocks(title='RVC RULE1 v1', theme='step-3-profit/Midnight-Deep@=0.0.2')
1686
  value="rmvpe",
1687
  interactive=True,
1688
  )
1689
-
1690
  crepe_hop_length = gr.Slider(
1691
  minimum=1,
1692
  maximum=512,
1693
  step=1,
1694
- label="Mangio-Crepe Hop Length. Higher numbers will reduce the chance of extreme pitch changes but lower numbers will increase accuracy. 64-192 is a good range to experiment with.",
 
 
1695
  value=120,
1696
  interactive=True,
1697
  visible=False,
@@ -1782,12 +1772,24 @@ with gr.Blocks(title='RVC RULE1 v1', theme='step-3-profit/Midnight-Deep@=0.0.2')
1782
  formanting.change(fn=formant_enabled,inputs=[formanting,qfrency,tmbre,frmntbut,formant_preset,formant_refresh_button],outputs=[formanting,qfrency,tmbre,frmntbut,formant_preset,formant_refresh_button])
1783
  frmntbut.click(fn=formant_apply,inputs=[qfrency, tmbre], outputs=[qfrency, tmbre])
1784
  formant_refresh_button.click(fn=update_fshift_presets,inputs=[formant_preset, qfrency, tmbre],outputs=[formant_preset, qfrency, tmbre])
 
 
 
1785
  with gr.Row():
1786
- vc_output1 = gr.Textbox("")
1787
  f0_file = gr.File(label=i18n("Retrieve feature occupancy F0 curve files, optionally,\
1788
  one pitch per line, instead of the default F0 and the upward\
1789
  and downward adjustment stop of the small white copy path with\
1790
  spaces at the beginning and end and carriage return ratio."), visible=False)
 
 
 
 
 
 
 
 
 
 
1791
 
1792
  but0.click(
1793
  vc_single,
@@ -1810,7 +1812,7 @@ with gr.Blocks(title='RVC RULE1 v1', theme='step-3-profit/Midnight-Deep@=0.0.2')
1810
  [vc_output1, vc_output2],
1811
  )
1812
 
1813
- with gr.Accordion("Batch Conversion",open=False):
1814
  with gr.Row():
1815
  with gr.Column():
1816
  vc_transform1 = gr.Number(
@@ -1858,11 +1860,6 @@ with gr.Blocks(title='RVC RULE1 v1', theme='step-3-profit/Midnight-Deep@=0.0.2')
1858
  inputs=[],
1859
  outputs=file_index4,
1860
  )
1861
- # file_big_npy2 = gr.Textbox(
1862
- # label=i18n("特征文件路径"),
1863
- # value="E:\\codes\\py39\\vits_vc_gpu_train\\logs\\mi-test-1key\\total_fea.npy",
1864
- # interactive=True,
1865
- # )
1866
  index_rate2 = gr.Slider(
1867
  minimum=0,
1868
  maximum=1,
 
1553
  else:
1554
  return f'./weights/{model}.pth', "Could not find Index file."
1555
 
1556
+ theme = gr.Theme(primary_hue="blue", secondary_hue="gray", neutral_hue="gray")
1557
+ with gr.Blocks(title='RVC First Rule v1', theme=theme) as app:
1558
  global person
1559
+ spk_item = gr.Slider(
1560
+ minimum=0,
1561
+ maximum=2333,
1562
+ step=1,
1563
+ label=i18n("To uninstall please select Speaker ID Timbre to save the video memory"),
1564
+ value=0,
1565
+ visible=False,
1566
+ interactive=True,
1567
+ )
1568
  with gr.Tabs():
1569
  with gr.TabItem("Inference"):
1570
+ gr.HTML("<h1> First Rule -- Humananity First </h1>")
1571
+ gr.HTML("""<h3> A few notes on the flow of the vocal cloning features:
1572
+ First, an audio clip needs to be either recorded or uploaded. These should
1573
+ be vocals only, preferable under 30 seconds in length. Longer clips
1574
+ can be cloned, but will take longer. Second, a model needs to be chosen --
1575
+ either Jenny's ("jenny.pth") or Joel's ("joel.pth"). If you do not see them as options,
1576
+ make sure to "Refresh" the model choices. Initially, simply
1577
+ clone the clip to set a baseline. Then, you can play around with the adjustment settings.
1578
+ This is especially useful for male to female or female to male conversions
1579
+ adjustment settings. This is especially useful for male to female or female
1580
+ to male conversions. There are other more advanced settings, including proportion
1581
+ of features retrieved, which can be used to adjust how closely the algorithm with
1582
+ match features from one voice to another. The default settings usually work well,
1583
+ but you can certainly play around with this to get different results. Most of all,
1584
+ have fun! </h3>
1585
+ """)
1586
+ gr.HTML("<h10> Huggingface version v1 -- DT </h10>")
1587
+ # gr.HTML("<h4> If you want to use this space privately, I recommend you duplicate the space. </h4>")
1588
 
1589
  # Inference Preset Row
1590
  # with gr.Row():
 
1596
 
1597
  # Other RVC stuff
1598
  with gr.Row():
1599
+ with gr.Column():
1600
+ dropbox=gr.File(label="Drop your audio here & hit the Reload button.")
1601
+ with gr.Column():
1602
+ vc_transform0 = gr.Number(label="Optional: You can change the pitch here or leave it at 0.\
 
1603
  For male to female conversions, or vice versa, swap the voice first and then\
1604
  adjust the pitch after you get a baseline.", value=0)
1605
+ sid0 = gr.Dropdown(label="Choose your Model.", choices=sorted(names), value="joel.pth")
1606
+ sid0.change(
1607
+ fn=get_vc,
1608
+ inputs=[sid0],
1609
+ outputs=[spk_item],
1610
+ )
1611
+ refresh_button = gr.Button("Refresh Model List", variant="primary")
1612
+ if check_for_name() != '':
1613
+ get_vc(sorted(names)[0])
1614
+ file_index1 = gr.Dropdown(
1615
+ label="3. Path to your added.index file (if it didn't automatically find it.)",
1616
+ choices=[match_index(sid0)] if file_index else match_index("joel.pth"),
1617
+ value=match_index("joel.pth"),
1618
+ interactive=True,
1619
+ visible=False
1620
+ )
1621
+ sid0.change(fn=match_index, inputs=[sid0],outputs=[file_index1])
1622
+ refresh_button.click(
1623
+ fn=change_choices, inputs=[], outputs=[sid0, file_index1]
1624
+ )
1625
+ # file_big_npy1 = gr.Textbox(
1626
+ # label=i18n("特征文件路径"),
1627
+ # value="E:\\codes\py39\\vits_vc_gpu_train\\logs\\mi-test-1key\\total_fea.npy",
1628
+ # interactive=True,
1629
+ # )
1630
  with gr.Row():
1631
  with gr.Column():
1632
+ record_button=gr.Audio(source="microphone", label="OR Record audio.", type="filepath")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1633
  with gr.Column():
1634
+ index_rate1 = gr.Slider(
1635
+ minimum=0,
1636
+ maximum=1,
1637
+ label=i18n("The proportion of features retrieved"),
1638
+ value=0.66,
1639
+ interactive=True,
1640
+ )
1641
+ with gr.Row():
1642
+ with gr.Column():
1643
+ input_audio0 = gr.Dropdown(
1644
+ label="2.Choose your audio. Hit refresh if you do not see all of your\
1645
+ clips. Recorded audio will be saved as a wav file with the timestamp of\
1646
+ when you recorded it.",
1647
+ value="./audios/someguy.mp3",
1648
+ choices=audio_files
 
 
 
 
 
 
 
 
 
 
 
 
 
1649
  )
1650
+ dropbox.upload(fn=save_to_wav2, inputs=[dropbox], outputs=[input_audio0])
1651
+ dropbox.upload(fn=change_choices2, inputs=[], outputs=[input_audio0])
1652
+ with gr.Column():
1653
+ vc_output2 = gr.Audio(
1654
+ label="Output Audio (Click on the Three Dots in the Right Corner to Download)",
1655
+ type='filepath',
1656
+ interactive=False,
1657
+ )
1658
+ with gr.Row():
1659
+ with gr.Column():
1660
+ refresh_button2 = gr.Button("Refresh Audio Files", variant="primary")
1661
+ record_button.change(fn=save_to_wav, inputs=[record_button], outputs=[input_audio0])
1662
+ record_button.change(fn=change_choices2, inputs=[], outputs=[input_audio0])
1663
+ with gr.Column():
1664
+ but0 = gr.Button("Clone the clip", variant="primary")
1665
+ #clean_button = gr.Button(i18n("Uninstall the sound saving video memory"), variant="primary")
1666
+ #clean_button.click(fn=clean, inputs=[], outputs=[sid0])
1667
+ with gr.Row(equal_height=True):
1668
+ with gr.Column():
1669
+ gr.Textbox(label="", value="Coming Soon... Real Time Text to Speech!")
1670
+ with gr.Column():
1671
  with gr.Accordion("Advanced Settings", open=False):
1672
  f0method0 = gr.Radio(
1673
  label="Optional: Change the Pitch Extraction Algorithm.\nExtraction methods are sorted from 'worst quality' to 'best quality'.\nmangio-crepe may or may not be better than rmvpe in cases where 'smoothness' is more important, but rmvpe is the best overall.",
 
1675
  value="rmvpe",
1676
  interactive=True,
1677
  )
 
1678
  crepe_hop_length = gr.Slider(
1679
  minimum=1,
1680
  maximum=512,
1681
  step=1,
1682
+ label="Mangio-Crepe Hop Length. Higher numbers will reduce the\
1683
+ chance of extreme pitch changes but lower numbers will increase\
1684
+ accuracy. 64-192 is a good range to experiment with.",
1685
  value=120,
1686
  interactive=True,
1687
  visible=False,
 
1772
  formanting.change(fn=formant_enabled,inputs=[formanting,qfrency,tmbre,frmntbut,formant_preset,formant_refresh_button],outputs=[formanting,qfrency,tmbre,frmntbut,formant_preset,formant_refresh_button])
1773
  frmntbut.click(fn=formant_apply,inputs=[qfrency, tmbre], outputs=[qfrency, tmbre])
1774
  formant_refresh_button.click(fn=update_fshift_presets,inputs=[formant_preset, qfrency, tmbre],outputs=[formant_preset, qfrency, tmbre])
1775
+ animation = gr.Video(type='filepath', visible=False)
1776
+ refresh_button2.click(fn=change_choices2, inputs=[], outputs=[input_audio0, animation]) # with gr.Row():
1777
+ animate_button = gr.Button('Animate', visible=False)
1778
  with gr.Row():
 
1779
  f0_file = gr.File(label=i18n("Retrieve feature occupancy F0 curve files, optionally,\
1780
  one pitch per line, instead of the default F0 and the upward\
1781
  and downward adjustment stop of the small white copy path with\
1782
  spaces at the beginning and end and carriage return ratio."), visible=False)
1783
+ vc_output1 = gr.Textbox("", visible=False)
1784
+ tfs = gr.Textbox(label="Input your Text", interactive=True, value="This is a test.", visible=False)
1785
+ tts_button = gr.Button(value="Speak", visible=False)
1786
+ lang = gr.Radio(label='Chinese & Japanese do not work with ElevenLabs currently.',
1787
+ choices=['en','es','fr','pt','zh-CN','de','hi','ja'], value='en', visible=False)
1788
+ api_box = gr.Textbox(label="Enter your API Key for ElevenLabs, or leave empty to use GoogleTTS", value='', visible=False)
1789
+ elevenid=gr.Dropdown(label="Voice:", choices=eleven_voices, visible=False)
1790
+ tts_button.click(fn=elevenTTS, inputs=[api_box, tfs, elevenid, lang], outputs=[record_button, input_audio0])
1791
+ with gr.Accordion('Text To Speech', open=False, visible=False):
1792
+ gr.HTML("<h3> Text To Speech </h3>")
1793
 
1794
  but0.click(
1795
  vc_single,
 
1812
  [vc_output1, vc_output2],
1813
  )
1814
 
1815
+ with gr.Accordion("Batch Conversion",open=False, visible=False):
1816
  with gr.Row():
1817
  with gr.Column():
1818
  vc_transform1 = gr.Number(
 
1860
  inputs=[],
1861
  outputs=file_index4,
1862
  )
 
 
 
 
 
1863
  index_rate2 = gr.Slider(
1864
  minimum=0,
1865
  maximum=1,