Pendrokar committed on
Commit
57a5f00
·
1 Parent(s): e5a3534

new TTS: MegaTTS3

Browse files
app/models.py CHANGED
@@ -243,8 +243,8 @@ HF_SPACES = {
243
  'text_param_index': 'text',
244
  'return_audio_index': 0,
245
  'series': 'MeloTTS',
246
- 'emoji': '😒', # Narration voice
247
- 'title': 'Narration voice',
248
  },
249
 
250
  # Parler Mini
@@ -552,7 +552,7 @@ HF_SPACES = {
552
  'return_audio_index': 0,
553
  'is_zero_gpu_space': True,
554
  'series': 'Zonos',
555
- 'title': 'Outclassed',
556
  },
557
  'Steveeeeeeen/Zonos/hybrid': {
558
  'name': 'Zonos H',
@@ -562,6 +562,7 @@ HF_SPACES = {
562
  'is_zero_gpu_space': True,
563
  'series': 'Zonos',
564
  'title': 'Outclassed',
 
565
  },
566
 
567
  # Spark-TTS
@@ -622,6 +623,15 @@ HF_SPACES = {
622
  'is_zero_gpu_space': True,
623
  'series': 'Chatterbox',
624
  },
 
 
 
 
 
 
 
 
 
625
  }
626
 
627
  # for zero-shot TTS - voice sample used by XTTS (11 seconds)
@@ -968,12 +978,22 @@ OVERRIDE_INPUTS = {
968
  'temperature_input': 0.8, # Lower values make the output more deterministic, higher values increase randomness.
969
  'seed_num_input': 1, # Seed for random number generation, can be any integer.
970
  'cfgw_input': 0.5, # CFG/Pace weight, can be any float value.
971
- }
 
 
 
 
 
 
 
 
 
972
  }
973
 
974
  # minor mods to model from the same space
975
  OVERRIDE_INPUTS['Steveeeeeeen/Zonos/hybrid'] = OVERRIDE_INPUTS['Steveeeeeeen/Zonos']
976
  OVERRIDE_INPUTS['Steveeeeeeen/Zonos/hybrid']['model_choice'] = 'Zyphra/Zonos-v0.1-hybrid'
 
977
  OVERRIDE_INPUTS['PHBJT/multi_parler_tts/reformatted'] = OVERRIDE_INPUTS['PHBJT/multi_parler_tts']
978
  OVERRIDE_INPUTS['PHBJT/multi_parler_tts/reformatted']['do_format'] = True
979
 
@@ -1038,7 +1058,7 @@ closed_source = [
1038
  ]
1039
 
1040
  # top five models in order to always have one of them picked and scrutinized
1041
- top_five = ['PHBJT/multi_parler_tts', 'fishaudio/openaudio-s1-mini', 'ResembleAI/Chatterbox']
1042
 
1043
  # prioritize low vote models
1044
  sql = 'SELECT name FROM model WHERE (upvote + downvote) < 750 ORDER BY (upvote + downvote) ASC'
 
243
  'text_param_index': 'text',
244
  'return_audio_index': 0,
245
  'series': 'MeloTTS',
246
+ 'emoji': '😷', # broken space / Narration voice
247
+ 'title': 'Broken space / Outclassed narration voice',
248
  },
249
 
250
  # Parler Mini
 
552
  'return_audio_index': 0,
553
  'is_zero_gpu_space': True,
554
  'series': 'Zonos',
555
+ # 'title': 'Outclassed',
556
  },
557
  'Steveeeeeeen/Zonos/hybrid': {
558
  'name': 'Zonos H',
 
562
  'is_zero_gpu_space': True,
563
  'series': 'Zonos',
564
  'title': 'Outclassed',
565
+ 'space_link': 'Steveeeeeeen/Zonos',
566
  },
567
 
568
  # Spark-TTS
 
623
  'is_zero_gpu_space': True,
624
  'series': 'Chatterbox',
625
  },
626
+
627
+ 'ByteDance/MegaTTS3': {
628
+ 'name': 'MegaTTS',
629
+ 'function': '/predict',
630
+ 'text_param_index': 'inp_text',
631
+ 'return_audio_index': 0,
632
+ 'is_zero_gpu_space': True,
633
+ 'series': 'MegaTTS',
634
+ },
635
  }
636
 
637
  # for zero-shot TTS - voice sample used by XTTS (11 seconds)
 
978
  'temperature_input': 0.8, # Lower values make the output more deterministic, higher values increase randomness.
979
  'seed_num_input': 1, # Seed for random number generation, can be any integer.
980
  'cfgw_input': 0.5, # CFG/Pace weight, can be any float value.
981
+ },
982
+
983
+ # MegaTTS
984
+ 'ByteDance/MegaTTS3': {
985
+ 'inp_audio': handle_file('voice_samples/xtts_sample_megatts.wav'),
986
+ 'inp_npy': handle_file('voice_samples/xtts_sample_megatts.npy'),
987
+ 'infer_timestep': 32,
988
+ 'p_w': 1.4,
989
+ 't_w': 3,
990
+ },
991
  }
992
 
993
  # minor mods to model from the same space
994
  OVERRIDE_INPUTS['Steveeeeeeen/Zonos/hybrid'] = OVERRIDE_INPUTS['Steveeeeeeen/Zonos']
995
  OVERRIDE_INPUTS['Steveeeeeeen/Zonos/hybrid']['model_choice'] = 'Zyphra/Zonos-v0.1-hybrid'
996
+
997
  OVERRIDE_INPUTS['PHBJT/multi_parler_tts/reformatted'] = OVERRIDE_INPUTS['PHBJT/multi_parler_tts']
998
  OVERRIDE_INPUTS['PHBJT/multi_parler_tts/reformatted']['do_format'] = True
999
 
 
1058
  ]
1059
 
1060
  # top five models in order to always have one of them picked and scrutinized
1061
+ top_five = ['ByteDance/MegaTTS3']
1062
 
1063
  # prioritize low vote models
1064
  sql = 'SELECT name FROM model WHERE (upvote + downvote) < 750 ORDER BY (upvote + downvote) ASC'
test_tts_megatts.py ADDED
@@ -0,0 +1,49 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
"""Manual smoke test for the ByteDance/MegaTTS3 Hugging Face Space.

Connects to the Space with ``gradio_client``, reads the parameter list of the
``/predict`` endpoint, applies the project's overrides for this model, sends
one short sentence and prints the inputs and the raw result.

Reads the ``HF_TOKEN`` environment variable for the Hugging Face token
(``None`` is passed when it is unset).
"""
import os
from test_overrides import _get_param_examples, _override_params
from gradio_client import Client, file

model = "ByteDance/MegaTTS3"
client = Client(model, hf_token=os.getenv('HF_TOKEN'))
# Endpoint description as a dict; used below to look up the endpoint parameters.
endpoints = client.view_api(all_endpoints=True, print_info=False, return_format='dict')

api_name = '/predict'
fn_index = None
text = 'This is what my voice sounds like.'

# Starting inputs derived from the endpoint's declared parameters.
# NOTE(review): presumably these are the Space's example/default values —
# confirm against test_overrides._get_param_examples.
end_parameters = _get_param_examples(
    endpoints['named_endpoints'][api_name]['parameters']
)
print(end_parameters)

# override some or all default parameters
space_inputs = _override_params(end_parameters, model)

if isinstance(space_inputs, dict):
    # Keyword-style inputs: the text goes under the Space's 'inp_text' parameter.
    space_inputs['inp_text'] = text
    result = client.predict(
        **space_inputs,
        api_name=api_name,
        fn_index=fn_index
    )
else:
    # Positional inputs: the text is placed in the first slot.
    space_inputs[0] = text
    result = client.predict(
        *space_inputs,
        api_name=api_name,
        fn_index=fn_index
    )

print(space_inputs)
print(result)
voice_samples/xtts_sample_megatts.npy ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:eb3a00932b84ace3bf94fef3dcba7c328608276107b6d36c34e33bcede8666f4
3
+ size 38016
voice_samples/xtts_sample_megatts.wav ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:89a4fa9a16b6463f852cf9424f72c3d3c87aa83010e89db534c53fcd1ae12c02
3
+ size 1002030