Pendrokar committed on
Commit
fcff623
·
1 Parent(s): 9b87855

New TTS: Chatterbox

Browse files
Files changed (3) hide show
  1. app/models.py +21 -0
  2. test_tts_chatterbox.py +50 -0
  3. test_tts_zonos.py +4 -3
app/models.py CHANGED
@@ -115,6 +115,9 @@ AVAILABLE_MODELS = {
115
  # Dia
116
  # 'nari-labs/Dia-1.6B': 'nari-labs/Dia-1.6B', # single speaker hallucinates
117
 
 
 
 
118
  # HF TTS w issues
119
  # 'LeeSangHoon/HierSpeech_TTS': 'LeeSangHoon/HierSpeech_TTS', # irresponsive to exclamation marks # 4.29
120
  # 'PolyAI/pheme': '/predict#0', # sleepy HF Space
@@ -543,6 +546,15 @@ HF_SPACES = {
543
  'is_zero_gpu_space': True,
544
  'series': 'Dia',
545
  },
 
 
 
 
 
 
 
 
 
546
  }
547
 
548
  # for zero-shot TTS - voice sample used by XTTS (11 seconds)
@@ -866,6 +878,15 @@ OVERRIDE_INPUTS = {
866
  'cfg_filter_top_k': 35, # Top k filter for CFG guidance.
867
  'speed_factor': 0.94, # Adjusts the speed of the generated audio (1.0 = original speed).
868
  },
 
 
 
 
 
 
 
 
 
869
  }
870
 
871
  # minor mods to model from the same space
 
115
  # Dia
116
  # 'nari-labs/Dia-1.6B': 'nari-labs/Dia-1.6B', # single speaker hallucinates
117
 
118
+ # Chatterbox
119
+ 'ResembleAI/Chatterbox': 'ResembleAI/Chatterbox',
120
+
121
  # HF TTS w issues
122
  # 'LeeSangHoon/HierSpeech_TTS': 'LeeSangHoon/HierSpeech_TTS', # irresponsive to exclamation marks # 4.29
123
  # 'PolyAI/pheme': '/predict#0', # sleepy HF Space
 
546
  'is_zero_gpu_space': True,
547
  'series': 'Dia',
548
  },
549
+
550
+ 'ResembleAI/Chatterbox' : {
551
+ 'name': 'Chatterbox',
552
+ 'function': '/generate_tts_audio',
553
+ 'text_param_index': 'text_input',
554
+ 'return_audio_index': 0,
555
+ 'is_zero_gpu_space': True,
556
+ 'series': 'Chatterbox',
557
+ },
558
  }
559
 
560
  # for zero-shot TTS - voice sample used by XTTS (11 seconds)
 
878
  'cfg_filter_top_k': 35, # Top k filter for CFG guidance.
879
  'speed_factor': 0.94, # Adjusts the speed of the generated audio (1.0 = original speed).
880
  },
881
+
882
+ # Chatterbox
883
+ 'ResembleAI/Chatterbox': {
884
+ 'audio_prompt_path_input': DEFAULT_VOICE_SAMPLE, # voice
885
+ 'exaggeration_input': 0.5, # 0.25-2; 0.5 is neutral, extreme values can be unstable
886
+ 'temperature_input': 0.8, # Lower values make the output more deterministic, higher values increase randomness.
887
+ 'seed_num_input': 1, # Seed for random number generation, can be any integer.
888
+ 'cfgw_input': 0.5, # CFG/Pace weight, can be any float value.
889
+ }
890
  }
891
 
892
  # minor mods to model from the same space
test_tts_chatterbox.py ADDED
@@ -0,0 +1,50 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ from test_overrides import _get_param_examples, _override_params
3
+ from gradio_client import Client, file
4
+
5
+ model = "ResembleAI/Chatterbox"
6
+ client = Client(model, hf_token=os.getenv('HF_TOKEN'))
7
+ endpoints = client.view_api(all_endpoints=True, print_info=False, return_format='dict')
8
+ # print(endpoints)
9
+
10
+ api_name = '/generate_tts_audio'
11
+ fn_index = None
12
+ end_parameters = None
13
+ text = 'This is what my voice sounds like.'
14
+
15
+ end_parameters = _get_param_examples(
16
+ endpoints['named_endpoints'][api_name]['parameters']
17
+ )
18
+ print(end_parameters)
19
+
20
+
21
+ space_inputs = end_parameters
22
+ # override some or all default parameters
23
+ space_inputs = _override_params(end_parameters, model)
24
+
25
+ if(type(space_inputs) == dict):
26
+ space_inputs['text_input'] = text
27
+ result = client.predict(
28
+ **space_inputs,
29
+ api_name=api_name,
30
+ fn_index=fn_index
31
+ )
32
+ else:
33
+ space_inputs[0] = text
34
+ result = client.predict(
35
+ *space_inputs,
36
+ api_name=api_name,
37
+ fn_index=fn_index
38
+ )
39
+ # space_inputs = {str(i): value for i, value in enumerate(space_inputs)}
40
+
41
+ print(space_inputs)
42
+ # print(*space_inputs)
43
+ # print(**space_inputs)
44
+
45
+ # result = client.predict(
46
+ # **space_inputs,
47
+ # api_name=api_name,
48
+ # fn_index=fn_index
49
+ # )
50
+ print(result)
test_tts_zonos.py CHANGED
@@ -2,7 +2,8 @@ import os
2
  from test_overrides import _get_param_examples, _override_params
3
  from gradio_client import Client, file
4
 
5
- model = "Steveeeeeeen/Zonos/hybrid"
 
6
  # client = Client("Pendrokar/Zonos", hf_token=os.getenv('HF_TOKEN'))
7
  client = Client("Steveeeeeeen/Zonos", hf_token=os.getenv('HF_TOKEN'))
8
  # client = Client(model, hf_token=os.getenv('HF_TOKEN'))
@@ -12,7 +13,7 @@ endpoints = client.view_api(all_endpoints=True, print_info=False, return_format=
12
  api_name = '/generate_audio'
13
  fn_index = None
14
  end_parameters = None
15
- text = 'This is what my voice sounds like.'
16
 
17
  end_parameters = _get_param_examples(
18
  endpoints['named_endpoints'][api_name]['parameters']
@@ -20,7 +21,7 @@ end_parameters = _get_param_examples(
20
  print(end_parameters)
21
 
22
 
23
- space_inputs = end_parameters
24
  # override some or all default parameters
25
  space_inputs = _override_params(end_parameters, model)
26
 
 
2
  from test_overrides import _get_param_examples, _override_params
3
  from gradio_client import Client, file
4
 
5
+ model = "Steveeeeeeen/Zonos"
6
+ # model = "Steveeeeeeen/Zonos/hybrid"
7
  # client = Client("Pendrokar/Zonos", hf_token=os.getenv('HF_TOKEN'))
8
  client = Client("Steveeeeeeen/Zonos", hf_token=os.getenv('HF_TOKEN'))
9
  # client = Client(model, hf_token=os.getenv('HF_TOKEN'))
 
13
  api_name = '/generate_audio'
14
  fn_index = None
15
  end_parameters = None
16
+ text = 'The young girl.'
17
 
18
  end_parameters = _get_param_examples(
19
  endpoints['named_endpoints'][api_name]['parameters']
 
21
  print(end_parameters)
22
 
23
 
24
+ # space_inputs = end_parameters
25
  # override some or all default parameters
26
  space_inputs = _override_params(end_parameters, model)
27