Pendrokar committed on
Commit
ad6af40
·
1 Parent(s): a87479b

new TTS: sesame

Browse files
Files changed (2) hide show
  1. app/leaderboard.py +1 -1
  2. app/models.py +22 -2
app/leaderboard.py CHANGED
@@ -54,7 +54,7 @@ def get_leaderboard(reveal_prelim = False):
54
  and '/' in orig_name
55
  ):
56
  style = 'text-decoration: underline;text-decoration-style: dotted; color: var(--link-text-color);'
57
- title = 'Rejections'
58
  # win rate dataset
59
  df.at[i, 'Win Rate'] = f'<a target="_blank" style="{style}" title="{title}" href="https://huggingface.co/datasets/{DB_DATASET_ID}/viewer/summary/rejections?f[rejected][value]=%27{orig_name}%27">' + df['Win Rate'].iloc[i] + '</a>'
60
  df['Elo'] = round(df['Elo'])
 
54
  and '/' in orig_name
55
  ):
56
  style = 'text-decoration: underline;text-decoration-style: dotted; color: var(--link-text-color);'
57
+ title = 'See rejections'
58
  # win rate dataset
59
  df.at[i, 'Win Rate'] = f'<a target="_blank" style="{style}" title="{title}" href="https://huggingface.co/datasets/{DB_DATASET_ID}/viewer/summary/rejections?f[rejected][value]=%27{orig_name}%27">' + df['Win Rate'].iloc[i] + '</a>'
60
  df['Elo'] = round(df['Elo'])
app/models.py CHANGED
@@ -101,6 +101,9 @@ AVAILABLE_MODELS = {
101
  # Spark
102
  'thunnai/SparkTTS': 'thunnai/SparkTTS',
103
 
 
 
 
104
  # HF TTS w issues
105
  # 'LeeSangHoon/HierSpeech_TTS': 'LeeSangHoon/HierSpeech_TTS', # irresponsive to exclamation marks # 4.29
106
  # 'PolyAI/pheme': '/predict#0', # sleepy HF Space
@@ -261,7 +264,7 @@ HF_SPACES = {
261
  'text_param_index': 'text',
262
  'return_audio_index': 0,
263
  'series': 'Fish Speech',
264
- 'emoji': '😷', # broken space
265
  },
266
 
267
  # F5 TTS
@@ -481,6 +484,15 @@ HF_SPACES = {
481
  'is_zero_gpu_space': True,
482
  'series': 'Spark-TTS',
483
  },
 
 
 
 
 
 
 
 
 
484
  }
485
 
486
  # for zero-shot TTS - voice sample used by XTTS (11 seconds)
@@ -765,7 +777,15 @@ OVERRIDE_INPUTS = {
765
  'prompt_text': DEFAULT_VOICE_TRANSCRIPT,
766
  'prompt_wav_upload': DEFAULT_VOICE_SAMPLE,
767
  'prompt_wav_record': None,
768
- }
 
 
 
 
 
 
 
 
769
  }
770
 
771
  # minor mods to model from the same space
 
101
  # Spark
102
  'thunnai/SparkTTS': 'thunnai/SparkTTS',
103
 
104
+ # Sesame
105
+ 'sesame/csm-1b' : 'sesame/csm-1b',
106
+
107
  # HF TTS w issues
108
  # 'LeeSangHoon/HierSpeech_TTS': 'LeeSangHoon/HierSpeech_TTS', # irresponsive to exclamation marks # 4.29
109
  # 'PolyAI/pheme': '/predict#0', # sleepy HF Space
 
264
  'text_param_index': 'text',
265
  'return_audio_index': 0,
266
  'series': 'Fish Speech',
267
+ # 'emoji': '😷',
268
  },
269
 
270
  # F5 TTS
 
484
  'is_zero_gpu_space': True,
485
  'series': 'Spark-TTS',
486
  },
487
+
488
+ 'sesame/csm-1b' : {
489
+ 'name': 'sesame/csm-1b',
490
+ 'function': '/infer',
491
+ 'text_param_index': 'gen_conversation_input',
492
+ 'return_audio_index': 0,
493
+ 'is_zero_gpu_space': True,
494
+ 'series': 'Spark-TTS',
495
+ },
496
  }
497
 
498
  # for zero-shot TTS - voice sample used by XTTS (11 seconds)
 
777
  'prompt_text': DEFAULT_VOICE_TRANSCRIPT,
778
  'prompt_wav_upload': DEFAULT_VOICE_SAMPLE,
779
  'prompt_wav_record': None,
780
+ },
781
+
782
+ # sesame/csm-1b
783
+ 'sesame/csm-1b' : {
784
+ "text_prompt_speaker_a": "And Lake turned round upon me, a little abruptly, his odd yellowish eyes, a little like those of the sea eagle, and the ghost of his smile that flickered on his singularly pale face, with a stern and insidious look, confronted me.",
785
+ "text_prompt_speaker_b": "And Lake turned round upon me, a little abruptly, his odd yellowish eyes, a little like those of the sea eagle, and the ghost of his smile that flickered on his singularly pale face, with a stern and insidious look, confronted me.", #second speaker unused
786
+ "audio_prompt_speaker_a": handle_file('voice_samples/read_speech_a.wav'),
787
+ "audio_prompt_speaker_b": handle_file('voice_samples/read_speech_a.wav'), #second speaker unused
788
+ },
789
  }
790
 
791
  # minor mods to model from the same space