TTS-Spaces-Arena

Running

App Files Files Community

Pendrokar committed on Mar 21

Commit

ad6af40

1 Parent(s): a87479b

new TTS: sesame

Browse files

Files changed (2) hide show

app/leaderboard.py +1 -1
app/models.py +22 -2

app/leaderboard.py CHANGED Viewed

@@ -54,7 +54,7 @@ def get_leaderboard(reveal_prelim = False):
             and '/' in orig_name
         ):
             style = 'text-decoration: underline;text-decoration-style: dotted; color: var(--link-text-color);'
-            title = 'Rejections'
             # win rate dataset
             df.at[i, 'Win Rate'] = f'<a target="_blank" style="{style}" title="{title}" href="https://huggingface.co/datasets/{DB_DATASET_ID}/viewer/summary/rejections?f[rejected][value]=%27{orig_name}%27">' + df['Win Rate'].iloc[i] + '</a>'
     df['Elo'] = round(df['Elo'])

             and '/' in orig_name
         ):
             style = 'text-decoration: underline;text-decoration-style: dotted; color: var(--link-text-color);'
+            title = 'See rejections'
             # win rate dataset
             df.at[i, 'Win Rate'] = f'<a target="_blank" style="{style}" title="{title}" href="https://huggingface.co/datasets/{DB_DATASET_ID}/viewer/summary/rejections?f[rejected][value]=%27{orig_name}%27">' + df['Win Rate'].iloc[i] + '</a>'
     df['Elo'] = round(df['Elo'])

app/models.py CHANGED Viewed

@@ -101,6 +101,9 @@ AVAILABLE_MODELS = {
     # Spark
     'thunnai/SparkTTS': 'thunnai/SparkTTS',
     # HF TTS w issues
     # 'LeeSangHoon/HierSpeech_TTS': 'LeeSangHoon/HierSpeech_TTS', # irresponsive to exclamation marks # 4.29
     # 'PolyAI/pheme': '/predict#0', # sleepy HF Space
@@ -261,7 +264,7 @@ HF_SPACES = {
         'text_param_index': 'text',
         'return_audio_index': 0,
         'series': 'Fish Speech',
-        'emoji': '😷', # broken space
     },
     # F5 TTS
@@ -481,6 +484,15 @@ HF_SPACES = {
         'is_zero_gpu_space': True,
         'series': 'Spark-TTS',
     },
 }
 # for zero-shot TTS - voice sample used by XTTS (11 seconds)
@@ -765,7 +777,15 @@ OVERRIDE_INPUTS = {
 		'prompt_text': DEFAULT_VOICE_TRANSCRIPT,
 		'prompt_wav_upload': DEFAULT_VOICE_SAMPLE,
 		'prompt_wav_record': None,
-    }
 }
 # minor mods to model from the same space

     # Spark
     'thunnai/SparkTTS': 'thunnai/SparkTTS',
+    # Sesame
+    'sesame/csm-1b' : 'sesame/csm-1b',
     # HF TTS w issues
     # 'LeeSangHoon/HierSpeech_TTS': 'LeeSangHoon/HierSpeech_TTS', # irresponsive to exclamation marks # 4.29
     # 'PolyAI/pheme': '/predict#0', # sleepy HF Space
         'text_param_index': 'text',
         'return_audio_index': 0,
         'series': 'Fish Speech',
+        # 'emoji': '😷',
     },
     # F5 TTS
         'is_zero_gpu_space': True,
         'series': 'Spark-TTS',
     },
+    'sesame/csm-1b' : {
+        'name': 'sesame/csm-1b',
+        'function': '/infer',
+        'text_param_index': 'gen_conversation_input',
+        'return_audio_index': 0,
+        'is_zero_gpu_space': True,
+        'series': 'Spark-TTS',
+    },
 }
 # for zero-shot TTS - voice sample used by XTTS (11 seconds)
 		'prompt_text': DEFAULT_VOICE_TRANSCRIPT,
 		'prompt_wav_upload': DEFAULT_VOICE_SAMPLE,
 		'prompt_wav_record': None,
+    },
+    # sesame/csm-1b
+    'sesame/csm-1b' : {
+		"text_prompt_speaker_a": "And Lake turned round upon me, a little abruptly, his odd yellowish eyes, a little like those of the sea eagle, and the ghost of his smile that flickered on his singularly pale face, with a stern and insidious look, confronted me.",
+		"text_prompt_speaker_b": "And Lake turned round upon me, a little abruptly, his odd yellowish eyes, a little like those of the sea eagle, and the ghost of his smile that flickered on his singularly pale face, with a stern and insidious look, confronted me.", #second speaker unused
+		"audio_prompt_speaker_a": handle_file('voice_samples/read_speech_a.wav'),
+		"audio_prompt_speaker_b": handle_file('voice_samples/read_speech_a.wav'), #second speaker unused
+    },
 }
 # minor mods to model from the same space