English-Japanese-Anime-TTS

Runtime error

App Files Files Community

meraih committed on Jun 10, 2023

Commit

fd1d2fe

1 Parent(s): 2bfeb03

Update app.py

Browse files

Files changed (1) hide show

app.py +20 -4

app.py CHANGED Viewed

@@ -36,7 +36,25 @@ limitation = os.getenv("SYSTEM") == "spaces"  # limit text and audio length in h
 def create_tts_fn(model, hps, speaker_ids):
     def tts_fn(text, speaker, language, speed, is_symbol):
     return tts_fn
@@ -47,7 +65,7 @@ def create_vc_fn(model, hps, speaker_ids):
             return "You need to upload an audio", None
         sampling_rate, audio = input_audio
         duration = audio.shape[0] / sampling_rate
-        if limitation and duration > 3600:
             return "Error: Audio is too long", None
         original_speaker_id = speaker_ids[original_speaker]
         target_speaker_id = speaker_ids[target_speaker]
@@ -210,9 +228,7 @@ if __name__ == "__main__":
                                         text_input.selectionEnd = startPos + symbols[i].length;
                                         text_input.blur();
                                         window.scrollTo(x, y);
                                         text = text_input.value;
                                         return text;
                                     }}""")
                                     # select character

 def create_tts_fn(model, hps, speaker_ids):
     def tts_fn(text, speaker, language, speed, is_symbol):
+        if limitation:
+            text_len = len(re.sub("\[([A-Z]{2})\]", "", text))
+            max_len = 150
+            if is_symbol:
+                max_len *= 3
+            if text_len > max_len:
+                return "Error: Text is too long", None
+        if language is not None:
+            text = language_marks[language] + text + language_marks[language]
+        speaker_id = speaker_ids[speaker]
+        stn_tst = get_text(text, hps, is_symbol)
+        with no_grad():
+            x_tst = stn_tst.unsqueeze(0)
+            x_tst_lengths = LongTensor([stn_tst.size(0)])
+            sid = LongTensor([speaker_id])
+            audio = model.infer(x_tst, x_tst_lengths, sid=sid, noise_scale=.667, noise_scale_w=0.8,
+                                length_scale=1.0 / speed)[0][0, 0].data.cpu().float().numpy()
+        del stn_tst, x_tst, x_tst_lengths, sid
+        return "Success", (hps.data.sampling_rate, audio)
     return tts_fn
             return "You need to upload an audio", None
         sampling_rate, audio = input_audio
         duration = audio.shape[0] / sampling_rate
+        if limitation and duration > 30:
             return "Error: Audio is too long", None
         original_speaker_id = speaker_ids[original_speaker]
         target_speaker_id = speaker_ids[target_speaker]
                                         text_input.selectionEnd = startPos + symbols[i].length;
                                         text_input.blur();
                                         window.scrollTo(x, y);
                                         text = text_input.value;
                                         return text;
                                     }}""")
                                     # select character