Pipe1213 committed on
Commit
1524fa3
·
verified ·
1 Parent(s): c137b2c

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +126 -0
app.py ADDED
@@ -0,0 +1,126 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ import os
3
# Build the extension in monotonic_align/ in place before the project modules
# below are imported.
_build_status = os.system('cd monotonic_align && python setup.py build_ext --inplace && cd ..')
if _build_status != 0:
    # Do not abort: a previously built extension may still be usable. Leave a
    # trace so that a later import failure can be linked to this step.
    print(f"Warning: building monotonic_align failed (exit status {_build_status})")
4
+
5
+ import json
6
+ import math
7
+ import torch
8
+ from torch import nn
9
+ from torch.nn import functional as F
10
+ from torch.utils.data import DataLoader
11
+
12
+ import commons
13
+ import utils
14
+ from data_utils import TextAudioLoader, TextAudioCollate, TextAudioSpeakerLoader, TextAudioSpeakerCollate
15
+ from models import SynthesizerTrn
16
+ from text.symbols import symbols as symbols_default
17
+
18
+ from scipy.io.wavfile import write
19
+ from text import cleaners
20
+
21
# Registry of selectable models. Each key must equal the tab name passed to
# create_tab(): tts() forwards that name to load_model_and_symbols(), which
# looks it up here. The key used to be "Graphemes" while the only tab created
# is "French TTS", so every request failed with a KeyError.
model_configs = {
    "French TTS": {
        "path": "french_model_vits/G_700000.pth",
        "symbols": symbols_default,
    },
}

# Global variables
# Filled in by load_model_and_symbols(); they hold no model until it has run.
net_g = None
symbols = []
_symbol_to_id = {}
_id_to_symbol = {}
33
+
34
def text_to_sequence(text, cleaner_names):
    """Convert text into a list of symbol ids.

    The text is first passed through _clean_text() with the given cleaner
    names; every character of the cleaned text is then looked up in the
    module-level _symbol_to_id table.

    Args:
        text: the string to convert.
        cleaner_names: names of the cleaners to apply, in order.

    Returns:
        A list with one id per character of the cleaned text.

    Raises:
        KeyError: if a character of the cleaned text is not a key of
            _symbol_to_id.
    """
    cleaned = _clean_text(text, cleaner_names)
    return [_symbol_to_id[ch] for ch in cleaned]
41
+
42
def _clean_text(text, cleaner_names):
    """Run text through each named cleaner of the cleaners module, in order.

    Args:
        text: the string to clean.
        cleaner_names: iterable of attribute names to look up on cleaners;
            each one is called with the current text and its result becomes
            the new text.

    Returns:
        The text after all cleaners have been applied (unchanged when
        cleaner_names is empty).

    Raises:
        Exception: 'Unknown cleaner: <name>' if cleaners has no such
            attribute.
    """
    for name in cleaner_names:
        # Without a default, getattr() raises AttributeError for a missing
        # name, which made the "Unknown cleaner" check below unreachable.
        cleaner = getattr(cleaners, name, None)
        if not cleaner:
            raise Exception('Unknown cleaner: %s' % name)
        text = cleaner(text)
    return text
49
+
50
def get_text(text, hps):
    """Turn text into a LongTensor of symbol ids.

    Args:
        text: the string to convert.
        hps: hyperparameters; reads hps.data.text_cleaners and
            hps.data.add_blank.

    Returns:
        A torch.LongTensor built from the id sequence. When
        hps.data.add_blank is truthy the sequence is first passed through
        commons.intersperse(..., 0).
    """
    token_ids = text_to_sequence(text, hps.data.text_cleaners)
    if hps.data.add_blank:
        token_ids = commons.intersperse(token_ids, 0)
    return torch.LongTensor(token_ids)
56
+
57
# Name of the model_configs entry currently held in net_g (None until a load
# has completed).
_loaded_tab_name = None


def load_model_and_symbols(tab_name):
    """Make the model registered under tab_name the active one.

    Sets the module globals net_g, symbols, _symbol_to_id and _id_to_symbol.
    tts() calls this on every request, so the work is skipped when the
    requested model is already loaded instead of rebuilding the network and
    re-reading the checkpoint each time.

    Args:
        tab_name: key into model_configs.

    Raises:
        KeyError: if tab_name is not a key of model_configs.
    """
    global net_g, symbols, _symbol_to_id, _id_to_symbol, _loaded_tab_name

    if net_g is not None and tab_name == _loaded_tab_name:
        return

    model_config = model_configs[tab_name]
    new_symbols = model_config["symbols"]

    # Build and load into a local first, so that a failing checkpoint load
    # does not leave a half-initialised model in the globals.
    model = SynthesizerTrn(
        len(new_symbols),
        hps.data.filter_length // 2 + 1,
        hps.train.segment_size // hps.data.hop_length,
        n_speakers=hps.data.n_speakers,
        **hps.model)
    model.eval()
    utils.load_checkpoint(model_config["path"], model, None)

    symbols = new_symbols
    _symbol_to_id = {s: i for i, s in enumerate(new_symbols)}
    _id_to_symbol = {i: s for i, s in enumerate(new_symbols)}
    net_g = model
    _loaded_tab_name = tab_name
72
+
73
def tts(text, speaker_id, tab_name):
    """Synthesize speech for text with the model registered under tab_name.

    Args:
        text: input text, converted to ids with get_text().
        speaker_id: integer speaker index, passed to the model as sid.
        tab_name: model name forwarded to load_model_and_symbols().

    Returns:
        A tuple ("Success", (hps.data.sampling_rate, audio)), where audio is
        the NumPy array obtained from the model output.
    """
    load_model_and_symbols(tab_name)
    speaker = torch.LongTensor([speaker_id])  # speaker identity
    token_ids = get_text(text, hps)

    with torch.no_grad():
        batch = token_ids.unsqueeze(0)
        batch_lengths = torch.LongTensor([token_ids.size(0)])
        outputs = net_g.infer(
            batch,
            batch_lengths,
            sid=speaker,
            noise_scale=.667,
            noise_scale_w=0.8,
            length_scale=1,
        )
        waveform = outputs[0][0, 0].data.float().numpy()

    return "Success", (hps.data.sampling_rate, waveform)
84
+
85
def create_tab(tab_name):
    """Add a tab named tab_name containing the text-to-speech form.

    The tab holds a text area, a speaker dropdown, a "Generate" button and
    two outputs (a status message and the audio). Clicking the button calls
    tts() with the text, the dropdown selection and tab_name.

    Args:
        tab_name: tab label; also passed to tts() as the model name.
    """
    def _generate(text, speaker_id):
        # Bind this tab's name so the click handler only takes the two inputs.
        return tts(text, speaker_id, tab_name)

    with gr.TabItem(tab_name):
        gr.Markdown(f"### {tab_name} TTS Model")
        text_box = gr.TextArea(label="Text in french", value="")
        # type="index": the handler receives the position of the choice.
        speaker_choice = gr.Dropdown(label="Speaker", choices=["Male", "Female"], type="index", value="Male")
        generate_button = gr.Button("Generate", variant="primary")
        message_box = gr.Textbox(label="Message")
        audio_player = gr.Audio(label="Output")
        generate_button.click(
            _generate,
            inputs=[text_box, speaker_choice],
            outputs=[message_box, audio_player],
        )
94
+
95
# Hyperparameters read once at start-up; load_model_and_symbols() and tts()
# use this module-level object.
hps = utils.get_hparams_from_file("configs/vctk_base.json")

# Page layout: introduction, one tab with the TTS form, then example inputs.
with gr.Blocks() as app:
    gr.Markdown(
        """
# VITS Implementation for French

Based on VITS (https://github.com/jaywalnut310/vits).

## How to use:
Write the text on the box below. For faster inference, it is recommended to use short sentences.

## Hint: Some sample texts are available at the bottom of the web site.
"""
    )
    with gr.Tabs():
        create_tab("French TTS")

    gr.Markdown(
        """
## Examples
| Input Text | Speaker |
|------------|---------|
| On ne voit bien qu'avec le cœur, l'essentiel est invisible pour les yeux. | Female |
| Voilà plusieurs fois, Monsieur, que je vous rencontre sur mon chemin. C’est autant de fois de trop, et j’en ai assez de perdre mon temps à déjouer les pièges que vous me tendez. | Male |
| Je pense, donc je suis. | Female |
| La vie est un sommeil, l'amour en est le rêve, et vous aurez vécu si vous avez aimé. | Male |
"""
    )

app.launch()