Staticaliza committed · verified
Commit dfe5a3d · 1 Parent(s): 9487e3f

Update app.py

Files changed (1)
  1. app.py +19 -8
app.py CHANGED
@@ -2,7 +2,7 @@
 import gradio as gr
 import spaces
 import torch
-
+import numpy as np
 from kokoro import KModel, KPipeline
 
 # Pre-Initialize
@@ -15,7 +15,6 @@ torch.set_num_threads(4)
 
 # Variables
 CHAR_LIMIT = 2000
-
 DEFAULT_INPUT = ""
 DEFAULT_VOICE = "af_heart"
 
@@ -59,8 +58,13 @@ for v in CHOICES.values():
     VOICE_PACKS[v] = PIPELINES[v[0]].load_voice(v)
 
 model_instance = KModel().to(DEVICE).eval()
-MODEL = torch.jit.script(model_instance)
 
+try:
+    MODEL = torch.jit.script(model_instance)
+except Exception as e:
+    print("torch.jit.script failed, using original model:", e)
+    MODEL = model_instance
+
 css = '''
 .gradio-container{max-width: 560px !important}
 h1{text-align:center}
@@ -69,16 +73,26 @@ footer {
 }
 '''
 
+def trim_silence(audio, threshold=0.001):
+    abs_audio = np.abs(audio)
+    indices = np.where(abs_audio > threshold)[0]
+    if len(indices) == 0:
+        return audio
+    start = indices[0]
+    end = indices[-1] + 1
+    return audio[start:end]
+
 # Functions
 def generate(text=DEFAULT_INPUT, voice=DEFAULT_VOICE, speed=1):
     text = text.strip()[:CHAR_LIMIT] + "."
     pipeline = PIPELINES[voice[0]]
     pack = VOICE_PACKS[voice]
-
     for _, ps, _ in pipeline(text, voice, speed):
         ref_s = pack[len(ps) - 1]
         audio = MODEL(ps, ref_s, speed)
-        return (24000, audio.numpy())
+        audio_np = audio.numpy()
+        trimmed_audio = trim_silence(audio_np)
+        return (24000, trimmed_audio)
 
 def cloud():
     print("[CLOUD] | Space maintained.")
@@ -93,13 +107,10 @@ with gr.Blocks(css=css) as main:
         input = gr.Textbox(lines=1, value=DEFAULT_INPUT, label="Input")
         voice_input = gr.Dropdown(list(CHOICES.items()), value=DEFAULT_VOICE, label="Voice")
         speed_input = gr.Slider(minimum=0.5, maximum=2, value=1, step=0.1, label="Speed")
-
         submit = gr.Button("▶")
         maintain = gr.Button("☁️")
-
     with gr.Column():
         output = gr.Audio(label="Output")
-
     submit.click(fn=generate, inputs=[input, voice_input, speed_input], outputs=output)
     maintain.click(cloud, inputs=[], outputs=[], queue=False)
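
For reference, the added trim_silence helper can be exercised on its own; the snippet below copies the function from the diff and runs it on a synthetic signal. The threshold default and the test values here are illustrative only, not part of the commit.

import numpy as np

def trim_silence(audio, threshold=0.001):
    # As in the updated app.py: keep the span between the first and last
    # samples whose magnitude exceeds the threshold.
    abs_audio = np.abs(audio)
    indices = np.where(abs_audio > threshold)[0]
    if len(indices) == 0:
        return audio
    start = indices[0]
    end = indices[-1] + 1
    return audio[start:end]

# Illustrative check: 100 silent samples on either side of a 50-sample
# tone should be stripped, leaving only the tone.
signal = np.concatenate([np.zeros(100), np.full(50, 0.5), np.zeros(100)])
assert len(trim_silence(signal)) == 50

Because generate() still returns a (24000, array) tuple, which gr.Audio accepts as (sample_rate, data), the trimming only shortens the waveform without changing the output format.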