mantrakp committed on
Commit
acde4c3
•
1 Parent(s): 84df228

Refactor app.py to add audio tab and update gradio UI

Browse files
app.py CHANGED
@@ -2,6 +2,8 @@ import gradio as gr
2
 
3
  from config import css
4
  from tabs.images.ui import image_tab
 
 
5
 
6
  with gr.Blocks(theme=gr.themes.Soft(), css=css) as demo:
7
  # Header
@@ -16,13 +18,14 @@ with gr.Blocks(theme=gr.themes.Soft(), css=css) as demo:
16
  with gr.Tabs():
17
  with gr.Tab(label="🖼️ Image"):
18
  image_tab()
19
- # with gr.Tab(label="🎵 Audio"):
20
- # audio_tab()
21
  # with gr.Tab(label="🎥 Video"):
22
  # video_tab()
23
  # with gr.Tab(label="📝 Text"):
24
  # text_tab()
25
 
 
26
  demo.launch(
27
  share=False,
28
  debug=True,
 
2
 
3
  from config import css
4
  from tabs.images.ui import image_tab
5
+ from tabs.audios.ui import audio_tab
6
+
7
 
8
  with gr.Blocks(theme=gr.themes.Soft(), css=css) as demo:
9
  # Header
 
18
  with gr.Tabs():
19
  with gr.Tab(label="🖼️ Image"):
20
  image_tab()
21
+ with gr.Tab(label="🎵 Audio"):
22
+ audio_tab()
23
  # with gr.Tab(label="🎥 Video"):
24
  # video_tab()
25
  # with gr.Tab(label="📝 Text"):
26
  # text_tab()
27
 
28
+
29
  demo.launch(
30
  share=False,
31
  debug=True,
config.py CHANGED
@@ -73,3 +73,7 @@ class Config:
73
  "compute_type": torch.bfloat16,
74
  }
75
  ]
 
 
 
 
 
73
  "compute_type": torch.bfloat16,
74
  }
75
  ]
76
+
77
+
78
+ # Audios
79
+ AUDIOS_MODELS = [{"repo_id": "fal/AuraSR-v2"}]
requirements.txt CHANGED
@@ -22,3 +22,5 @@ git+https://github.com/mantrakp04/BasicSR-fix.git
22
  git+https://github.com/TencentARC/GFPGAN.git
23
  git+https://github.com/xinntao/Real-ESRGAN.git
24
  aura_sr
 
 
 
22
  git+https://github.com/TencentARC/GFPGAN.git
23
  git+https://github.com/xinntao/Real-ESRGAN.git
24
  aura_sr
25
+ deepfilternet
26
+ styletts2
tabs/audios/events.py ADDED
@@ -0,0 +1,65 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import gc
3
+ import tempfile
4
+ from uuid import uuid4
5
+
6
+ import spaces
7
+ import gradio as gr
8
+ import numpy as np
9
+ from df.enhance import enhance, load_audio, save_audio
10
+
11
+ from config import Config
12
+ from .load_models import *
13
+
14
+
15
+ # Helper functions
16
+ def create_temp_file():
17
+ return tempfile.NamedTemporaryFile(delete=False)
18
+
19
+
20
+ @spaces.GPU(duration=10)
21
+ def clear_audio(audio: np.ndarray):
22
+ # Save the audio file
23
+ audio_file = create_temp_file()
24
+ np.save(audio_file.name, audio)
25
+
26
+ # Load the audio file
27
+ audio, _ = load_audio(audio_file.name, sr=df_state.sr())
28
+ enhanced = enhance(df_model, df_state, audio)
29
+
30
+ # Save the enhanced audio file
31
+ save_audio(audio_file.name, enhanced, df_state.sr())
32
+
33
+ return gr.update( # speaker_audio, output_audio
34
+ value=audio_file.name,
35
+ )
36
+
37
+
38
+ @spaces.GPU(duration=20)
39
+ def gen_audio(
40
+ text,
41
+ language,
42
+ speaker_audio: np.ndarray,
43
+ tts_alpha,
44
+ tts_beta,
45
+ tts_diffusion_steps,
46
+ tts_embedding_scale,
47
+ ):
48
+ # Save the speaker audio file
49
+ speaker_audio_file = create_temp_file()
50
+ np.save(speaker_audio_file.name, speaker_audio)
51
+
52
+ # Generate the audio
53
+ output = styletts2_model.inference(
54
+ text=text,
55
+ target_voice_path=speaker_audio_file.name,
56
+ output_wav_file=create_temp_file().name,
57
+ alpha=float(tts_alpha),
58
+ beta=float(tts_beta),
59
+ diffusion_steps=int(tts_diffusion_steps),
60
+ embedding_scale=int(tts_embedding_scale),
61
+ )
62
+
63
+ return gr.update( # output_audio
64
+ value=output,
65
+ )
tabs/audios/load_models.py ADDED
@@ -0,0 +1,17 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import torch
2
+ from df.enhance import init_df
3
+ from styletts2 import tts
4
+
5
+ from config import Config
6
+
7
+
8
+ def init_sys():
9
+ device = "cuda" if torch.cuda.is_available() else "cpu"
10
+
11
+ df_model, df_state, _ = init_df()
12
+
13
+ styletts2_model = tts.StyleTTS2()
14
+
15
+ return device, df_model, df_state, styletts2_model
16
+
17
+ device, df_model, df_state, styletts2_model = init_sys()
tabs/audios/ui.py ADDED
@@ -0,0 +1,49 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+
3
+ from config import Config
4
+ from .events import *
5
+
6
+
7
+ def audio_tab():
8
+ with gr.Row():
9
+ with gr.Column():
10
+ with gr.Group():
11
+ with gr.Group():
12
+ text = gr.Textbox(lines=5, label="Enter text")
13
+ language = gr.Dropdown(
14
+ label="Language",
15
+ choices=["en"],
16
+ value="en",
17
+ )
18
+
19
+ with gr.Accordion('Voice Clone', open=True):
20
+ speaker_audio = gr.Audio(label="Upload Audio", type='numpy')
21
+ clear_speaker_audio = gr.Button(label="Clear Audio")
22
+
23
+ with gr.Column():
24
+ output_audio = gr.Audio(label="Output Audio", interactive=False, show_download_button=True)
25
+ clear_output_audio = gr.Button(label="Clear Audio")
26
+ generate_audio = gr.Button(label="Generate Audio")
27
+
28
+ with gr.Accordion('Advance Settings', open=True):
29
+ settings = [
30
+ ('Alpha', 'tts_alpha', 'float', 0.0, 1.0, 0.3, 0.1,),
31
+ ('Beta', 'tts_beta', 'float', 0.0, 1.0, 0.7, 0.1,),
32
+ ('Diffusion Steps', 'tts_diffusion_steps', 'int', 1, 100, 10, 1,),
33
+ ('Embedding Scale', 'tts_embedding_scale', 'int', 0, 10, 1, 1,),
34
+ ]
35
+ for label, key, type_, min_, max_, value, step in settings:
36
+ globals()[key] = gr.Slider(label=label, minimum=min_, maximum=max_, value=value, step=step)
37
+
38
+
39
+ # Events
40
+ # Clear Audio
41
+ clear_speaker_audio.click(clear_audio, speaker_audio, speaker_audio)
42
+ clear_output_audio.click(clear_audio, output_audio, output_audio)
43
+
44
+ # Generate Audio
45
+ generate_audio.click(
46
+ gen_audio,
47
+ [text, language, speaker_audio, tts_alpha, tts_beta, tts_diffusion_steps, tts_embedding_scale], # type: ignore
48
+ [output_audio]
49
+ )
tabs/images/load_models.py CHANGED
@@ -10,6 +10,7 @@ from diffusers.schedulers import *
10
 
11
  from config import Config
12
 
 
13
  def init_sys():
14
  device = "cuda" if torch.cuda.is_available() else "cpu"
15
 
 
10
 
11
  from config import Config
12
 
13
+
14
  def init_sys():
15
  device = "cuda" if torch.cuda.is_available() else "cpu"
16