poemsforaphrodite committed on
Commit
471fe68
·
verified ·
1 Parent(s): 933f050

Upload folder using huggingface_hub

Browse files
Files changed (2) hide show
  1. app.py +59 -23
  2. requirements.txt +1 -1
app.py CHANGED
@@ -14,11 +14,27 @@ def load_tts_model():
14
 
15
  tts = load_tts_model()
16
 
17
- def clone(text, audio_file, language, speaking_rate, pitch, volume,
18
- emotion, sample_rate, temperature, seed):
19
- if seed is not None:
20
- torch.manual_seed(seed)
 
 
 
 
 
 
 
 
 
 
 
 
 
21
 
 
 
 
22
  with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as temp_audio:
23
  temp_audio_path = temp_audio.name
24
 
@@ -31,28 +47,48 @@ def clone(text, audio_file, language, speaking_rate, pitch, volume,
31
 
32
  return temp_audio_path
33
 
 
 
 
 
34
  # Define Gradio interface
35
- iface = gr.Interface(
36
- fn=clone,
37
- inputs=[
38
- gr.Textbox(label="Text"),
39
- gr.Audio(label="Voice reference audio file", type="filepath"),
40
- gr.Dropdown(["en", "es", "fr", "de", "it"], label="Language", value="en"),
41
- gr.Slider(0.5, 2.0, value=1.0, label="Speaking Rate"),
42
- gr.Slider(-10, 10, value=0, label="Pitch Adjustment"),
43
- gr.Slider(0.1, 2.0, value=1.0, label="Volume"),
44
- gr.Dropdown(["neutral", "happy", "sad", "angry"], label="Emotion", value="neutral"),
45
- gr.Dropdown([22050, 24000, 44100, 48000], label="Sample Rate", value=24000),
46
- gr.Slider(0.1, 1.0, value=0.8, label="Temperature"),
47
- gr.Number(label="Seed (optional)")
48
- ],
49
- outputs=gr.Audio(label="Generated Audio"),
50
- title="Advanced Voice Clone",
51
- description="Customize your voice cloning experience with various parameters."
52
- )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
53
 
54
  # Launch the interface
55
- iface.launch()
56
 
57
  # Clean up temporary files (this will run after the Gradio server is closed)
58
  for file in os.listdir():
 
14
 
15
  tts = load_tts_model()
16
 
17
# Preset voices offered in the TTS tab: display name -> reference audio file
# that is handed to the model as ``speaker_wav``.
# NOTE(review): the "path/to/..." values look like placeholders; confirm that
# real sample files exist at these locations before relying on them.
celebrity_voices = {
    "Morgan Freeman": "path/to/morgan_freeman_sample.wav",
    "Scarlett Johansson": "path/to/scarlett_johansson_sample.wav",
    "David Attenborough": "path/to/david_attenborough_sample.wav",
}


def tts_generate(text, voice, language):
    """Synthesize ``text`` in one of the preset celebrity voices.

    Args:
        text: The text to speak.
        voice: A key of ``celebrity_voices``; the mapped file is passed to the
            model as the ``speaker_wav`` reference.
        language: Language code forwarded to ``tts.tts_to_file``.

    Returns:
        Path of the temporary ``.wav`` file the audio was written to.

    Raises:
        ValueError: If ``text`` is blank or ``voice`` is not a known preset.
    """
    if not text or not text.strip():
        raise ValueError("Please enter some text to speak.")

    # ``voice`` can arrive as None when nothing is selected in the dropdown;
    # report that clearly instead of failing with a bare KeyError.
    if voice is None or voice not in celebrity_voices:
        raise ValueError(
            f"Unknown voice: {voice!r}. Please select one of the listed voices."
        )
    speaker_wav = celebrity_voices[voice]

    # Only reserve a unique output path here. delete=False keeps the file on
    # disk after the handle is closed so it can be written to and returned.
    with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as temp_audio:
        temp_audio_path = temp_audio.name

    tts.tts_to_file(
        text=text,
        speaker_wav=speaker_wav,
        language=language,
        file_path=temp_audio_path,
    )

    return temp_audio_path
36
+
37
+ def clone_voice(text, audio_file, language):
38
  with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as temp_audio:
39
  temp_audio_path = temp_audio.name
40
 
 
47
 
48
  return temp_audio_path
49
 
50
+ # Placeholder function for Talking Image tab
51
def talking_image_placeholder():
    """Return the fixed notice shown while the Talking Image tab is a stub."""
    notice = "Talking Image functionality not implemented yet."
    return notice
53
+
54
  # Define Gradio interface
55
# Three-tab UI: preset-voice TTS, a "coming soon" Talking Image tab, and
# voice cloning from an uploaded reference clip.
# NOTE(review): the nesting below is reconstructed from flattened text;
# confirm which widgets belong inside each gr.Row() against app.py.
with gr.Blocks() as demo:
    gr.Markdown("# Advanced Voice Synthesis")

    with gr.Tabs():
        with gr.TabItem("TTS"):
            with gr.Row():
                tts_text = gr.Textbox(label="Text to speak")
                tts_voice = gr.Dropdown(
                    choices=list(celebrity_voices.keys()),
                    # Preselect the first preset so pressing "Generate" with
                    # an untouched dropdown does not submit None.
                    value=next(iter(celebrity_voices), None),
                    label="Celebrity Voice",
                )
                tts_language = gr.Dropdown(
                    ["en", "es", "fr", "de", "it"], label="Language", value="en"
                )
            tts_generate_btn = gr.Button("Generate")
            tts_output = gr.Audio(label="Generated Audio")

            tts_generate_btn.click(
                tts_generate,
                inputs=[tts_text, tts_voice, tts_language],
                outputs=tts_output,
            )

        with gr.TabItem("Talking Image"):
            gr.Markdown("Talking Image functionality coming soon!")

        with gr.TabItem("Clone Voice"):
            with gr.Row():
                clone_text = gr.Textbox(label="Text to speak")
                clone_audio = gr.Audio(
                    label="Voice reference audio file", type="filepath"
                )
                clone_language = gr.Dropdown(
                    ["en", "es", "fr", "de", "it"], label="Language", value="en"
                )
            clone_generate_btn = gr.Button("Generate")
            clone_output = gr.Audio(label="Generated Audio")

            clone_generate_btn.click(
                clone_voice,
                inputs=[clone_text, clone_audio, clone_language],
                outputs=clone_output,
            )
89
 
90
  # Launch the interface
91
+ demo.launch()
92
 
93
  # Clean up temporary files (this will run after the Gradio server is closed)
94
  for file in os.listdir():
requirements.txt CHANGED
@@ -1,3 +1,3 @@
1
- gradio
2
  torch
3
  TTS
 
1
+ streamlit
2
  torch
3
  TTS