KIMOSSINO committed on
Commit
41a5749
·
verified ·
1 Parent(s): e12f2b2

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +111 -0
app.py ADDED
@@ -0,0 +1,111 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ import torch
3
+ from transformers import AutoProcessor, AutoModel
4
+ import numpy as np
5
+
6
# Load the processor and the model once, at import time, so that every
# request served by the app reuses the same objects.
# NOTE(review): confirm that "facebook/mms-tts" resolves to a checkpoint that
# AutoProcessor / AutoModel can load -- the MMS-TTS checkpoints documented for
# transformers appear to be published per language; verify before deploying.
model_id = "facebook/mms-tts"
processor = AutoProcessor.from_pretrained(model_id)
model = AutoModel.from_pretrained(model_id)

# UI language label -> language code passed to the processor.
LANGUAGES = dict(English="eng", French="fra", Spanish="spa")

# UI voice label -> integer speaker id passed to the model.
SPEAKERS = dict(Male=0, Female=1)
21
+
22
def text_to_speech(text, language, speaker_gender, speed):
    """Synthesize speech for ``text`` and return it in Gradio's numpy audio form.

    Args:
        text: Text to speak; must contain at least one non-whitespace character.
        language: A key of ``LANGUAGES`` (the label selected in the UI).
        speaker_gender: A key of ``SPEAKERS`` (the label selected in the UI).
        speed: Speed ratio forwarded to the model as ``speed_ratios``.

    Returns:
        A ``(sampling_rate, waveform)`` tuple, where ``waveform`` is the first
        item of the model output converted to a NumPy array and
        ``sampling_rate`` comes from ``model.config.sampling_rate``.

    Raises:
        gr.Error: If the text is empty, a selection is unknown, or synthesis
            fails. Raising (rather than returning ``None``) lets Gradio show
            the reason to the user instead of silently leaving the audio
            player empty.
    """
    if not text or not text.strip():
        raise gr.Error("Please enter some text to convert to speech.")

    try:
        language_code = LANGUAGES[language]
        speaker_index = SPEAKERS[speaker_gender]
    except KeyError as err:
        raise gr.Error(f"Unsupported option: {err.args[0]!r}") from err

    try:
        # Prepare inputs
        inputs = processor(
            text=text,
            language=language_code,
            return_tensors="pt",
        )

        # Generate speech (inference only, so gradients are disabled).
        # NOTE(review): confirm the loaded model exposes `generate` with
        # `speaker_id` / `speed_ratios`; the VITS-based MMS-TTS models in
        # transformers are documented as returning `.waveform` from a plain
        # forward call -- verify against the checkpoint actually loaded.
        with torch.no_grad():
            output = model.generate(
                **inputs,
                speaker_id=torch.tensor([speaker_index]),
                # The slider may deliver an int (e.g. 1); force a float tensor.
                speed_ratios=torch.tensor([float(speed)]),
            )

        # Convert to waveform
        speech = output[0].cpu().numpy()
        sampling_rate = model.config.sampling_rate
    except Exception as err:
        # UI boundary: translate any backend failure into a visible message.
        raise gr.Error(f"Speech generation failed: {err}") from err

    return (sampling_rate, speech)
46
+
47
+ # Create Gradio interface
48
def create_interface():
    """Build the text-to-speech demo UI and return it without launching.

    The page has two columns: the left one collects the text, language,
    speaker gender and speed and holds the submit button; the right one holds
    the audio player. Clicking the button calls ``text_to_speech`` with the
    four inputs and routes its result to the audio player.

    Returns:
        The constructed ``gr.Blocks`` application.
    """
    soft_theme = gr.themes.Soft(primary_hue="blue", secondary_hue="gray")
    language_choices = list(LANGUAGES.keys())
    speaker_choices = list(SPEAKERS.keys())

    with gr.Blocks(theme=soft_theme) as demo:
        # Page header.
        gr.Markdown(
            """
            # 🎙️ Multilingual Text-to-Speech
            Convert text to natural-sounding speech in multiple languages.
            """
        )

        with gr.Row():
            # Left column: user-supplied inputs and the trigger button.
            with gr.Column():
                text_input = gr.Textbox(
                    label="Enter Text",
                    placeholder="Type your text here...",
                    lines=5,
                )
                language = gr.Dropdown(
                    choices=language_choices,
                    value="English",
                    label="Language",
                )
                speaker = gr.Radio(
                    choices=speaker_choices,
                    value="Male",
                    label="Speaker Gender",
                )
                speed = gr.Slider(
                    minimum=0.5,
                    maximum=2.0,
                    value=1.0,
                    step=0.1,
                    label="Speech Speed",
                )
                submit_btn = gr.Button("Generate Speech", variant="primary")

            # Right column: playback of the synthesized audio.
            with gr.Column():
                audio_output = gr.Audio(label="Generated Speech", type="numpy")

        # Wire the button to the synthesis function.
        handler_inputs = [text_input, language, speaker, speed]
        submit_btn.click(
            fn=text_to_speech,
            inputs=handler_inputs,
            outputs=audio_output,
        )

        # Page footer listing what the demo offers.
        gr.Markdown(
            """
            ### Features:
            - Support for English, French, and Spanish
            - Male and Female voice options
            - Adjustable speech speed
            - High-quality, natural-sounding voices
            """
        )

    return demo
109
+
110
# Build the UI at module level so `demo` stays available to anything that
# imports this module.
demo = create_interface()

# Start the web server only when the file is executed as a script; importing
# the module should not launch a server as a side effect.
if __name__ == "__main__":
    demo.launch()