KIMOSSINO committed on
Commit
f5001e3
·
verified ·
1 Parent(s): 85fbbb4

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +27 -22
app.py CHANGED
@@ -1,35 +1,38 @@
1
  import gradio as gr
2
  from transformers import SpeechT5Processor, SpeechT5ForTextToSpeech, SpeechT5HifiGan
3
  import torch
4
- import librosa
5
  import numpy as np
6
  from scipy.io.wavfile import write
 
7
 
8
  # تحميل النماذج والمُعالج
9
  processor = SpeechT5Processor.from_pretrained("microsoft/speecht5_tts")
10
  model = SpeechT5ForTextToSpeech.from_pretrained("microsoft/speecht5_tts")
11
  vocoder = SpeechT5HifiGan.from_pretrained("microsoft/speecht5_hifigan")
12
 
13
- # تعيين القيم الافتراضية لمتغيرات الصوت
14
  LANGUAGES = {"English": "en", "French": "fr", "Spanish": "es"}
15
 
def generate_speaker_embedding(speaker_type):
    """Return a randomly drawn (1, 512) embedding scaled per speaker type.

    The tensor is sampled with ``torch.randn`` on every call, so two calls
    generally return different values. ``"Female"`` scales the sample by
    0.8; any other value (the UI offers ``"Male"``) scales it by 1.2.
    """
    noise = torch.randn(1, 512)
    factor = 0.8 if speaker_type == "Female" else 1.2
    return noise * factor
23
 
def adjust_speed(audio, speed):
    """Time-stretch ``audio`` by the factor ``speed`` using librosa.

    Args:
        audio: Waveform handed to ``librosa.effects.time_stretch``.
        speed: Stretch rate passed to librosa as ``rate``. A value of 1.0
            short-circuits and returns ``audio`` untouched.

    Returns:
        ``audio`` itself when ``speed`` is 1.0, otherwise the array
        produced by ``librosa.effects.time_stretch``.
    """
    # Unit speed needs no processing; skip the call entirely.
    if speed == 1.0:
        return audio
    # ``rate`` must be passed by keyword: librosa >= 0.10 made it
    # keyword-only, so the positional form raises TypeError there.
    return librosa.effects.time_stretch(audio, rate=speed)
29
 
30
  def text_to_speech(text, language, speaker_type, speed):
 
 
 
31
  try:
32
- # إنشاء تعبيرات الصوت
33
  speaker_embeddings = generate_speaker_embedding(speaker_type)
34
 
35
  # معالجة النص
@@ -37,25 +40,26 @@ def text_to_speech(text, language, speaker_type, speed):
37
 
38
  # توليد الصوت
39
  generated_speech = model.generate_speech(
40
- inputs["input_ids"],
41
- speaker_embeddings,
42
  vocoder=vocoder
43
  ).cpu().numpy()
44
 
45
- # ضبط سرعة الصوت
46
  adjusted_speech = adjust_speed(generated_speech, speed)
47
 
48
- # تحويل الصوت إلى ملف WAV
49
  output_file = "output.wav"
50
- write(output_file, 24000, adjusted_speech.astype(np.float32))
51
 
52
  return output_file
53
  except Exception as e:
54
- return f"Error: {str(e)}"
 
55
 
56
  # إنشاء واجهة Gradio
57
  def create_interface():
58
- with gr.Blocks(theme=gr.themes.Soft(primary_hue="blue")) as demo:
59
  gr.Markdown("# 🎙️ Multilingual Text-to-Speech")
60
 
61
  with gr.Row():
@@ -64,7 +68,7 @@ def create_interface():
64
  language = gr.Dropdown(choices=list(LANGUAGES.keys()), value="English", label="Language")
65
  speaker = gr.Radio(choices=["Male", "Female"], value="Male", label="Speaker Gender")
66
  speed = gr.Slider(minimum=0.5, maximum=2.0, value=1.0, step=0.1, label="Speech Speed")
67
- submit_btn = gr.Button("Generate Speech", variant="primary")
68
 
69
  with gr.Column():
70
  audio_output = gr.Audio(label="Generated Speech", type="filepath")
@@ -77,12 +81,13 @@ def create_interface():
77
 
78
  gr.Markdown("""
79
  ### Features:
80
- - Multilingual support: English, French, and Spanish.
81
- - Male and Female voice options.
82
- - Adjustable speech speed.
83
- - High-quality, natural-sounding voices.
84
  """)
85
  return demo
86
 
 
87
  demo = create_interface()
88
  demo.launch()
 
1
  import gradio as gr
2
  from transformers import SpeechT5Processor, SpeechT5ForTextToSpeech, SpeechT5HifiGan
3
  import torch
 
4
  import numpy as np
5
  from scipy.io.wavfile import write
6
+ import librosa
7
 
8
  # تحميل النماذج والمُعالج
9
  processor = SpeechT5Processor.from_pretrained("microsoft/speecht5_tts")
10
  model = SpeechT5ForTextToSpeech.from_pretrained("microsoft/speecht5_tts")
11
  vocoder = SpeechT5HifiGan.from_pretrained("microsoft/speecht5_hifigan")
12
 
 
13
  LANGUAGES = {"English": "en", "French": "fr", "Spanish": "es"}
14
 
def generate_speaker_embedding(speaker_type):
    """Build a random speaker embedding scaled by the requested voice type.

    A fresh ``(1, 512)`` tensor is drawn with ``torch.randn`` on each call
    and multiplied by 0.8 when ``speaker_type`` is ``"Female"``, or by 1.2
    for every other value.
    """
    scale = 1.2
    if speaker_type == "Female":
        scale = 0.8
    return torch.randn(1, 512) * scale
 
23
 
def adjust_speed(audio, speed, sampling_rate=24000):
    """Time-stretch ``audio`` by the factor ``speed`` using librosa.

    Args:
        audio: Waveform handed to ``librosa.effects.time_stretch``.
        speed: Stretch rate passed to librosa as ``rate``. A value of 1.0
            short-circuits and returns ``audio`` untouched.
        sampling_rate: Accepted for interface compatibility; this function
            does not use it.

    Returns:
        ``audio`` itself when ``speed`` is 1.0, otherwise the array
        produced by ``librosa.effects.time_stretch``.
    """
    # Unit speed (the UI slider default) needs no processing; returning early
    # avoids running unchanged audio through the stretcher.
    if speed == 1.0:
        return audio
    # ``rate`` must be passed by keyword: librosa >= 0.10 made it
    # keyword-only, so the positional form raises TypeError there.
    return librosa.effects.time_stretch(audio, rate=speed)
29
 
30
  def text_to_speech(text, language, speaker_type, speed):
31
+ """
32
+ تحويل النص إلى صوت.
33
+ """
34
  try:
35
+ # توليد تعبيرات الصوت بناءً على نوع المتحدث
36
  speaker_embeddings = generate_speaker_embedding(speaker_type)
37
 
38
  # معالجة النص
 
40
 
41
  # توليد الصوت
42
  generated_speech = model.generate_speech(
43
+ inputs["input_ids"],
44
+ speaker_embeddings,
45
  vocoder=vocoder
46
  ).cpu().numpy()
47
 
48
+ # تعديل سرعة الصوت
49
  adjusted_speech = adjust_speed(generated_speech, speed)
50
 
51
+ # حفظ الصوت كملف WAV
52
  output_file = "output.wav"
53
+ write(output_file, 24000, (adjusted_speech * 32767).astype(np.int16))
54
 
55
  return output_file
56
  except Exception as e:
57
+ print(f"Error: {e}")
58
+ return None
59
 
60
  # إنشاء واجهة Gradio
61
  def create_interface():
62
+ with gr.Blocks() as demo:
63
  gr.Markdown("# 🎙️ Multilingual Text-to-Speech")
64
 
65
  with gr.Row():
 
68
  language = gr.Dropdown(choices=list(LANGUAGES.keys()), value="English", label="Language")
69
  speaker = gr.Radio(choices=["Male", "Female"], value="Male", label="Speaker Gender")
70
  speed = gr.Slider(minimum=0.5, maximum=2.0, value=1.0, step=0.1, label="Speech Speed")
71
+ submit_btn = gr.Button("Generate Speech")
72
 
73
  with gr.Column():
74
  audio_output = gr.Audio(label="Generated Speech", type="filepath")
 
81
 
82
  gr.Markdown("""
83
  ### Features:
84
+ - Multilingual support (English, French, Spanish)
85
+ - Male and Female voice options
86
+ - Adjustable speech speed
87
+ - High-quality, natural-sounding voices
88
  """)
89
  return demo
90
 
91
+ # تشغيل التطبيق
92
  demo = create_interface()
93
  demo.launch()