wifix199 committed on
Commit
6e05b44
1 Parent(s): 0c2166a

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +44 -45
app.py CHANGED
@@ -1,53 +1,52 @@
1
  import gradio as gr
2
- import numpy as np
3
- import torch
4
- from transformers import SpeechT5Processor, SpeechT5ForTextToSpeech, SpeechT5HifiGan
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
5
 
6
- # Load the model and vocoder
7
- checkpoint = "microsoft/speecht5_tts"
8
- processor = SpeechT5Processor.from_pretrained(checkpoint)
9
- model = SpeechT5ForTextToSpeech.from_pretrained(checkpoint)
10
- vocoder = SpeechT5HifiGan.from_pretrained("microsoft/speecht5_hifigan")
11
-
12
- # Speaker embeddings for male and female
13
- speaker_embeddings = {
14
- "male": "speaker/cmu_us_bdl_arctic-wav-arctic_a0009.npy",
15
- "female": "speaker/cmu_us_slt_arctic-wav-arctic_a0508.npy"
16
- }
17
-
18
- # Function to generate speech
19
- def text_to_speech(text, gender):
20
- if len(text.strip()) == 0:
21
- return (16000, np.zeros(0).astype(np.int16))
22
-
23
- inputs = processor(text=text, return_tensors="pt")
24
-
25
- # Truncate input if too long
26
- input_ids = inputs["input_ids"]
27
- input_ids = input_ids[..., :model.config.max_text_positions]
28
-
29
- # Load speaker embedding based on gender selection
30
- speaker_embedding_path = speaker_embeddings[gender]
31
- speaker_embedding = np.load(speaker_embedding_path)
32
- speaker_embedding = torch.tensor(speaker_embedding).unsqueeze(0)
33
-
34
- # Generate speech
35
- speech = model.generate_speech(input_ids, speaker_embedding, vocoder=vocoder)
36
- speech = (speech.numpy() * 32767).astype(np.int16)
37
-
38
- return (16000, speech)
39
-
40
- # Create the Gradio interface
41
  iface = gr.Interface(
42
- fn=text_to_speech,
43
  inputs=[
44
- gr.Textbox(label="Enter Text"),
45
- gr.Radio(["male", "female"], label="Select Voice Gender") # Gender selection
 
46
  ],
47
- outputs=gr.Audio(label="Generated Speech"),
48
- title="Text-to-Speech Bot",
49
- description="Enter text and select a voice gender to generate speech."
 
50
  )
51
 
52
- # Launch the interface
53
  iface.launch()
 
1
  import gradio as gr
2
+ from gtts import gTTS
3
+ from pydub import AudioSegment
4
+ import tempfile
5
+ import os
6
+
7
def generate_tts(text, language, pitch_shift, pitch_factor=0.8):
    """Synthesize *text* to an MP3 file with gTTS and return the file's path.

    Args:
        text: Text to speak. Empty or whitespace-only text yields ``None``
            without contacting the TTS service.
        language: Language code passed to gTTS as ``lang``.
        pitch_shift: When truthy, the audio is pitch-shifted before export.
        pitch_factor: Multiplier applied to the frame rate when
            ``pitch_shift`` is set. Values below 1 lower the perceived pitch
            (and lengthen playback). Defaults to 0.8.

    Returns:
        Path to a temporary ``.mp3`` file holding the result, or ``None`` if
        there was nothing to speak or the synthesized audio could not be
        decoded. The file is created with ``delete=False``; the caller owns it.
    """
    if not text or not text.strip():
        return None

    # Reserve a path and close the handle first, so gTTS and pydub can open
    # the file themselves on platforms that forbid a second open handle.
    with tempfile.NamedTemporaryFile(delete=False, suffix=".mp3") as temp_file:
        temp_file_path = temp_file.name

    try:
        gTTS(text=text, lang=language).save(temp_file_path)
        try:
            audio = AudioSegment.from_file(temp_file_path, format="mp3")
        except Exception as e:
            print(f"Error loading audio file: {e}")
            return None
    finally:
        # Always drop the intermediate file, including on the error paths.
        if os.path.exists(temp_file_path):
            os.remove(temp_file_path)

    if pitch_shift:
        # Reinterpret the same samples at a scaled frame rate, then resample
        # to a standard rate so the exported file plays back normally.
        shifted_rate = int(audio.frame_rate * pitch_factor)
        audio = audio._spawn(audio.raw_data, overrides={"frame_rate": shifted_rate})
        audio = audio.set_frame_rate(44100)

    with tempfile.NamedTemporaryFile(delete=False, suffix=".mp3") as out_file:
        out_file_path = out_file.name

    try:
        audio.export(out_file_path, format="mp3")
    except Exception:
        # Do not leave an empty output file behind when the export fails.
        if os.path.exists(out_file_path):
            os.remove(out_file_path)
        raise

    return out_file_path
32
+
33
def chatbot(text, language, male_voice):
    """Gradio handler: turn *text* into speech and return the audio file path.

    Args:
        text: Text entered by the user.
        language: Language code forwarded to ``generate_tts``.
        male_voice: Forwarded to ``generate_tts`` as its ``pitch_shift`` flag.

    Returns:
        Path to the generated audio file, or ``None`` when *text* is blank
        (so the audio output is simply left empty).

    Raises:
        gr.Error: If ``generate_tts`` reports a failure by returning ``None``.
    """
    # Blank input is not an error; skip the TTS call entirely.
    if not text or not text.strip():
        return None

    output_audio_path = generate_tts(text, language, male_voice)
    if output_audio_path is None:
        # The output component is gr.Audio, so a plain string return value
        # would not be shown as a message; raise a Gradio error instead.
        raise gr.Error("Error generating audio")
    return output_audio_path
38
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
39
# Input widgets, listed in the positional order chatbot() receives them.
text_input = gr.Textbox(label="Enter your text")
language_input = gr.Dropdown(
    label="Select Language",
    choices=["en", "es", "fr", "de", "it", "hi"],
    value="en",
)
male_voice_input = gr.Checkbox(label="Male Voice", value=True)

# NOTE(review): live=True asks Gradio to rerun the handler when inputs
# change -- confirm that is intended, since each run calls the TTS service.
iface = gr.Interface(
    fn=chatbot,
    inputs=[text_input, language_input, male_voice_input],
    outputs=gr.Audio(label="Generated Audio"),
    live=True,
    title="Text-to-Speech AI Chatbot",
    description="Generate speech with different languages and voice options.",
)

iface.launch()