ayush2607 committed on
Commit
717038c
·
verified ·
1 Parent(s): 04fe302

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +11 -8
app.py CHANGED
@@ -2,13 +2,13 @@ import gradio as gr
2
  import torch
3
  import os
4
  from transformers import SpeechT5Processor, SpeechT5ForTextToSpeech, SpeechT5HifiGan
5
- from datasets import load_dataset,Audio
6
  import numpy as np
7
  from speechbrain.inference import EncoderClassifier
8
 
9
  # Load models and processor
10
  processor = SpeechT5Processor.from_pretrained("microsoft/speecht5_tts")
11
- model = SpeechT5ForTextToSpeech.from_pretrained("ayush2607/speecht5_tts_technical_data")
12
  vocoder = SpeechT5HifiGan.from_pretrained("microsoft/speecht5_hifigan")
13
 
14
  # Load speaker encoder
@@ -20,9 +20,15 @@ speaker_model = EncoderClassifier.from_hparams(
20
  )
21
 
22
  # Load a sample from the dataset for speaker embedding
23
- dataset = load_dataset("Yassmen/TTS_English_Technical_data", split="train")
24
- dataset = dataset.cast_column("audio", Audio(sampling_rate=16000))
25
- sample = dataset[0]
 
 
 
 
 
 
26
 
27
  def create_speaker_embedding(waveform):
28
  with torch.no_grad():
@@ -31,9 +37,6 @@ def create_speaker_embedding(waveform):
31
  speaker_embeddings = speaker_embeddings.squeeze().cpu().numpy()
32
  return speaker_embeddings
33
 
34
- # Create a speaker embedding from the sample
35
- speaker_embedding = create_speaker_embedding(sample['audio']['array'])
36
-
37
  def text_to_speech(text):
38
  # Clean up text
39
  replacements = [
 
2
  import torch
3
  import os
4
  from transformers import SpeechT5Processor, SpeechT5ForTextToSpeech, SpeechT5HifiGan
5
+ from datasets import load_dataset, Audio
6
  import numpy as np
7
  from speechbrain.inference import EncoderClassifier
8
 
9
  # Load models and processor
10
  processor = SpeechT5Processor.from_pretrained("microsoft/speecht5_tts")
11
+ model = SpeechT5ForTextToSpeech.from_pretrained("YOUR_FINE_TUNED_MODEL_PATH")
12
  vocoder = SpeechT5HifiGan.from_pretrained("microsoft/speecht5_hifigan")
13
 
14
  # Load speaker encoder
 
20
  )
21
 
22
  # Load a sample from the dataset for speaker embedding
23
+ try:
24
+ dataset = load_dataset("Yassmen/TTS_English_Technical_data", split="train")
25
+ dataset = dataset.cast_column("audio", Audio(sampling_rate=16000))
26
+ sample = dataset[0]
27
+ speaker_embedding = create_speaker_embedding(sample['audio']['array'])
28
+ except Exception as e:
29
+ print(f"Error loading dataset: {e}")
30
+ # Use a random speaker embedding as fallback
31
+ speaker_embedding = torch.randn(1, 512)
32
 
33
  def create_speaker_embedding(waveform):
34
  with torch.no_grad():
 
37
  speaker_embeddings = speaker_embeddings.squeeze().cpu().numpy()
38
  return speaker_embeddings
39
 
 
 
 
40
  def text_to_speech(text):
41
  # Clean up text
42
  replacements = [