nisten committed on
Commit
2037e5f
·
verified ·
1 Parent(s): cf68626

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +8 -6
app.py CHANGED
@@ -1,10 +1,12 @@
 
1
  import gradio as gr
2
  import torch
3
  import soundfile as sf
4
  from snac import SNAC
5
  from transformers import AutoTokenizer, AutoModelForCausalLM
6
 
7
- device = torch.device("cpu") # Changed to CPU mode
 
8
 
9
  def find_last_instance_of_separator(lst, element=50258):
10
  reversed_list = lst[::-1]
@@ -55,7 +57,7 @@ def reconstruct_tensors(flattened_output):
55
  tensor2.append(flattened_output[i+5])
56
  tensor3.append(flattened_output[i+6])
57
  tensor3.append(flattened_output[i+7])
58
- codes = [list_to_torch_tensor(tensor1), list_to_torch_tensor(tensor2), list_to_torch_tensor(tensor3)]
59
 
60
  if n_tensors == 15:
61
  for i in range(0, len(flattened_output), 16):
@@ -74,20 +76,20 @@ def reconstruct_tensors(flattened_output):
74
  tensor3.append(flattened_output[i+13])
75
  tensor4.append(flattened_output[i+14])
76
  tensor4.append(flattened_output[i+15])
77
- codes = [list_to_torch_tensor(tensor1), list_to_torch_tensor(tensor2), list_to_torch_tensor(tensor3), list_to_torch_tensor(tensor4)]
78
 
79
  return codes
80
 
81
  def load_model():
82
  tokenizer = AutoTokenizer.from_pretrained("Lwasinam/voicera-jenny-finetune")
83
  model = AutoModelForCausalLM.from_pretrained("Lwasinam/voicera-jenny-finetune").to(device)
84
- snac_model = SNAC.from_pretrained("hubertsiuzdak/snac_24khz").eval()
85
  return model, tokenizer, snac_model
86
 
87
  def SpeechDecoder(codes, snac_model):
88
  codes = codes.squeeze(0).tolist()
89
  reconstructed_codes = reconstruct_tensors(codes)
90
- audio_hat = snac_model.to(device).decode(reconstructed_codes)
91
  audio_path = "reconstructed_audio.wav"
92
  sf.write(audio_path, audio_hat.squeeze().cpu().detach().numpy(), 24000)
93
  return audio_path
@@ -117,4 +119,4 @@ iface = gr.Interface(
117
  )
118
 
119
  if __name__ == "__main__":
120
- iface.launch(share=True)
 
1
+ !pip install nvidia-ml-py3
2
  import gradio as gr
3
  import torch
4
  import soundfile as sf
5
  from snac import SNAC
6
  from transformers import AutoTokenizer, AutoModelForCausalLM
7
 
8
+ # Ensure the code uses NVIDIA GPUs
9
+ device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
10
 
11
  def find_last_instance_of_separator(lst, element=50258):
12
  reversed_list = lst[::-1]
 
57
  tensor2.append(flattened_output[i+5])
58
  tensor3.append(flattened_output[i+6])
59
  tensor3.append(flattened_output[i+7])
60
+ codes = [list_to_torch_tensor(tensor1).to(device), list_to_torch_tensor(tensor2).to(device), list_to_torch_tensor(tensor3).to(device)]
61
 
62
  if n_tensors == 15:
63
  for i in range(0, len(flattened_output), 16):
 
76
  tensor3.append(flattened_output[i+13])
77
  tensor4.append(flattened_output[i+14])
78
  tensor4.append(flattened_output[i+15])
79
+ codes = [list_to_torch_tensor(tensor1).to(device), list_to_torch_tensor(tensor2).to(device), list_to_torch_tensor(tensor3).to(device), list_to_torch_tensor(tensor4).to(device)]
80
 
81
  return codes
82
 
83
  def load_model():
84
  tokenizer = AutoTokenizer.from_pretrained("Lwasinam/voicera-jenny-finetune")
85
  model = AutoModelForCausalLM.from_pretrained("Lwasinam/voicera-jenny-finetune").to(device)
86
+ snac_model = SNAC.from_pretrained("hubertsiuzdak/snac_24khz").eval().to(device)
87
  return model, tokenizer, snac_model
88
 
89
  def SpeechDecoder(codes, snac_model):
90
  codes = codes.squeeze(0).tolist()
91
  reconstructed_codes = reconstruct_tensors(codes)
92
+ audio_hat = snac_model.decode(reconstructed_codes)
93
  audio_path = "reconstructed_audio.wav"
94
  sf.write(audio_path, audio_hat.squeeze().cpu().detach().numpy(), 24000)
95
  return audio_path
 
119
  )
120
 
121
  if __name__ == "__main__":
122
+ iface.launch()