"""Gradio text-to-speech demo backed by selectable VITS checkpoints."""

import os

import gradio as gr
import numpy as np
# Keep `spaces` ahead of `torch`, as in the original import order.
import spaces
import torch
from transformers import AutoTokenizer, VitsModel

# Access token read from the environment; None when "key_" is not set.
token = os.environ.get("key_")

# A single tokenizer is used for every selectable checkpoint.
tokenizer = AutoTokenizer.from_pretrained("wasmdashai/vtk", token=token)

# Cache of fully prepared models, keyed by the name passed to get_model().
models = {}


@spaces.GPU
def get_model(name_model):
    """Return the prepared VITS model for ``name_model``, loading it once.

    On the first request for a name the checkpoint is loaded, moved to
    CUDA, and weight normalisation is applied to the decoder and to the
    ``conv_pre`` / ``conv_post`` layers of every flow. Later requests for
    the same name return the cached instance.

    Args:
        name_model: Checkpoint identifier handed to
            ``VitsModel.from_pretrained``.

    Returns:
        The cached, CUDA-resident ``VitsModel``.
    """
    cached = models.get(name_model)
    if cached is not None:
        return cached

    model = VitsModel.from_pretrained(name_model, token=token).cuda()
    model.decoder.apply_weight_norm()
    for flow in model.flow.flows:
        torch.nn.utils.weight_norm(flow.conv_pre)
        torch.nn.utils.weight_norm(flow.conv_post)

    # Store the model only after setup has finished, so a failure above can
    # never leave a half-configured model in the cache for later requests.
    models[name_model] = model
    return model


# Import-time device probe kept from the original script, whose note said
# this prints 'cpu' here.
zero = torch.Tensor([0]).cuda()
print(zero.device)


@spaces.GPU
def modelspeech(text, name_model):
    """Synthesise ``text`` with the chosen model.

    Args:
        text: Input text to tokenize and speak.
        name_model: Checkpoint identifier, resolved through ``get_model``.

    Returns:
        A ``(sampling_rate, waveform)`` tuple, where ``sampling_rate`` is
        ``model.config.sampling_rate`` and ``waveform`` is the model output
        flattened to a 1-D NumPy array.
    """
    inputs = tokenizer(text, return_tensors="pt")
    model = get_model(name_model)
    with torch.no_grad():
        output = model(input_ids=inputs["input_ids"].cuda())
    wav = output.waveform.cpu().numpy().reshape(-1)
    return model.config.sampling_rate, wav


model_choices = gr.Dropdown(
    choices=[
        "wasmdashai/vits-ar",
        "wasmdashai/vits-ar-v1",
        "wasmdashai/vits-ar-sa-huba",
        "wasmdashai/vits-ar-sa-ms",
        "wasmdashai/vits-ar-sa-magd",
        "wasmdashai/vtk",
        "wasmdashai/mak",
        "wasmdashai/vits-ar-sa-huba-v1",
        "wasmdashai/vits-ar-sa-huba-v2",
        "wasmdashai/vits-ar-z1",
        "wasmdashai/vits-ar-sa-A",
    ],
    label="اختر النموذج",
    value="wasmdashai/vits-ar-sa-huba-v1",
)

demo = gr.Interface(
    fn=modelspeech,
    inputs=["text", model_choices],
    outputs=["audio"],
)
demo.queue()
demo.launch()