hatmanstack committed
Commit 938bf7e · 1 Parent(s): 4a065d2

ZeroGPU to CPU

Files changed (1)
  1. app.py +6 -8
app.py CHANGED
@@ -1,5 +1,5 @@
 import gradio as gr
-import spaces
+#import spaces ## For ZeroGPU
 import torch
 import torchaudio
 from transformers import Wav2Vec2FeatureExtractor, Wav2Vec2ForSequenceClassification
@@ -9,29 +9,27 @@ device = "cuda" if torch.cuda.is_available() else "cpu"
 model_name = "Hatman/audio-emotion-detection"
 feature_extractor = Wav2Vec2FeatureExtractor.from_pretrained(model_name)
 model = Wav2Vec2ForSequenceClassification.from_pretrained(model_name)
-print(device)
-

 def preprocess_audio(audio):
     waveform, sampling_rate = torchaudio.load(audio)
     resampled_waveform = torchaudio.transforms.Resample(orig_freq=sampling_rate, new_freq=16000)(waveform)
     return {'speech': resampled_waveform.numpy().flatten(), 'sampling_rate': 16000}

-@spaces.GPU
+#@spaces.GPU ## For ZeroGPU
 def inference(audio):
     example = preprocess_audio(audio)
     inputs = feature_extractor(example['speech'], sampling_rate=16000, return_tensors="pt", padding=True)
-    inputs = inputs # Move inputs to GPU
+    inputs = {k: v.to('cpu') for k, v in inputs.items()} # Not necessary on ZeroGPU
     with torch.no_grad():
         logits = model(**inputs).logits
     predicted_ids = torch.argmax(logits, dim=-1)
-    return model.config.id2label[predicted_ids.item()], logits, predicted_ids # Move tensors back to CPU for further processing
+    return model.config.id2label[predicted_ids.item()], logits, predicted_ids

-@spaces.GPU
+#@spaces.GPU ## For ZeroGPU
 def inference_label(audio):
     example = preprocess_audio(audio)
     inputs = feature_extractor(example['speech'], sampling_rate=16000, return_tensors="pt", padding=True)
-    inputs = inputs # Move inputs to GPU
+    inputs = {k: v.to('cpu') for k, v in inputs.items()} # Not necessary on ZeroGPU
     with torch.no_grad():
         logits = model(**inputs).logits
     predicted_ids = torch.argmax(logits, dim=-1)
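For reference, below is a minimal device-agnostic sketch of the same app. It reuses the module-level device variable visible in the hunk header, so the identical code runs on a CPU Space or on GPU hardware. The .to(device) calls, the single-label return, and the Gradio wiring at the end are assumptions for illustration and are not part of this commit.

import torch
import torchaudio
import gradio as gr
from transformers import Wav2Vec2FeatureExtractor, Wav2Vec2ForSequenceClassification

# Sketch (assumption): pick CUDA when available, otherwise CPU, and keep the
# model and the inputs on the same device.
device = "cuda" if torch.cuda.is_available() else "cpu"

model_name = "Hatman/audio-emotion-detection"
feature_extractor = Wav2Vec2FeatureExtractor.from_pretrained(model_name)
model = Wav2Vec2ForSequenceClassification.from_pretrained(model_name).to(device)

def preprocess_audio(audio):
    # Load the uploaded file and resample to the 16 kHz rate the model expects.
    waveform, sampling_rate = torchaudio.load(audio)
    resampled = torchaudio.transforms.Resample(orig_freq=sampling_rate, new_freq=16000)(waveform)
    return {'speech': resampled.numpy().flatten(), 'sampling_rate': 16000}

def inference_label(audio):
    example = preprocess_audio(audio)
    inputs = feature_extractor(example['speech'], sampling_rate=16000,
                               return_tensors="pt", padding=True)
    inputs = {k: v.to(device) for k, v in inputs.items()}  # follow the model's device
    with torch.no_grad():
        logits = model(**inputs).logits
    predicted_ids = torch.argmax(logits, dim=-1)
    return model.config.id2label[predicted_ids.item()]

# Hypothetical Gradio wiring (the interface code is not shown in this hunk).
demo = gr.Interface(fn=inference_label, inputs=gr.Audio(type="filepath"), outputs="text")

if __name__ == "__main__":
    demo.launch()

Per the commit's own comments, the explicit CPU move is only needed off ZeroGPU; on ZeroGPU the import spaces and @spaces.GPU lines would be re-enabled instead.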