Spaces:

ksang
/

gender_identification

Runtime error

App Files Files Community

ksang commited on May 5, 2023

Commit

ad16c3d

1 Parent(s): 5ffb6b7

Upload app.py

Browse files

Files changed (1) hide show

app.py +69 -0

app.py ADDED Viewed

	@@ -0,0 +1,69 @@

+# %%
+import gradio as gr
+import torchaudio
+from transformers import AutoModelForAudioClassification, AutoFeatureExtractor
+import librosa
+import torch
+# %%
+def dump_pickle(file_path: str, file, mode: str = "wb"):
+    import pickle
+    with open(file_path, mode=mode) as f:
+        pickle.dump(file, f)
+def load_pickle(file_path: str, mode: str = "rb", encoding=""):
+    import pickle
+    with open(file_path, mode=mode) as f:
+        return pickle.load(f, encoding=encoding)
+# %%
+label2id = load_pickle('/data/audio-classification-pytorch/wav2vec2/results/best/label2id.pkl')
+id2label = load_pickle('/data/audio-classification-pytorch/wav2vec2/results/best/id2label.pkl')
+# %%
+model = AutoModelForAudioClassification.from_pretrained(
+    "facebook/wav2vec2-base", num_labels=len(label2id), label2id=label2id, id2label=id2label
+)
+# %%
+feature_extractor = AutoFeatureExtractor.from_pretrained("facebook/wav2vec2-base")
+# %%
+checkpoint = torch.load('/data/audio-classification-pytorch/wav2vec2/results/best/pytorch_model.bin')
+# %%
+model.load_state_dict(checkpoint)
+# %%
+def predict(input):
+    waveform, sr = librosa.load(input)
+    waveform = torch.from_numpy(waveform).unsqueeze(0)
+    waveform = torchaudio.transforms.Resample(sr, 16_000)(waveform)
+    inputs = feature_extractor(waveform, sampling_rate=feature_extractor.sampling_rate,
+                            max_length=16000, truncation=True)
+    tensor = torch.tensor(inputs['input_values'][0])
+    with torch.no_grad():
+        output = model(tensor)
+        logits = output['logits'][0]
+        label_id = torch.argmax(logits).item()
+    label_name = id2label[str(label_id)]
+    return label_name
+# %%
+demo = gr.Interface(
+    fn=predict,
+    inputs=gr.Audio(source="microphone", type="filepath", label="Speak to classify your voice!"), # record audio, save in temp file to feed to inference func
+    outputs="text"
+)
+# %%
+demo.launch()
+# %%