Spaces:
Runtime error
Runtime error
Update app.py
Browse files
app.py
CHANGED
@@ -6,17 +6,12 @@ import torchaudio
|
|
6 |
import gradio as gr
|
7 |
from transformers import Wav2Vec2FeatureExtractor,AutoConfig
|
8 |
from torch.nn import BCEWithLogitsLoss, CrossEntropyLoss, MSELoss
|
9 |
-
from transformers.models.wav2vec2.modeling_wav2vec2 import (
|
10 |
-
Wav2Vec2PreTrainedModel,
|
11 |
-
Wav2Vec2Model
|
12 |
-
)
|
13 |
-
from transformers.models.hubert.modeling_hubert import (
|
14 |
-
HubertPreTrainedModel,
|
15 |
-
HubertModel
|
16 |
-
)
|
17 |
|
18 |
config = AutoConfig.from_pretrained("SeyedAli/Persian-Speech-Emotion-HuBert-V1")
|
19 |
-
|
|
|
|
|
20 |
|
21 |
audio_input = gr.Audio(label="صوت گفتار فارسی",type="filepath")
|
22 |
text_output = gr.TextArea(label="هیجان موجود در صوت گفتار",text_align="right",rtl=True,type="text")
|
@@ -30,7 +25,7 @@ def SER(audio):
|
|
30 |
speech_array, _sampling_rate = torchaudio.load(temp_audio_file.name)
|
31 |
resampler = torchaudio.transforms.Resample(_sampling_rate)
|
32 |
speech = resampler(speech_array).squeeze().numpy()
|
33 |
-
inputs =
|
34 |
inputs = {key: inputs[key].to(device) for key in inputs}
|
35 |
|
36 |
with torch.no_grad():
|
|
|
6 |
import gradio as gr
|
7 |
from transformers import Wav2Vec2FeatureExtractor,AutoConfig
|
8 |
from torch.nn import BCEWithLogitsLoss, CrossEntropyLoss, MSELoss
|
9 |
+
from models import Wav2Vec2ForSpeechClassification, HubertForSpeechClassification
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
10 |
|
11 |
config = AutoConfig.from_pretrained("SeyedAli/Persian-Speech-Emotion-HuBert-V1")
|
12 |
+
feature_extractor = Wav2Vec2FeatureExtractor.from_pretrained("SeyedAli/Persian-Speech-Emotion-HuBert-V1")
|
13 |
+
model = HubertForSpeechClassification.from_pretrained("SeyedAli/Persian-Speech-Emotion-HuBert-V1")
|
14 |
+
sampling_rate = feature_extractor.sampling_rate
|
15 |
|
16 |
audio_input = gr.Audio(label="صوت گفتار فارسی",type="filepath")
|
17 |
text_output = gr.TextArea(label="هیجان موجود در صوت گفتار",text_align="right",rtl=True,type="text")
|
|
|
25 |
speech_array, _sampling_rate = torchaudio.load(temp_audio_file.name)
|
26 |
resampler = torchaudio.transforms.Resample(_sampling_rate)
|
27 |
speech = resampler(speech_array).squeeze().numpy()
|
28 |
+
inputs = feature_extractor(speech, sampling_rate=sampling_rate, return_tensors="pt", padding=True)
|
29 |
inputs = {key: inputs[key].to(device) for key in inputs}
|
30 |
|
31 |
with torch.no_grad():
|