andromeda01111 committed on
Commit 7c32789 · verified · 1 Parent(s): 86726e3

Update app.py

Files changed (1)
  1. app.py +1 -58
app.py CHANGED
@@ -1,60 +1,3 @@
  import gradio as gr
- import torch
- import torch.nn as nn
- import torch.nn.functional as F
- import torchaudio
- from transformers import AutoConfig, Wav2Vec2Processor, Wav2Vec2FeatureExtractor
- from src.models import Wav2Vec2ForSpeechClassification

- import librosa
- import IPython.display as ipd
- import numpy as np
- import pandas as pd
- import os
-
- model_name_or_path = "andromeda01111/Malayalam_SA"
- config = AutoConfig.from_pretrained(model_name_or_path)
- feature_extractor = Wav2Vec2FeatureExtractor.from_pretrained(model_name_or_path)
- sampling_rate = feature_extractor.sampling_rate
- model = Wav2Vec2ForSpeechClassification.from_pretrained(model_name_or_path)
-
- def speech_file_to_array_fn(path, sampling_rate):
-     speech_array, _sampling_rate = torchaudio.load(path)
-     resampler = torchaudio.transforms.Resample(_sampling_rate)
-     speech = resampler(speech_array).squeeze().numpy()
-     return speech
-
- def predict(path, sampling_rate):
-     speech = speech_file_to_array_fn(path, sampling_rate)
-     features = feature_extractor(speech, sampling_rate=sampling_rate, return_tensors="pt", padding=True)
-
-     input_values = features.input_values
-     attention_mask = features.attention_mask
-
-     with torch.no_grad():
-         logits = model(input_values, attention_mask=attention_mask).logits
-
-     scores = F.softmax(logits, dim=1).detach().cpu().numpy()[0]
-     outputs = [{"Emotion": config.id2label[i], "Score": f"{round(score * 100, 3):.1f}%"} for i, score in enumerate(scores)]
-
-     return outputs
-
- # Wrapper function for Gradio
- def gradio_predict(audio):
-     predictions = predict(audio)
-     return [f"{pred['Emotion']}: {pred['Score']}" for pred in predictions]
-
- # Gradio interface
- emotions = [config.id2label[i] for i in range(len(config.id2label))]
- outputs = [gr.Textbox(label=emotion, interactive=False) for emotion in emotions]
-
- interface = gr.Interface(
-     fn=predict,
-     inputs=gr.Audio(label="Upload Audio", type="filepath"),
-     outputs=outputs,
-     title="Emotion Recognition",
-     description="Upload an audio file to predict emotions and their corresponding percentages.",
- )
-
- # Launch the app
- interface.launch()
+ gr.Interface.load("models/andromeda01111/Malayalam_SA").launch()
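
A minimal sketch of the resulting app.py after this commit, assuming a Gradio version that still provides gr.Interface.load (newer Gradio releases expose the same hosted-model loader as gr.load):

import gradio as gr

# Wrap the hosted Hub model as a ready-made Interface and start the demo.
# Assumption: the Space pins a Gradio version where gr.Interface.load is still
# available; on recent releases, gr.load("models/...") is the equivalent call.
gr.Interface.load("models/andromeda01111/Malayalam_SA").launch()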