import gradio as gr
import torch
from transformers import Wav2Vec2ForSequenceClassification, Wav2Vec2Processor
import librosa
import numpy as np
# Load the model and processor from Hugging Face
model_name = "facebook/wav2vec2-large-xlsr-53"
model = Wav2Vec2ForSequenceClassification.from_pretrained(model_name, num_labels=7)
processor = Wav2Vec2Processor.from_pretrained(model_name)
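# Note: facebook/wav2vec2-large-xlsr-53 is a self-supervised encoder with no
# emotion head of its own; num_labels=7 attaches a randomly initialized
# classification layer, so predictions are only meaningful once this head is
# fine-tuned on labeled emotion data (or an already fine-tuned emotion
# checkpoint is loaded instead).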
# Function that processes an audio clip and maps it to an emotion
def recognize_emotion(audio):
    # Load the audio with librosa, resampled to the 16 kHz rate Wav2Vec2 expects
    audio_input, _ = librosa.load(audio, sr=16000)
    # Extract features with the Wav2Vec2 processor
    inputs = processor(audio_input, sampling_rate=16000, return_tensors="pt", padding=True)
    # Pass the data through the model without tracking gradients
    with torch.no_grad():
        logits = model(**inputs).logits
    # Map class indices to emotion labels
    emotion_map = {
        0: "Neutral",
        1: "Happy",
        2: "Angry",
        3: "Sad",
        4: "Surprised",
        5: "Fearful",
        6: "Disgusted"
    }
    # Classify the audio by taking the highest-scoring class
    predicted_class = torch.argmax(logits, dim=-1).item()
    emotion = emotion_map[predicted_class]
    return emotion
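# Example usage (assuming a local recording "sample.wav" exists):
#   print(recognize_emotion("sample.wav"))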
# Gradio interface
iface = gr.Interface(
    fn=recognize_emotion,
    inputs=gr.Audio(sources=["microphone"], type="filepath"),
    outputs="text",
    title="Speech Emotion Recognition",
    description="Identify the emotion in the speech: Happy, Sad, Angry, Surprised, Neutral, Fearful, or Disgusted."
)
# Launch the interface
iface.launch()
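# When running in a notebook or behind a firewall, a shareable public link
# can be requested with iface.launch(share=True).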