Spaces:

alec228
/

audio-sentiment

Runtime error

App Files Files Community

alec228 committed on Jul 14

Commit

2a0b1db

1 Parent(s): c23173c

Initial commit

Browse files

Files changed (3) hide show

README_HF.md +66 -0
app.py +183 -0
requirements_hf.txt +10 -0

README_HF.md ADDED Viewed

	@@ -0,0 +1,66 @@

+# 🎤 Analyse de Sentiment Audio
+Ce Space Hugging Face permet d'analyser le sentiment d'extraits audio en français en combinant transcription et analyse de sentiment.
+## 🚀 Fonctionnalités
+- **🎙️ Transcription audio** : Utilise Wav2Vec2 pour transcrire l'audio en français
+- **😊 Analyse de sentiment** : Analyse le sentiment du texte transcrit avec BERT multilingue
+- **📊 Analyse détaillée** : Segmentation par phrase avec scores de confiance
+- **💾 Export CSV** : Sauvegarde de l'historique des analyses
+- **🎯 Interface intuitive** : Interface Gradio moderne et responsive
+## 🛠️ Technologies utilisées
+- **Transcription** : `jonatasgrosman/wav2vec2-large-xlsr-53-french`
+- **Sentiment** : `nlptown/bert-base-multilingual-uncased-sentiment`
+- **Interface** : Gradio
+- **Backend** : PyTorch, Transformers
+## 📖 Comment utiliser
+1. **Enregistrez** votre voix directement dans le navigateur
+2. **Ou téléversez** un fichier audio (WAV recommandé)
+3. **Cliquez** sur "Analyser" pour lancer le traitement
+4. **Visualisez** les résultats : transcription, sentiment, et analyse détaillée
+5. **Exportez** l'historique au format CSV si nécessaire
+## 🎯 Cas d'usage
+- Analyse de sentiment sur des appels clients
+- Évaluation de podcasts ou interviews
+- Validation d'analyses qualitatives de contenu audio
+- Proof of Concept pour architectures multimodales
+## 🔧 Architecture
+Le pipeline combine :
+1. **Extraction audio** → Prétraitement et normalisation
+2. **Transcription** → Wav2Vec2 pour la reconnaissance vocale
+3. **Analyse sentiment** → BERT pour la classification
+4. **Post-traitement** → Segmentation et scoring
+## 📝 Exemple de sortie
+```json
+{
+  "transcription": "je suis très content de ce produit",
+  "sentiment": {
+    "positif": 0.85,
+    "neutre": 0.10,
+    "négatif": 0.05
+  }
+}
+```
+## 🌟 Fonctionnalités avancées
+- **Gestion d'erreurs** robuste
+- **Interface responsive** adaptée mobile/desktop
+- **Historique de session** des analyses
+- **Export de données** au format CSV
+- **Analyse segmentée** par phrase
+---
+*Développé avec ❤️ pour l'analyse de sentiment audio en français*

app.py ADDED Viewed

	@@ -0,0 +1,183 @@

+import os
+import re
+from datetime import datetime
+import gradio as gr
+import torch
+import pandas as pd
+import soundfile as sf
+import torchaudio
+from transformers import Wav2Vec2Processor, Wav2Vec2ForCTC
+from src.transcription import SpeechEncoder
+from src.sentiment import TextEncoder
+# Configuration pour Hugging Face Spaces
+HF_SPACE = os.getenv("HF_SPACE", "false").lower() == "true"
+# Préchargement des modèles
+print("Chargement des modèles...")
+processor_ctc = Wav2Vec2Processor.from_pretrained(
+    "jonatasgrosman/wav2vec2-large-xlsr-53-french",
+    cache_dir="./models" if not HF_SPACE else None
+)
+model_ctc = Wav2Vec2ForCTC.from_pretrained(
+    "jonatasgrosman/wav2vec2-large-xlsr-53-french",
+    cache_dir="./models" if not HF_SPACE else None
+)
+speech_enc = SpeechEncoder()
+text_enc = TextEncoder()
+print("Modèles chargés avec succès!")
+# Pipeline d'analyse
+def analyze_audio(audio_path):
+    if audio_path is None:
+        return "Aucun audio fourni", "", pd.DataFrame(), {}
+    try:
+        # Lecture et prétraitement
+        data, sr = sf.read(audio_path)
+        arr = data.T if data.ndim > 1 else data
+        wav = torch.from_numpy(arr).unsqueeze(0).float()
+        if sr != 16000:
+            wav = torchaudio.transforms.Resample(sr, 16000)(wav)
+            sr = 16000
+        if wav.size(0) > 1:
+            wav = wav.mean(dim=0, keepdim=True)
+        # Transcription
+        inputs = processor_ctc(wav.squeeze().numpy(), sampling_rate=sr, return_tensors="pt")
+        with torch.no_grad():
+            logits = model_ctc(**inputs).logits
+        pred_ids = torch.argmax(logits, dim=-1)
+        transcription = processor_ctc.batch_decode(pred_ids)[0].lower()
+        # Sentiment principal
+        sent_dict = TextEncoder.analyze_sentiment(transcription)
+        label, conf = max(sent_dict.items(), key=lambda x: x[1])
+        emojis = {"positif": "😊", "neutre": "😐", "négatif": "☹️"}
+        emoji = emojis.get(label, "")
+        # Segmentation par phrase
+        segments = [s.strip() for s in re.split(r'[.?!]', transcription) if s.strip()]
+        seg_results = []
+        for seg in segments:
+            sd = TextEncoder.analyze_sentiment(seg)
+            l, c = max(sd.items(), key=lambda x: x[1])
+            seg_results.append({"Segment": seg, "Sentiment": l.capitalize(), "Confiance (%)": round(c*100,1)})
+        seg_df = pd.DataFrame(seg_results)
+        # Historique entry
+        timestamp = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
+        history_entry = {
+            "Horodatage": timestamp,
+            "Transcription": transcription,
+            "Sentiment": label.capitalize(),
+            "Confiance (%)": round(conf*100,1)
+        }
+        # Rendu
+        summary_html = (
+            f"<div style='display:flex;align-items:center;'>"
+            f"<span style='font-size:3rem;margin-right:10px;'>{emoji}</span>"
+            f"<h2 style='color:#6a0dad;'>{label.upper()}</h2>"
+            f"</div>"
+            f"<p><strong>Confiance :</strong> {conf*100:.1f}%</p>"
+        )
+        return transcription, summary_html, seg_df, history_entry
+    except Exception as e:
+        error_msg = f"Erreur lors de l'analyse: {str(e)}"
+        return error_msg, "", pd.DataFrame(), {}
+# Export CSV
+def export_history_csv(history):
+    if not history:
+        return None
+    df = pd.DataFrame(history)
+    path = "history.csv"
+    df.to_csv(path, index=False)
+    return path
+# Interface Gradio
+demo = gr.Blocks(
+    theme=gr.themes.Monochrome(primary_hue="purple"),
+    title="Analyse de Sentiment Audio - Hugging Face Space"
+)
+with demo:
+    gr.Markdown("""
+    # 🎤 Analyse de Sentiment Audio
+    Ce Space permet d'analyser le sentiment d'extraits audio en français en combinant :
+    - **Transcription audio** avec Wav2Vec2
+    - **Analyse de sentiment** avec BERT multilingue
+    """)
+    gr.HTML("""
+    <div style="display: flex; flex-direction: column; gap: 10px; margin-bottom: 20px;">
+        <div style="background-color: #f3e8ff; padding: 12px 20px; border-radius: 12px; border-left: 5px solid #8e44ad;">
+            <strong>Étape 1 :</strong> Enregistrez votre voix ou téléversez un fichier audio (format WAV recommandé).
+        </div>
+        <div style="background-color: #e0f7fa; padding: 12px 20px; border-radius: 12px; border-left: 5px solid #0097a7;">
+            <strong>Étape 2 :</strong> Cliquez sur le bouton <em><b>Analyser</b></em> pour lancer la transcription et l'analyse.
+        </div>
+        <div style="background-color: #fff3e0; padding: 12px 20px; border-radius: 12px; border-left: 5px solid #fb8c00;">
+            <strong>Étape 3 :</strong> Visualisez les résultats : transcription, sentiment, et analyse détaillée.
+        </div>
+        <div style="background-color: #e8f5e9; padding: 12px 20px; border-radius: 12px; border-left: 5px solid #43a047;">
+            <strong>Étape 4 :</strong> Exportez l'historique des analyses au format CSV si besoin.
+        </div>
+    </div>
+    """)
+    with gr.Row():
+        with gr.Column(scale=2):
+            audio_in = gr.Audio(
+                sources=["microphone", "upload"],
+                type="filepath",
+                label="Audio Input",
+                info="Enregistrez ou téléversez un fichier audio"
+            )
+            btn = gr.Button("🔍 Analyser", variant="primary")
+            export_btn = gr.Button("📊 Exporter CSV")
+        with gr.Column(scale=3):
+            chat = gr.Chatbot(label="Historique des échanges")
+            transcription_out = gr.Textbox(label="Transcription", interactive=False)
+            summary_out = gr.HTML(label="Sentiment")
+            seg_out = gr.Dataframe(label="Détail par segment")
+            hist_out = gr.Dataframe(label="Historique")
+    state_chat = gr.State([])  # list of (user,bot)
+    state_hist = gr.State([])  # list of dict entries
+    def chat_callback(audio_path, chat_history, hist_state):
+        transcription, summary, seg_df, hist_entry = analyze_audio(audio_path)
+        user_msg = "[Audio reçu]"
+        bot_msg = f"**Transcription :** {transcription}\n**Sentiment :** {summary}"
+        chat_history = chat_history + [(user_msg, bot_msg)]
+        if hist_entry:
+            hist_state = hist_state + [hist_entry]
+        return chat_history, transcription, summary, seg_df, hist_state
+    btn.click(
+        fn=chat_callback,
+        inputs=[audio_in, state_chat, state_hist],
+        outputs=[chat, transcription_out, summary_out, seg_out, state_hist]
+    )
+    export_btn.click(
+        fn=export_history_csv,
+        inputs=[state_hist],
+        outputs=[gr.File(label="Télécharger CSV")]
+    )
+# Configuration pour Hugging Face Spaces
+if __name__ == "__main__":
+    demo.launch(
+        server_name="0.0.0.0" if HF_SPACE else "127.0.0.1",
+        server_port=7860,
+        share=False
+    )

requirements_hf.txt ADDED Viewed

	@@ -0,0 +1,10 @@

+transformers==4.36.2
+torch==2.1.2
+torchaudio==2.1.2
+gradio==4.15.0
+fastapi==0.104.1
+uvicorn[standard]==0.24.0
+soundfile==0.12.1
+pandas==2.1.4
+numpy==1.24.3
+scikit-learn==1.3.2