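"""Gradio demo: transcribe and diarize a customer-support call, then
summarize the conversation and highlight high-confidence customer sentiment.

Pipeline: whisperx (ASR + forced alignment) and pyannote (speaker
segmentation) for audio; a DistilBERT emotion classifier and the
MEETING_SUMMARY model for text.
"""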
import os
from functools import partial

import torch
import gradio as gr
from transformers import pipeline
from pyannote.audio import Pipeline
import whisperx

from utils import split
from utils import speech_to_text as stt

os.environ["TOKENIZERS_PARALLELISM"] = "false"
# transformers pipelines: 0 = first GPU, -1 = CPU
device = 0 if torch.cuda.is_available() else -1

# only display a sentiment label if its score is above these per-label thresholds
thresholds = {
    "joy": 0.99,
    "anger": 0.95,
    "surprise": 0.95,
    "sadness": 0.98,
    "fear": 0.95,
    "love": 0.99,
}
# highlight colors for each sentiment label in the HighlightedText output
color_map = {
    "joy": "green",
    "anger": "red",
    "surprise": "yellow",
    "sadness": "blue",
    "fear": "orange",
    "love": "purple",
}

# Audio components: whisperx for transcription and alignment, pyannote for speaker segmentation
whisper_device = "cuda" if torch.cuda.is_available() else "cpu"
whisper = whisperx.load_model("tiny.en", whisper_device)
alignment_model, metadata = whisperx.load_align_model(language_code="en", device=whisper_device)
speaker_segmentation = Pipeline.from_pretrained(
    "pyannote/[email protected]",
    use_auth_token=os.environ["ENO_TOKEN"],
)

# bind the loaded models so the Gradio callback only needs the audio path
speech_to_text = partial(
    stt,
    speaker_segmentation=speaker_segmentation,
    whisper=whisper,
    alignment_model=alignment_model,
    metadata=metadata,
    whisper_device=whisper_device,
)
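
# NOTE: `speech_to_text` (defined in utils, not shown here) is assumed to return
# a list of (speech, speaker_id) pairs, e.g. [("Hi, how can I help?", "Agent"), ...];
# that is the shape `summarize` and `sentiment` below unpack.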

# Text components: emotion classification and meeting summarization
emotion_pipeline = pipeline(
    "text-classification",
    model="bhadresh-savani/distilbert-base-uncased-emotion",
    device=device,
)
summarization_pipeline = pipeline(
    "summarization",
    model="knkarthick/MEETING_SUMMARY",
    device=device,
)

def summarize(diarized, summarization_pipeline):
    """Concatenate the diarized turns into a "speaker: speech" transcript and summarize it."""
    text = ""
    for speech, speaker in diarized:
        text += f"\n{speaker}: {speech}"

    return summarization_pipeline(text)[0]["summary_text"]

def sentiment(diarized, emotion_pipeline):
    """Classify the emotion of each customer sentence.

    Returns (sentence, label) pairs for sentences whose score clears the
    per-label threshold, in the format gr.HighlightedText expects.
    """
    customer_sentiments = []

    for speaker_speech, speaker_id in diarized:
        if "Customer" not in speaker_id:
            continue
        sentences = split(speaker_speech)
        outputs = emotion_pipeline(sentences)
        for sentence, output in zip(sentences, outputs):
            if output["score"] > thresholds[output["label"]]:
                customer_sentiments.append((sentence, output["label"]))

    return customer_sentiments
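
# Hypothetical smoke test for the two text callbacks (no audio models needed);
# the turns below are made up and mirror the assumed diarized format:
#
#   demo_turns = [
#       ("Hello, thank you for calling support.", "Agent"),
#       ("I am really unhappy about this charge on my bill!", "Customer"),
#   ]
#   print(summarize(demo_turns, summarization_pipeline))
#   print(sentiment(demo_turns, emotion_pipeline))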

EXAMPLES = [["Customer_Support_Call.wav"]]

with gr.Blocks() as demo:

    with gr.Row():
        with gr.Column():
            audio = gr.Audio(label="Audio file", type="filepath")
            btn = gr.Button("Transcribe and Diarize")

            gr.Markdown("**Call Transcript:**")
            diarized = gr.HighlightedText(label="Call Transcript")
            gr.Markdown("Summarize Speaker")
            sum_btn = gr.Button("Get Summary")
            summary = gr.Textbox(lines=4)
            sentiment_btn = gr.Button("Get Customer Sentiment")
            analyzed = gr.HighlightedText(color_map=color_map)

        with gr.Column():
            gr.Markdown("## Example Files")
            gr.Examples(
                examples=EXAMPLES,
                inputs=[audio],
                outputs=[diarized],
                fn=speech_to_text,
                cache_examples=True
            )
    # when the transcribe button is clicked, convert the audio file to text and diarize it
    btn.click(
        fn=speech_to_text,
        inputs=audio,
        outputs=diarized,
    )
    # when the summary button is clicked, summarize the diarized transcript
    sum_btn.click(
        fn=partial(summarize, summarization_pipeline=summarization_pipeline),
        inputs=[diarized],
        outputs=summary,
    )

    # when the sentiment button is clicked, highlight high-confidence customer sentiments
    sentiment_btn.click(
        fn=partial(sentiment, emotion_pipeline=emotion_pipeline),
        inputs=diarized,
        outputs=[analyzed],
    )

demo.launch(debug=True)