File size: 7,252 Bytes
830a45d
 
 
 
 
60136f9
 
9c296b9
830a45d
 
 
 
203bf3f
 
f3282ff
 
 
 
203bf3f
 
 
 
 
 
 
 
 
3668ac8
8d2cd0e
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
3668ac8
 
 
 
8d2cd0e
e8653e3
3668ac8
e8653e3
 
830a45d
203bf3f
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
830a45d
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
e6caca8
4a6ab73
 
57a8cb1
e268f63
 
09c8fac
 
a11f647
25a935c
a11f647
 
25a935c
a11f647
 
25a935c
a11f647
25a935c
e268f63
58d76c4
1d3dc52
830a45d
66f1e8b
f3282ff
66f1e8b
 
 
 
203bf3f
66f1e8b
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
203bf3f
e6caca8
 
 
 
203bf3f
830a45d
 
f3282ff
830a45d
 
e6caca8
830a45d
8963f6c
203bf3f
830a45d
203bf3f
c91bff5
e8653e3
3668ac8
e8653e3
c91bff5
8ee44f6
c91bff5
 
 
 
00684fc
e268f63
09c8fac
eea7337
e6caca8
830a45d
09c8fac
3668ac8
e8653e3
830a45d
8963f6c
830a45d
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
import gradio as gr
import os 
import json 
import requests
import time
import pandas as pd
import io 
from scipy.io.wavfile import write
 
# AssemblyAI transcript endpoint (where we submit the file)
transcript_endpoint = "https://api.assemblyai.com/v2/transcript"

# AssemblyAI upload endpoint (where raw audio bytes are streamed to)
upload_endpoint = "https://api.assemblyai.com/v2/upload"

# Shared headers for every API call; the key must be present in the
# environment or this raises KeyError at import time.
# NOTE(review): Content-Type json is also sent with the binary upload request —
# presumably tolerated by the upload endpoint, but confirm against the API docs.
headers={
"Authorization": os.environ["ASSEMBLYAI_KEY"],
"Content-Type": "application/json"
}

# Helper function to upload data
def _read_file(filename, chunk_size=5242880):
    with open(filename, "rb") as f:
        while True:
            data = f.read(chunk_size)
            if not data:
                break
            yield data

def _read_array(audio, chunk_size=5242880):
    """Like _read_file but for array - creates temporary unsaved "file" from sample rate and audio np.array"""
    sr, aud = audio

    # Create temporary "file" and write data to it
    bytes_wav = bytes()
    temp_file = io.BytesIO(bytes_wav)
    write(temp_file, sr, aud)

    while True:
        data = temp_file.read(chunk_size)
        if not data:
            break
        yield data



def get_audio_from_upload(audio):
    """Stream an in-memory recording to AssemblyAI's upload endpoint.

    Returns the hosted-file URL the transcript API expects as ``audio_url``.
    """
    response = requests.post(
        upload_endpoint,
        headers=headers,
        data=_read_array(audio),
    )
    return response.json()["upload_url"]

def get_transcript_url(audio):
    """Transcribe an uploaded recording and return its analysis results.

    Uploads the audio, submits a transcription job with speaker labels,
    custom vocabulary/spelling, PII redaction, highlights, content safety,
    topic detection, sentiment analysis, summarization and entity detection
    enabled, then polls until the job finishes.

    Args:
        audio: ``(sample_rate, np.ndarray)`` pair from the Gradio Audio input.

    Returns:
        Tuple of (transcript text, summary text, sentiment DataFrame with
        columns text/sentiment/confidence, head of the topics DataFrame).

    Raises:
        RuntimeError: if the transcription job reports an error status.
    """
    url = get_audio_from_upload(audio)

    # Request payload telling the API which file to transcribe and which
    # audio-intelligence features to enable.
    # (Renamed from `json` — the original shadowed the imported json module.)
    request_body = {
        # URL of the audio file to process
        "audio_url": url,

        # Speaker diarization
        "speaker_labels": True,

        # Custom vocabulary
        "word_boost": ["assembly ai"],

        # Custom spelling
        "custom_spelling": [
            {"from": ["assembly AI"], "to": "AssemblyAI"},
            {"from": ["assembly AI's"], "to": "AssemblyAI's"},
        ],

        # PII redaction and its policies
        "redact_pii": True,
        "redact_pii_policies": ["drug", "injury", "person_name"],
        "redact_pii_audio": True,

        # Auto highlights (key phrases)
        "auto_highlights": True,

        # Content moderation
        "content_safety": True,

        # Topic detection
        "iab_categories": True,

        # Sentiment analysis
        "sentiment_analysis": True,

        # Summarization configuration
        "summarization": True,
        "summary_model": "informative",
        "summary_type": "bullets",

        # Entity detection
        "entity_detection": True,
    }

    response = requests.post(
        transcript_endpoint,
        json=request_body,
        headers=headers,  # Authorization links this transcription with your account
    )

    # Poll every 3 s until the job completes or errors.
    # NOTE(review): no overall timeout — a stuck job would poll forever.
    polling_endpoint = f"https://api.assemblyai.com/v2/transcript/{response.json()['id']}"
    while True:
        transcription_result = requests.get(polling_endpoint, headers=headers).json()
        if transcription_result['status'] == 'completed':
            break
        elif transcription_result['status'] == 'error':
            raise RuntimeError(f"Transcription failed: {transcription_result['error']}")
        time.sleep(3)

    # Sentiment results -> DataFrame restricted to the displayed columns.
    df = pd.DataFrame(transcription_result['sentiment_analysis_results'])
    df = df.loc[:, ["text", "sentiment", "confidence"]]

    # Topic-detection summary maps "A>B>C" category paths to relevance scores;
    # re-join with " > " for readability in the UI table.
    topic_summary = transcription_result['iab_categories_result']['summary']
    topics = [
        {"Topic": " > ".join(path.split(">")), "Relevance": relevance}
        for path, relevance in topic_summary.items()
    ]
    df_topic = pd.DataFrame(topics)

    return transcription_result['text'], transcription_result['summary'], df, df_topic.head()

# def get_transcript_file(filename):

#     upload_response = requests.post(
#     upload_endpoint,
#     headers=headers, 
#     data=_read_file(filename))
    
#     # JSON that tells the API which file to transcribe
#     json = {
#     # URL of the audio file to process
#     "audio_url": upload_response.json()['upload_url'],

#     # Turn on speaker labels
#     "speaker_labels": True,

#     # Turn on custom vocabulary
#     "word_boost": ["assembly ai"],

#     # Turn on custom spelling
#     "custom_spelling": [
#         {"from": ["assembly AI"], "to": "AssemblyAI"},
#         {"from": ["assembly AI's"], "to": "AssemblyAI's"}
#         ],

#     # Turn on PII Redaction and specify policies
#     "redact_pii": True,
#     "redact_pii_policies": ["drug", "injury", "person_name"],
#     "redact_pii_audio": True,

#     # Turn on Auto Highlights
#     "auto_highlights": True,

#     # Turn on Content Moderation
#     "content_safety": True,

#     # Turn on Topic Detection
#     "iab_categories": True,

#     # Turn on Sentiment Analysis
#     "sentiment_analysis": True,

#     # Turn on Summarization and specify configuration
#     "summarization": True,
#     "summary_model": "informative",
#     "summary_type": "bullets",

#     # Turn on Entity Detection
#     "entity_detection": True,
#     }

#     response = requests.post(
#         transcript_endpoint,
#         json=json,
#         headers=headers  # Authorization to link this transcription with your account
#       )

#     polling_endpoint = f"https://api.assemblyai.com/v2/transcript/{response.json()['id']}"
#     while True:
#       transcription_result = requests.get(polling_endpoint, headers=headers).json()
#       if transcription_result['status'] == 'completed':
#         break
#       elif transcription_result['status'] == 'error':
#         raise RuntimeError(f"Transcription failed: {transcription_result['error']}")
#       else:
#         time.sleep(3)
#     return transcription_result['text']

# Feature names for the (currently commented-out) checkbox group below.
audio_intelligence_list = [
    "Summarization",
    "Sentiment Analysis"
]

# Page header and blurb rendered at the top of the app.
title = """<h1 align="center">🔥Conformer-1 API </h1>"""
description = """
### In this demo, you can explore the outputs of a Conformer-1 Speech Recognition Model from AssemblyAI.
"""
                
# Gradio UI: one audio upload + a button, with tabbed panes for each result.
with gr.Blocks() as demo:
    gr.HTML(title)
    gr.Markdown(description)

    with gr.Column(elem_id = "col_container"):
        
        #audio_intelligence_options = gr.CheckboxGroup(audio_intelligence_list, label="Audio Intelligence Options")
        inputs = gr.Audio(source = "upload",label = "Upload the input Audio file")
        b1 = gr.Button('Process Audio')
        

    # NOTE(review): three tabs start with visible=False and nothing ever
    # toggles their visibility — presumably intentional (hidden features),
    # but confirm; their components still receive click outputs.
    with gr.Tabs():
        with gr.TabItem('Transcript') as transcript_tab:
            transcript = gr.Textbox(label = "Transcript Result" )
        with gr.TabItem('Summary', visible = False) as summary_tab: 
           summary = gr.Textbox(label = "Summary Result")
        with gr.TabItem('Sentiment Analysis', visible = False) as sentiment_tab:
            sentiment_analysis = gr.Dataframe(label = "Sentiment Analysis Result" )
        with gr.TabItem('Topic Detection', visible = False) as topic_detection_tab:
            topic_detection = gr.Dataframe(label = "Topic Detection Result" )
        
    
    # Wire the button to the full pipeline; outputs fill the four tabs.
    b1.click(get_transcript_url, [inputs], [transcript, summary, sentiment_analysis,topic_detection])
    
    # cache_examples=True runs get_transcript_url on "audio.mp3" at startup.
    examples = gr.Examples(examples = [["audio.mp3"]], inputs = inputs, outputs=[transcript, summary, sentiment_analysis, topic_detection], cache_examples = True, fn = get_transcript_url)
                    
    
    # NOTE(review): launch() is called inside the Blocks context manager —
    # it blocks here (debug=True), which works but is unconventional.
    demo.queue().launch(debug=True)