Ryan O'Connor committed on
Commit e0ef1bb · 0 Parent(s)

init commit

.gitignore ADDED
@@ -0,0 +1,2 @@
+ venv/
+ .idea/
README.md ADDED
File without changes
TEST.txt ADDED
File without changes
app/app.py ADDED
@@ -0,0 +1,463 @@
+ import json
+
+ import gradio as gr
+ import numpy as np
+ import plotly.express as px
+ import plotly.graph_objects as go
+ import requests
+
+ from helpers import make_header, upload_file, request_transcript, make_polling_endpoint, wait_for_completion, \
+     make_html_from_topics, make_paras_string, create_highlighted_list, make_summary, \
+     make_sentiment_output, make_entity_dict, make_entity_html, make_true_dict, make_final_json, make_content_safety_fig
+
+ from helpers import transcription_options_headers, audio_intelligence_headers, language_headers
+
+
+ def change_audio_source(radio, plot, file_data, mic_data):
+     """When the audio source radio selector is changed, updates the wave plot and swaps the visible audio component"""
+
+     # Empty the plot
+     plot.update_traces(go.Line(y=[]))
+     # Update the plot with the appropriate data and change the visibility of the audio components
+     if radio == "Audio File":
+         sample_rate, audio_data = file_data
+         plot.update_traces(go.Line(y=audio_data, x=np.arange(len(audio_data)) / sample_rate))
+         return [gr.Audio.update(visible=True),
+                 gr.Audio.update(visible=False),
+                 plot,
+                 plot]
+     elif radio == "Record Audio":
+         sample_rate, audio_data = mic_data
+         plot.update_traces(go.Line(y=audio_data, x=np.arange(len(audio_data)) / sample_rate))
+         return [gr.Audio.update(visible=False),
+                 gr.Audio.update(visible=True),
+                 plot,
+                 plot]
+
+
+ def plot_data(audio_data, plot):
+     """Updates the plot and the appropriate state variable when audio is uploaded/recorded or deleted"""
+     # If the current audio file is deleted
+     if audio_data is None:
+         # Replace the state variable for the audio source with placeholder values
+         sample_rate, audio_data = [0, np.array([])]
+         # Update the plot to be empty
+         plot.update_traces(go.Line(y=[]))
+     # If new audio is uploaded/recorded
+     else:
+         # Replace the current state variable with the new data
+         sample_rate, audio_data = audio_data
+         # Plot the new data
+         plot.update_traces(go.Line(y=audio_data, x=np.arange(len(audio_data)) / sample_rate))
+
+     # Update the plot component and the data state variable
+     return [plot, [sample_rate, audio_data], plot]
+
+
+ def set_lang_vis(transcription_options):
+     """Sets visibility of the language selector/warning when automatic language detection is (de)selected"""
+     if 'Automatic Language Detection' in transcription_options:
+         # `w` is the language-detection warning HTML defined at module level below
+         text = w
+         return [gr.Dropdown.update(visible=False),
+                 gr.Textbox.update(visible=True),
+                 text]
+     else:
+         text = ""
+         return [gr.Dropdown.update(visible=True),
+                 gr.Textbox.update(visible=False),
+                 text]
+
+
+ def option_verif(language, selected_tran_opts, selected_audint_opts):
+     """When the language is changed, this function automatically deselects options that are not allowed for that
+     language."""
+
+     not_available_tran, not_available_audint = get_unavailable_opts(language)
+
+     current_tran_opts = list(set(selected_tran_opts) - set(not_available_tran))
+     current_audint_opts = list(set(selected_audint_opts) - set(not_available_audint))
+
+     return [current_tran_opts,
+             current_audint_opts,
+             current_tran_opts,
+             current_audint_opts]
+
+
+ # Get tran/audint opts that are not available for a given language
+ def get_unavailable_opts(language):
+     """Get transcription and audio intelligence options that are unavailable for a given language"""
+     if language in ['Spanish', 'French', 'German', 'Portuguese']:
+         not_available_tran = ['Speaker Labels']
+         not_available_audint = ['PII Redaction', 'Auto Highlights', 'Sentiment Analysis', 'Summarization',
+                                 'Entity Detection']
+
+     elif language in ['Italian', 'Dutch']:
+         not_available_tran = ['Speaker Labels']
+         not_available_audint = ['PII Redaction', 'Auto Highlights', 'Content Moderation', 'Topic Detection',
+                                 'Sentiment Analysis', 'Summarization', 'Entity Detection']
+
+     elif language in ['Hindi', 'Japanese']:
+         not_available_tran = ['Speaker Labels']
+         not_available_audint = ['PII Redaction', 'Auto Highlights', 'Content Moderation', 'Topic Detection',
+                                 'Sentiment Analysis', 'Summarization', 'Entity Detection']
+
+     else:
+         not_available_tran = []
+         not_available_audint = []
+
+     return not_available_tran, not_available_audint
+
+
+ # When selecting a new tran option, checks to make sure it is allowed by the language and
+ # then adds it to selected_tran_opts and updates
+ def tran_selected(language, transcription_options):
+     """When a transcription option is selected, deselects it if it is unavailable for the current language"""
+     unavailable, _ = get_unavailable_opts(language)
+     selected_tran_opts = list(set(transcription_options) - set(unavailable))
+
+     return [selected_tran_opts, selected_tran_opts]
+
+
+ # When selecting a new audint option, checks to make sure it is allowed by the language and
+ # then adds it to selected_audint_opts and updates
+ def audint_selected(language, audio_intelligence_selector):
+     """When an audio intelligence option is selected, deselects it if it is unavailable for the current language"""
+     _, unavailable = get_unavailable_opts(language)
+     selected_audint_opts = list(set(audio_intelligence_selector) - set(unavailable))
+
+     return [selected_audint_opts, selected_audint_opts]
+
+
+ def create_output(r, paras, language, transc_opts=None, audint_opts=None):
+     """From a transcript response, returns all outputs for audio intelligence"""
+     if transc_opts is None:
+         transc_opts = ['Automatic Language Detection', 'Speaker Labels', 'Filter Profanity']
+
+     if audint_opts is None:
+         audint_opts = ['Summarization', 'Auto Highlights', 'Topic Detection', 'Entity Detection',
+                        'Sentiment Analysis', 'PII Redaction', 'Content Moderation']
+
+     # DIARIZATION
+     if "Speaker Labels" in transc_opts:
+         utts = '\n\n\n'.join([f"Speaker {utt['speaker']}:\n\n" + utt['text'] for utt in r['utterances']])
+     else:
+         utts = " NOT ANALYZED"
+
+     # HIGHLIGHTS
+     if 'Auto Highlights' in audint_opts:
+         highlight_dict = create_highlighted_list(paras, r['auto_highlights_result']['results'])
+     else:
+         highlight_dict = [["NOT ANALYZED", 0]]
+
+     # SUMMARIZATION
+     if 'Summarization' in audint_opts:
+         chapters = r['chapters']
+         summary_html = make_summary(chapters)
+     else:
+         summary_html = "<p>NOT ANALYZED</p>"
+
+     # TOPIC DETECTION
+     if "Topic Detection" in audint_opts:
+         topics = r['iab_categories_result']['summary']
+         topics_html = make_html_from_topics(topics)
+     else:
+         topics_html = "<p>NOT ANALYZED</p>"
+
+     # SENTIMENT
+     if "Sentiment Analysis" in audint_opts:
+         sent_results = r['sentiment_analysis_results']
+         sent = make_sentiment_output(sent_results)
+     else:
+         sent = "<p>NOT ANALYZED</p>"
+
+     # ENTITY
+     if "Entity Detection" in audint_opts:
+         entities = r['entities']
+         t = r['text']
+         d = make_entity_dict(entities, t)
+         entity_html = make_entity_html(d)
+     else:
+         entity_html = "<p>NOT ANALYZED</p>"
+
+     # CONTENT SAFETY
+     if "Content Moderation" in audint_opts:
+         cont = r['content_safety_labels']['summary']
+         content_fig = make_content_safety_fig(cont)
+     else:
+         content_fig = go.Figure()
+
+     return [language, paras, utts, highlight_dict, summary_html, topics_html, sent, entity_html, content_fig]
+
+
+ def submit_to_AAI(api_key,
+                   transcription_options,
+                   audio_intelligence_selector,
+                   language,
+                   radio,
+                   audio_file,
+                   mic_recording):
+     """Uploads the selected audio to AssemblyAI, runs the requested analyses, and returns the formatted results"""
+     # Make request header
+     header = make_header(api_key)
+
+     # Map transcription/audio intelligence options to the AssemblyAI API request JSON dict
+     true_dict = make_true_dict(transcription_options, audio_intelligence_selector)
+
+     final_json, language = make_final_json(true_dict, language)
+     final_json = {**true_dict, **final_json}
+
+     # Select which audio to use
+     if radio == "Audio File":
+         audio_data = audio_file
+     elif radio == "Record Audio":
+         audio_data = mic_recording
+
+     # Upload the audio
+     upload_url = upload_file(audio_data, header, is_file=False)
+
+     # Request transcript
+     transcript_response = request_transcript(upload_url, header, **final_json)
+
+     # Wait for the transcription to complete
+     polling_endpoint = make_polling_endpoint(transcript_response)
+     wait_for_completion(polling_endpoint, header)
+
+     # Fetch results JSON
+     r = requests.get(polling_endpoint, headers=header, json=final_json).json()
+
+     # Fetch paragraphs of the transcript
+     transc_id = r['id']
+     paras = make_paras_string(transc_id, header)
+     return create_output(r, paras, language, transcription_options, audio_intelligence_selector)
+
+
+ def example_output(language):
+     """Displays example output"""
+     with open("../example_data/paras.txt", 'r') as f:
+         paras = f.read()
+
+     with open('../example_data/response.json', 'r') as f:
+         r = json.load(f)
+
+     return create_output(r, paras, language)
+
+
+ with open('styles.css', 'r') as f:
+     css = f.read()
+
+ with gr.Blocks(css=css) as demo:
+     # Commented-out script that would force the light theme on load
+     '''
+     gr.HTML("<script>"
+             "window.addEventListener('load', function () {"
+             "gradioURL = window.location.href"
+             "if (!gradioURL.endsWith('?__theme=light')) {"
+             "window.location.replace(gradioURL + '?__theme=light');"
+             "}"
+             "});"
+             "</script>")
+     '''
+     # Load logo image
+     gr.HTML('<a href="https://www.assemblyai.com/"><img src="file/images/logo.png" class="logo"></a>')
+
+     # Load descriptions
+     gr.HTML("<h1 class='title'>Audio Intelligence Dashboard</h1>"
+             "<br>"
+             "<p>Check out the [BLOG NAME] blog to learn how to build this dashboard.</p>")
+
+     gr.HTML("<h1 class='title'>Directions</h1>"
+             "<p>To use this dashboard:</p>"
+             "<ul>"
+             "<li>1) Paste your AssemblyAI API Key into the box below - you can copy it from <a href=\"https://app.assemblyai.com/signup\">here</a> (or get one for free if you don't already have one)</li>"
+             "<li>2) Choose an audio source and upload or record audio</li>"
+             "<li>3) Select the types of analyses you would like to perform on the audio</li>"
+             "<li>4) Click <i>Submit</i></li>"
+             "<li>5) View the results at the bottom of the page</li>"
+             "</ul>"
+             "<br>"
+             "<p>You may also click <b>Show Example Output</b> below to see an example without having to enter an API key.</p>")
+
+     gr.HTML('<div class="alert alert__warning"><span>'
+             'Note that this dashboard is not an official AssemblyAI product and is intended for educational purposes.'
+             '</span></div>')
+
+     # API Key title
+     with gr.Box():
+         gr.HTML("<p class=\"apikey\">API Key:</p>")
+         # API key textbox (password-style)
+         api_key = gr.Textbox(label="", elem_id="pw")
+
+     # Gradio states for the plotly Figure object, the audio data for the file source, and the audio data for the mic source
+     plot = gr.State(px.line(labels={'x': 'Time (s)', 'y': ''}))
+     file_data = gr.State([1, [0]])  # [sample rate, [data]]
+     mic_data = gr.State([1, [0]])  # [sample rate, [data]]
+
+     # Options that the user wants
+     selected_tran_opts = gr.State(list(transcription_options_headers.keys()))
+     selected_audint_opts = gr.State(list(audio_intelligence_headers.keys()))
+
+     # Current options = selected options - unavailable options for the specified language
+     current_tran_opts = gr.State([])
+     current_audint_opts = gr.State([])
+
+     # Selector for audio source
+     radio = gr.Radio(["Audio File", "Record Audio"], label="Audio Source", value="Audio File")
+
+     # Audio components for both file and microphone data
+     with gr.Box():
+         audio_file = gr.Audio(interactive=True)
+         mic_recording = gr.Audio(source="microphone", visible=False, interactive=True)
+
+     # Audio wave plot
+     audio_wave = gr.Plot(plot.value)
+
+     # Checkbox group for transcription options
+     transcription_options = gr.CheckboxGroup(
+         choices=list(transcription_options_headers.keys()),
+         value=list(transcription_options_headers.keys()),
+         label="Transcription Options",
+     )
+
+     # Warning for using Automatic Language Detection
+     w = "<div class='alert alert__warning'>" \
+         "<p>Automatic Language Detection is not available for Hindi or Japanese. For best results on non-US " \
+         "English audio, specify the dialect instead of using Automatic Language Detection. " \
+         "<br>" \
+         "Some Audio Intelligence features are not available in some languages. See " \
+         "<a href='https://airtable.com/shr53TWU5reXkAmt2/tblf7O4cffFndmsCH?backgroundColor=green'>here</a> " \
+         "for more details.</p>" \
+         "</div>"
+
+     auto_lang_detect_warning = gr.HTML(w)
+
+     # Checkbox group for Audio Intelligence options
+     audio_intelligence_selector = gr.CheckboxGroup(
+         choices=list(audio_intelligence_headers.keys()),
+         value=list(audio_intelligence_headers.keys()),
+         label='Audio Intelligence Options'
+     )
+
+     # Language selector for manual language specification
+     language = gr.Dropdown(
+         choices=list(language_headers.keys()),
+         value="US English",
+         label="Language Specification",
+         visible=False,
+     )
+
+     # Button to submit audio for processing with the selected options
+     submit = gr.Button('Submit')
+
+     # Button to display example output without making an API call
+     example = gr.Button('Show Example Output')
+
+     # Results tab group; `phl` is the placeholder height in lines
+     phl = 10
+     with gr.Tab('Transcript'):
+         trans_tab = gr.Textbox(placeholder="Your formatted transcript will appear here ...",
+                                lines=phl,
+                                max_lines=25,
+                                show_label=False)
+     with gr.Tab('Speaker Labels'):
+         diarization_tab = gr.Textbox(placeholder="Your diarized transcript will appear here ...",
+                                      lines=phl,
+                                      max_lines=25,
+                                      show_label=False)
+     with gr.Tab('Auto Highlights'):
+         highlights_tab = gr.HighlightedText()
+     with gr.Tab('Summary'):
+         summary_tab = gr.HTML("<br>" * phl)
+     with gr.Tab("Detected Topics"):
+         topics_tab = gr.HTML("<br>" * phl)
+     with gr.Tab("Sentiment Analysis"):
+         sentiment_tab = gr.HTML("<br>" * phl)
+     with gr.Tab("Entity Detection"):
+         entity_tab = gr.HTML("<br>" * phl)
+     with gr.Tab("Content Safety"):
+         content_tab = gr.Plot()
+
+     ####################################### Functionality ######################################################
+
+     # Changing the audio source swaps the visible Audio input component
+     radio.change(fn=change_audio_source,
+                  inputs=[
+                      radio,
+                      plot,
+                      file_data,
+                      mic_data],
+                  outputs=[
+                      audio_file,
+                      mic_recording,
+                      audio_wave,
+                      plot])
+
+     # Inputting audio updates the plot
+     audio_file.change(fn=plot_data,
+                       inputs=[audio_file, plot],
+                       outputs=[audio_wave, file_data, plot]
+                       )
+     mic_recording.change(fn=plot_data,
+                          inputs=[mic_recording, plot],
+                          outputs=[audio_wave, mic_data, plot])
+
+     # Deselecting Automatic Language Detection shows the Language Selector
+     transcription_options.change(
+         fn=set_lang_vis,
+         inputs=transcription_options,
+         outputs=[language, auto_lang_detect_warning, auto_lang_detect_warning])
+
+     # Changing the language deselects certain Transcription / Audio Intelligence options
+     language.change(
+         fn=option_verif,
+         inputs=[language,
+                 selected_tran_opts,
+                 selected_audint_opts],
+         outputs=[transcription_options, audio_intelligence_selector, current_tran_opts, current_audint_opts]
+     )
+
+     # Selecting a transcription option adds it to the selected options if the language allows it
+     transcription_options.change(
+         fn=tran_selected,
+         inputs=[language, transcription_options],
+         outputs=[transcription_options, selected_tran_opts]
+     )
+
+     # Selecting an audio intelligence option adds it to the selected options if the language allows it
+     audio_intelligence_selector.change(
+         fn=audint_selected,
+         inputs=[language, audio_intelligence_selector],
+         outputs=[audio_intelligence_selector, selected_audint_opts]
+     )
+
+     # Clicking "Submit" uploads the selected audio to AssemblyAI, performs the requested analyses, and displays the results
+     submit.click(fn=submit_to_AAI,
+                  inputs=[api_key,
+                          transcription_options,
+                          audio_intelligence_selector,
+                          language,
+                          radio,
+                          audio_file,
+                          mic_recording],
+                  outputs=[language,
+                           trans_tab,
+                           diarization_tab,
+                           highlights_tab,
+                           summary_tab,
+                           topics_tab,
+                           sentiment_tab,
+                           entity_tab,
+                           content_tab])
+
+     # Clicking "Show Example Output" displays example results
+     example.click(fn=example_output,
+                   inputs=language,
+                   outputs=[language,
+                            trans_tab,
+                            diarization_tab,
+                            highlights_tab,
+                            summary_tab,
+                            topics_tab,
+                            sentiment_tab,
+                            entity_tab,
+                            content_tab])
+
+ # Launch the application
+ demo.launch()  # set share=True to generate a public link
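
A note on the pattern used throughout the Blocks context above: mutable values like the wave plot are stored in gr.State, and the state must be passed in as an input and returned as an output for a change to persist across callbacks. A minimal standalone sketch of this round-trip (not part of the app; the component names here are illustrative):

    import gradio as gr

    with gr.Blocks() as sketch:
        count = gr.State(0)                  # per-session, server-side state
        button = gr.Button("Increment")
        display = gr.Number(label="Count")

        def increment(c):
            # Return the new value twice: once to update the state, once to display it
            return c + 1, c + 1

        # The state is both an input and an output of the event handler
        button.click(fn=increment, inputs=count, outputs=[count, display])

    sketch.launch()

This is why functions like plot_data above return the plot and state variables alongside the components they update.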
app/css_components/__init__.py ADDED
File without changes
app/css_components/build_css.py ADDED
@@ -0,0 +1,16 @@
+ # Strings together the CSS files in this folder and exports the result to `../styles.css`
+
+ import os
+
+ css_filepaths = [f for f in os.listdir() if f.endswith(".css")]
+
+ # Ensure `file.css` (the base styles) comes first in the concatenation
+ css_filepaths.remove('file.css')
+ css_filepaths.insert(0, 'file.css')
+
+ css = ""
+ for filepath in css_filepaths:
+     with open(filepath, 'r') as file:
+         css += file.read()
+
+ with open("../styles.css", 'w') as f:
+     f.write(css)
app/css_components/build_topic_detection.py ADDED
@@ -0,0 +1,25 @@
+ # Programmatic way to generate `topic_detection.css`
+
+ css = ".istopic {\n" \
+       "color: #6b2bd6;" \
+       "\n}" \
+       "\n\n"
+
+ # Font size of the highest level topic
+ starting_fs = 30
+ # Font size difference between topic and subtopic
+ fs_diff = 5
+ # Minimum font size of text
+ fs_min = 15
+ # Number of pixels to indent at each level
+ ind = 18
+
+ for i in range(10):
+     css += f".topic-L{i} {{\n" \
+            f"font-size: {max(starting_fs - i * fs_diff, fs_min)}px;\n" \
+            f"text-indent: {ind * i}px;\n" \
+            f"}}" \
+            f"\n\n"
+
+ with open('topic_detection.css', 'w') as f:
+     f.write(css)
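
Note that both build scripts use bare relative paths (topic_detection.css, ../styles.css), so they presumably need to be run from inside app/css_components/, generating topic_detection.css first and then concatenating everything into ../styles.css with build_css.py.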
app/css_components/file.css ADDED
@@ -0,0 +1,81 @@
+ body {
+   font-family: -apple-system, BlinkMacSystemFont, "Segoe UI", Roboto, Helvetica,
+     Arial, sans-serif, "Apple Color Emoji", "Segoe UI Emoji", "Segoe UI Symbol";
+ }
+
+ .logo {
+   width: 180px;
+ }
+
+ .title {
+   font-weight: 600;
+   text-align: left;
+   color: black;
+   font-size: 18px;
+ }
+
+ .alert,
+ #component-2,
+ #component-3 {
+   padding: 24px;
+   color: black;
+   background-color: #f4f8fb;
+   border: 1px solid #d6dce7;
+   border-radius: 8px;
+   box-shadow: 0px 6px 15px rgb(0 0 0 / 2%), 0px 2px 5px rgb(0 0 0 / 4%);
+ }
+
+ ol {
+   list-style: disc;
+ }
+
+ .alert__info {
+   background-color: #f4f8fb;
+   color: #323552;
+ }
+
+ .alert__warning {
+   background-color: #fffae5;
+   color: #917115;
+   border: 1px solid #e4cf2b;
+ }
+
+ #pw {
+   -webkit-text-security: disc;
+ }
+
+ /* unvisited link */
+ a:link {
+   color: #6b2bd6;
+ }
+
+ /* visited link */
+ a:visited {
+   color: #6b2bd6;
+ }
+
+ /* mouse over link */
+ a:hover {
+   color: #6b2bd6;
+ }
+
+ /* selected link */
+ a:active {
+   color: #6b2bd6;
+ }
+
+ li {
+   margin-left: 1em;
+ }
+
+ .apikey {
+ }
+
+ .entity-list {
+   color: #6b2bd6;
+   font-size: 16px;
+ }
+
+ .entity-elt {
+   color: black;
+ }
app/css_components/topic_detection.css ADDED
@@ -0,0 +1,54 @@
+ .istopic {
+   color: #6b2bd6;
+ }
+
+ .topic-L0 {
+   font-size: 30px;
+   text-indent: 0px;
+ }
+
+ .topic-L1 {
+   font-size: 25px;
+   text-indent: 18px;
+ }
+
+ .topic-L2 {
+   font-size: 20px;
+   text-indent: 36px;
+ }
+
+ .topic-L3 {
+   font-size: 15px;
+   text-indent: 54px;
+ }
+
+ .topic-L4 {
+   font-size: 15px;
+   text-indent: 72px;
+ }
+
+ .topic-L5 {
+   font-size: 15px;
+   text-indent: 90px;
+ }
+
+ .topic-L6 {
+   font-size: 15px;
+   text-indent: 108px;
+ }
+
+ .topic-L7 {
+   font-size: 15px;
+   text-indent: 126px;
+ }
+
+ .topic-L8 {
+   font-size: 15px;
+   text-indent: 144px;
+ }
+
+ .topic-L9 {
+   font-size: 15px;
+   text-indent: 162px;
+ }
+
app/helpers.py ADDED
@@ -0,0 +1,448 @@
+ import re
+
+ import requests
+ import time
+ from scipy.io.wavfile import write
+ import io
+ import plotly.express as px
+
+
+ upload_endpoint = "https://api.assemblyai.com/v2/upload"
+ transcript_endpoint = "https://api.assemblyai.com/v2/transcript"
+
+ # Colors for sentiment analysis highlighting
+ green = "background-color: #159609"
+ red = "background-color: #cc0c0c"
+
+ # Converts Gradio checkboxes to AssemblyAI header arguments
+ transcription_options_headers = {
+     'Automatic Language Detection': 'language_detection',
+     'Speaker Labels': 'speaker_labels',
+     'Filter Profanity': 'filter_profanity',
+ }
+
+ # Converts Gradio checkboxes to AssemblyAI header arguments
+ audio_intelligence_headers = {
+     'Summarization': 'auto_chapters',
+     'Auto Highlights': 'auto_highlights',
+     'Topic Detection': 'iab_categories',
+     'Entity Detection': 'entity_detection',
+     'Sentiment Analysis': 'sentiment_analysis',
+     'PII Redaction': 'redact_pii',
+     'Content Moderation': 'content_safety',
+ }
+
+ # Converts the selected language in Gradio to a language code for the AssemblyAI header argument
+ language_headers = {
+     'Global English': 'en',
+     'US English': 'en_us',
+     'British English': 'en_uk',
+     'Australian English': 'en_au',
+     'Spanish': 'es',
+     'French': 'fr',
+     'German': 'de',
+     'Italian': 'it',
+     'Portuguese': 'pt',
+     'Dutch': 'nl',
+     'Hindi': 'hi',
+     'Japanese': 'jp',
+ }
+
+
+ def make_header(api_key):
+     return {
+         'authorization': api_key,
+         'content-type': 'application/json'
+     }
+
+
+ def _read_file(filename, chunk_size=5242880):
+     """Helper for `upload_file()`"""
+     with open(filename, "rb") as f:
+         while True:
+             data = f.read(chunk_size)
+             if not data:
+                 break
+             yield data
+
+
+ def _read_array(audio, chunk_size=5242880):
+     """Like `_read_file` but for an array - creates a temporary unsaved "file" from the sample rate and audio np.array"""
+     sr, aud = audio
+
+     # Create a temporary "file" and write the data to it
+     bytes_wav = bytes()
+     temp_file = io.BytesIO(bytes_wav)
+     write(temp_file, sr, aud)
+
+     while True:
+         data = temp_file.read(chunk_size)
+         if not data:
+             break
+         yield data
+
+
+ def upload_file(audio_file, header, is_file=True):
+     """Uploads a file to AssemblyAI for analysis"""
+     upload_response = requests.post(
+         upload_endpoint,
+         headers=header,
+         data=_read_file(audio_file) if is_file else _read_array(audio_file)
+     )
+     if upload_response.status_code != 200:
+         upload_response.raise_for_status()
+     # Returns {'upload_url': <URL>}
+     return upload_response.json()
+
+
+ def request_transcript(upload_url, header, **kwargs):
+     """Requests a transcript/audio analysis from AssemblyAI"""
+
+     # If the input is a dict returned from `upload_file` rather than a raw upload_url string
+     if type(upload_url) is dict:
+         upload_url = upload_url['upload_url']
+
+     # Create the request
+     transcript_request = {
+         'audio_url': upload_url,
+         **kwargs
+     }
+
+     # POST the request
+     transcript_response = requests.post(
+         transcript_endpoint,
+         json=transcript_request,
+         headers=header
+     )
+
+     return transcript_response.json()
+
+
+ def make_polling_endpoint(transcript_id):
+     """Creates a polling endpoint from a transcript ID to check on the status of the transcript"""
+     # If the transcript response dict is input rather than a raw transcript ID string
+     if type(transcript_id) is dict:
+         transcript_id = transcript_id['id']
+
+     polling_endpoint = "https://api.assemblyai.com/v2/transcript/" + transcript_id
+     return polling_endpoint
+
+
+ def wait_for_completion(polling_endpoint, header):
+     """Given a polling endpoint, waits for the transcription/audio analysis to complete"""
+     while True:
+         polling_response = requests.get(polling_endpoint, headers=header)
+         polling_response = polling_response.json()
+
+         if polling_response['status'] == 'completed':
+             break
+         elif polling_response['status'] == 'error':
+             raise Exception(f"Error: {polling_response['error']}")
+
+         time.sleep(5)
+
+
+ def make_true_dict(transcription_options, audio_intelligence_selector):
+     """Given the transcription / audio intelligence Gradio options, creates a dictionary to be used in the AssemblyAI request"""
+     # Convert Gradio checkbox names to AssemblyAI API keys
+     aai_tran_keys = [transcription_options_headers[elt] for elt in transcription_options]
+     aai_audint_keys = [audio_intelligence_headers[elt] for elt in audio_intelligence_selector]
+
+     # For each checked box, set it to true in the JSON used in the POST request to AssemblyAI
+     aai_tran_dict = {key: 'true' for key in aai_tran_keys}
+     aai_audint_dict = {key: 'true' for key in aai_audint_keys}
+
+     return {**aai_tran_dict, **aai_audint_dict}
+
+
+ def make_final_json(true_dict, language):
+     """Takes in the output of `make_true_dict()` and adds all other required key-value pairs"""
+     # If automatic language detection is not selected, pass a language code, defaulting to US English
+     # when no language is specified
+     if 'language_detection' not in true_dict:
+         if language is None:
+             language = "US English"
+         true_dict = {**true_dict, 'language_code': language_headers[language]}
+     # If PII Redaction is enabled, add default redaction policies
+     if 'redact_pii' in true_dict:
+         true_dict = {**true_dict, 'redact_pii_policies': ['drug', 'injury', 'person_name', 'money_amount']}
+     return true_dict, language
+
+
+ def _split_on_capital(string):
+     """Adds spaces between capitalized words of a string via regex. 'HereAreSomeWords' -> 'Here Are Some Words'"""
+     return ' '.join(re.findall("[A-Z][^A-Z]*", string))
+
+
+ def _make_tree(c, ukey=''):
+     '''
+     Given a list whose elements are nested topic lists, generates a JSON-esque dictionary tree of topics and
+     subtopics.
+
+     E.g. the input
+
+         [
+             ['Education', 'CollegeEducation', 'PostgraduateEducation'],
+             ['Education', 'CollegeEducation', 'UndergraduateEducation']
+         ]
+
+     would output a dictionary corresponding to a tree with two leaves, 'UndergraduateEducation' and
+     'PostgraduateEducation', which fall under a node 'CollegeEducation', which in turn falls under the node 'Education'.
+
+     :param c: List of topics
+     :param ukey: "Upper key". For recursion - name of the upper level key whose value (list) is being recursed on
+     :return: Dictionary that defines a tree structure
+     '''
+
+     # Create an empty dict for the current sublist
+     d = dict()
+
+     # If leaf, return None
+     if c is None and ukey is None:
+         return None
+     elif c is None:
+         return {None: None}
+     else:
+         # For each elt of the input (itself a list),
+         for n, i in enumerate(c):
+             # For topics with a sublist, e.g. if ['NewsAndPolitics', 'Politics'] and
+             # ['NewsAndPolitics', 'Politics', 'Elections'] are both in the list - need a way to signify that
+             # Politics itself is included
+             if i is None:
+                 d[None] = None
+             # If the next subtopic is not in the dict, add it. If the remaining list is empty, make the value None
+             elif i[0] not in d.keys():
+                 topic = i.pop(0)
+                 d[topic] = None if i == [] else [i]
+             # If the subtopic is already in the dict
+             else:
+                 # If the value for this subtopic is only None (i.e. the subject itself is a leaf), then append the sublist
+                 if d[i[0]] is None:
+                     d[i[0]] = [None, i[1:]]
+                 # If the value for this subtopic is a list itself, then append the remaining list
+                 else:
+                     d[i[0]].append(i[1:])
+         # Recurse on the remaining leaves
+         for key in d:
+             d[key] = _make_tree(d[key], key)
+         return d
+
+
+ def _make_html_tree(dic, level=0, HTML=''):
+     """Generates an HTML tree from an output of `_make_tree`"""
+     HTML += "<ul>"
+     for key in dic:
+         # Add the topic to the HTML, specifying the current level and whether it is a topic
+         if type(dic[key]) == dict:
+             HTML += "<li>"
+             if None in dic[key].keys():
+                 del dic[key][None]
+                 HTML += f'<p class="topic-L{level} istopic">{_split_on_capital(key)}</p>'
+             else:
+                 HTML += f'<p class="topic-L{level}">{_split_on_capital(key)}</p>'
+             HTML += "</li>"
+
+             HTML = _make_html_tree(dic[key], level=level + 1, HTML=HTML)
+         else:
+             HTML += "<li>"
+             HTML += f'<p class="topic-L{level} istopic">{_split_on_capital(key)}</p>'
+             HTML += "</li>"
+     HTML += "</ul>"
+     return HTML
+
+
+ def _make_html_body(dic):
+     """Makes an HTML body from an output of `_make_tree`"""
+     HTML = '<body>'
+     HTML += _make_html_tree(dic)
+     HTML += "</body>"
+     return HTML
+
+
+ def _make_html(dic):
+     """Makes a full HTML document from an output of `_make_tree`, using styles.css styling"""
+     HTML = '<!DOCTYPE html>' \
+            '<html>' \
+            '<head>' \
+            '<title>Another simple example</title>' \
+            '<link rel="stylesheet" type="text/css" href="styles.css"/>' \
+            '</head>'
+     HTML += _make_html_body(dic)
+     HTML += "</html>"
+     return HTML
+
+
+ # make_html_from_topics(j['iab_categories_result']['summary'])
+ def make_html_from_topics(dic, threshold=0.0):
+     """Given a topics dictionary from the AAI Topic Detection API, generates the corresponding structured HTML.
+     Input is `response.json()['iab_categories_result']['summary']` from a GET request on the AssemblyAI `v2/transcript`
+     endpoint."""
+     # Potentially filter some items out
+     cats = [k for k, v in dic.items() if float(v) >= threshold]
+
+     # Sort the remaining topics
+     cats.sort()
+
+     # Split the items into lists
+     cats = [i.split(">") for i in cats]
+
+     # Make the topic tree
+     tree = _make_tree(cats)
+
+     # Return formatted HTML
+     return _make_html(tree)
+
+
+ def make_paras_string(transc_id, header):
+     """Makes a string by concatenating paragraphs with newlines in between. Inputs are a transcript ID and a request
+     header; the paragraphs are fetched from the AssemblyAI paragraphs endpoint"""
+     endpoint = transcript_endpoint + "/" + transc_id + "/paragraphs"
+     paras = requests.get(endpoint, headers=header).json()['paragraphs']
+     paras = '\n\n'.join(i['text'] for i in paras)
+     return paras
+
+
+ def create_highlighted_list(paragraphs_string, highlights_result, rank=0):
+     """Outputs auto highlights information in the appropriate format for `gr.HighlightedText()`. `highlights_result` is
+     `response.json()['auto_highlights_result']['results']` where the response is from a GET request on the AssemblyAI
+     v2/transcript endpoint"""
+     # Max and min opacities to highlight to
+     MAX_HIGHLIGHT = 1  # Max allowed = 1
+     MIN_HIGHLIGHT = 0.25  # Min allowed = 0
+
+     # Filter the list for everything above the input rank
+     highlights_result = [i for i in highlights_result if i['rank'] >= rank]
+
+     # Get the max/min ranks and find the scale/shift needed so ranks are mapped to [MIN_HIGHLIGHT, MAX_HIGHLIGHT]
+     max_rank = max([i['rank'] for i in highlights_result])
+     min_rank = min([i['rank'] for i in highlights_result])
+     scale = (MAX_HIGHLIGHT - MIN_HIGHLIGHT) / (max_rank - min_rank)
+     shift = (MAX_HIGHLIGHT - max_rank * scale)
+
+     # Isolate only the highlight text and rank
+     highlights_result = [(i['text'], i['rank']) for i in highlights_result]
+
+     entities = []
+     for highlight, rank in highlights_result:
+         # For each highlight, find all starting character instances (escaped so the text is matched literally)
+         starts = [c.start() for c in re.finditer(re.escape(highlight), paragraphs_string)]
+         # Create a list of locations for this highlight with the entity value (highlight opacity) scaled properly
+         e = [{"entity": rank * scale + shift,
+               "start": start,
+               "end": start + len(highlight)}
+              for start in starts]
+         entities += e
+
+     # Create the dictionary
+     highlight_dict = {"text": paragraphs_string, "entities": entities}
+
+     # Sort the entities by start char. A bug in Gradio requires this
+     highlight_dict['entities'] = sorted(highlight_dict['entities'], key=lambda x: x['start'])
+
+     return highlight_dict
+
+
+ def make_summary(chapters):
+     """Makes HTML for the "Summary" `gr.Tab()` tab. Input is `response.json()['chapters']` where the response is from a
+     GET request to AssemblyAI's v2/transcript endpoint"""
+     html = "<div>"
+     for chapter in chapters:
+         html += "<details>" \
+                 f"<summary><b>{chapter['headline']}</b></summary>" \
+                 f"{chapter['summary']}" \
+                 "</details>"
+     html += "</div>"
+     return html
+
+
+ def to_hex(num, max_opacity=128):
+     """Converts a confidence value in the range [0, 1] to a two-digit hex value"""
+     # Zero-padded so the resulting 8-digit color string is always valid
+     return hex(int(max_opacity * num))[2:].zfill(2)
+
+
+ def make_sentiment_output(sentiment_analysis_results):
+     """Makes HTML output of sentiment analysis info for display with `gr.HTML()`. Input is
+     `response.json()['sentiment_analysis_results']` from a GET request on AssemblyAI v2/transcript."""
+     p = '<p>'
+     for sentiment in sentiment_analysis_results:
+         if sentiment['sentiment'] == 'POSITIVE':
+             p += f'<mark style="{green + to_hex(sentiment["confidence"])}">' + sentiment['text'] + '</mark> '
+         elif sentiment['sentiment'] == "NEGATIVE":
+             p += f'<mark style="{red + to_hex(sentiment["confidence"])}">' + sentiment['text'] + '</mark> '
+         else:
+             p += sentiment['text'] + ' '
+     p += "</p>"
+     return p
+
+
+ def make_entity_dict(entities, t, offset=40):
+     """Creates the dictionary that will be used to generate HTML for the Entity Detection `gr.Tab()` tab.
+     Inputs are `response.json()['entities']` and `response.json()['text']` for the response of a GET request
+     on the AssemblyAI v2/transcript endpoint"""
+     len_text = len(t)
+
+     d = {}
+     for entity in entities:
+         # Find the entity in the text
+         s = t.find(entity['text'])
+         if s == -1:
+             p = None
+         else:
+             len_entity = len(entity['text'])
+             # Get the entity context (in the colloquial sense)
+             p = t[max(0, s - offset):min(s + len_entity + offset, len_text)]
+             # Make sure the context starts and ends with a full word
+             p = '... ' + ' '.join(p.split(' ')[1:-1]) + ' ...'
+         # Add to the dict
+         label = ' '.join(entity['entity_type'].split('_')).title()
+         if label in d:
+             d[label] += [[p, entity['text']]]
+         else:
+             d[label] = [[p, entity['text']]]
+
+     return d
+
+
+ def make_entity_html(d, highlight_color="#FFFF0080"):
+     """Input is the output of `make_entity_dict`. Creates HTML for the Entity Detection info"""
+     h = "<ul>"
+     for i in d:
+         h += f"""<li style="color: #6b2bd6; font-size: 20px;">{i}"""
+         h += "<ul>"
+         for sent, ent in d[i]:
+             if sent is None:
+                 h += f"""<li style="color: black; font-size: 16px;">[REDACTED]</li>"""
+             else:
+                 h += f"""<li style="color: black; font-size: 16px;">{sent.replace(ent, f'<mark style="background-color: {highlight_color}">{ent}</mark>')}</li>"""
+         h += '</ul>'
+         h += '</li>'
+     h += "</ul>"
+     return h
+
+
+ def make_content_safety_fig(cont_safety_summary):
+     """Creates the content safety figure from `response.json()['content_safety_labels']['summary']` from a GET request
+     on the AssemblyAI v2/transcript endpoint"""
+     # Create a dictionary in the form plotly expects
+     d = {'label': [], 'severity': [], 'color': []}
+
+     # For each sensitive topic, add the (formatted) name, severity, and plot color
+     for key in cont_safety_summary:
+         d['label'] += [' '.join(key.split('_')).title()]
+         d['severity'] += [cont_safety_summary[key]]
+         d['color'] += ['rgba(107, 43, 214, 1)']
+
+     # Create the figure (n.b. repetitive color info, but was running into plotly bugs otherwise)
+     content_fig = px.bar(d, x='severity', y='label', color='color', color_discrete_map={
+         'Crime Violence': 'rgba(107, 43, 214, 0.1)',
+         'Alcohol': 'rgba(107, 43, 214, 0.1)',
+         'Accidents': 'rgba(107, 43, 214, 0.1)'})
+
+     # Update the content figure plot background
+     content_fig.update_layout({'plot_bgcolor': 'rgba(107, 43, 214, 0.1)'})
+
+     # Scale the x-axis appropriately
+     content_fig.update_xaxes(range=[0, 1])
+     return content_fig
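
Taken together, these helpers implement AssemblyAI's upload → request → poll flow. A minimal sketch of how they chain (the API key and audio path are placeholders, and auto_chapters stands in for whichever options make_true_dict would produce):

    import requests
    from helpers import make_header, upload_file, request_transcript, \
        make_polling_endpoint, wait_for_completion

    header = make_header("YOUR-API-KEY")                     # placeholder key
    upload_url = upload_file("../gettysburg10.wav", header)  # returns {'upload_url': <URL>}
    response = request_transcript(upload_url, header, auto_chapters='true')
    polling_endpoint = make_polling_endpoint(response)       # accepts the response dict directly
    wait_for_completion(polling_endpoint, header)            # blocks until 'completed', raises on 'error'
    results = requests.get(polling_endpoint, headers=header).json()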
app/images/logo.png ADDED
app/styles.css ADDED
@@ -0,0 +1,134 @@
+ body {
+   font-family: -apple-system, BlinkMacSystemFont, "Segoe UI", Roboto, Helvetica,
+     Arial, sans-serif, "Apple Color Emoji", "Segoe UI Emoji", "Segoe UI Symbol";
+ }
+
+ .logo {
+   width: 180px;
+ }
+
+ .title {
+   font-weight: 600;
+   text-align: left;
+   color: black;
+   font-size: 18px;
+ }
+
+ .alert,
+ #component-2,
+ #component-3 {
+   padding: 24px;
+   color: black;
+   background-color: #f4f8fb;
+   border: 1px solid #d6dce7;
+   border-radius: 8px;
+   box-shadow: 0px 6px 15px rgb(0 0 0 / 2%), 0px 2px 5px rgb(0 0 0 / 4%);
+ }
+
+ ol {
+   list-style: disc;
+ }
+
+ .alert__info {
+   background-color: #f4f8fb;
+   color: #323552;
+ }
+
+ .alert__warning {
+   background-color: #fffae5;
+   color: #917115;
+   border: 1px solid #e4cf2b;
+ }
+
+ #pw {
+   -webkit-text-security: disc;
+ }
+
+ /* unvisited link */
+ a:link {
+   color: #6b2bd6;
+ }
+
+ /* visited link */
+ a:visited {
+   color: #6b2bd6;
+ }
+
+ /* mouse over link */
+ a:hover {
+   color: #6b2bd6;
+ }
+
+ /* selected link */
+ a:active {
+   color: #6b2bd6;
+ }
+
+ li {
+   margin-left: 1em;
+ }
+
+ .apikey {
+ }
+
+ .entity-list {
+   color: #6b2bd6;
+   font-size: 16px;
+ }
+
+ .entity-elt {
+   color: black;
+ }.istopic {
+   color: #6b2bd6;
+ }
+
+ .topic-L0 {
+   font-size: 30px;
+   text-indent: 0px;
+ }
+
+ .topic-L1 {
+   font-size: 25px;
+   text-indent: 18px;
+ }
+
+ .topic-L2 {
+   font-size: 20px;
+   text-indent: 36px;
+ }
+
+ .topic-L3 {
+   font-size: 15px;
+   text-indent: 54px;
+ }
+
+ .topic-L4 {
+   font-size: 15px;
+   text-indent: 72px;
+ }
+
+ .topic-L5 {
+   font-size: 15px;
+   text-indent: 90px;
+ }
+
+ .topic-L6 {
+   font-size: 15px;
+   text-indent: 108px;
+ }
+
+ .topic-L7 {
+   font-size: 15px;
+   text-indent: 126px;
+ }
+
+ .topic-L8 {
+   font-size: 15px;
+   text-indent: 144px;
+ }
+
+ .topic-L9 {
+   font-size: 15px;
+   text-indent: 162px;
+ }
+
example_data/paras.txt ADDED
@@ -0,0 +1,7 @@
+ You will never believe what happened to me last week. My SUV broke down, so I had to send it to an auto shop to get a new gasket installed. Yesterday I was walking in South Boston to pick the car up and some guy got thrown through the window of a pub right in front of me. A few guys had been drinking and they got into an argument about the Red Sox, which resulted in a fight. When I went to break up the fight, one of the guys accidentally hit me with his elbow in the face, so I fell back and ##### ## #####.
+
+ I went to the emergency room and had to get surgery, which sucks because I have to wear a cast for two weeks and it cost me almost $#,###. My wrist still feels like s***, and I've had to take ##### all week. Besides that, things are pretty good. I started my master's degree in Political Science, which I'm excited about. The school has a great program, and I've already met a lot of good professors.
+
+ After the program, I'm going to go to law school, so it will help prepare me for that. The other good news is that I get to keep playing basketball while I'm in school. Usually people stop playing after undergrad, but I get to keep playing while I earn my degree, which is great. The program has a ton of good nutrition and physical therapy resources, too. I'm really excited to start playing on my new team.
+
+ As for this weekend, I don't have much going on. I have to call the phone company to see if I can get a new phone. The battery on my phone is broken, so I want to get it replaced. What are you doing this weekend?
example_data/response.json ADDED
The diff for this file is too large to render.
 
example_data/topic_dict_example.txt ADDED
@@ -0,0 +1,94 @@
+ {
+     'Automotive': {
+         'AutoRecalls': {
+             None: None
+         },
+         'AutoSafety': {
+             None: None
+         },
+         'AutoTechnology': {
+             'AutoSafetyTechnologies': {
+                 None: None
+             }
+         },
+         'AutoType': {
+             'DriverlessCars': {
+                 None: None
+             }
+         }
+     },
+     'BusinessAndFinance': {
+         'Business': {
+             'BusinessAdministration': {
+                 None: None
+             }
+         },
+         'Industries': {
+             'TelecommunicationsIndustry': {
+                 None: None
+             }
+         }
+     },
+     'Education': {
+         'CollegeEducation': {
+             'PostgraduateEducation': {
+                 None: None
+             },
+             'UndergraduateEducation': {
+                 None: None
+             }
+         }
+     },
+     'HealthyLiving': {
+         'FitnessAndExercise': {
+             'ParticipantSports': {
+                 None: None
+             }
+         }
+     },
+     'MedicalHealth': {
+         'CosmeticMedicalServices': {
+             None: None
+         },
+         'DiseasesAndConditions': {
+             'BoneAndJointConditions': {
+                 None: None
+             },
+             'Ear,NoseAndThroatConditions': {
+                 None: None
+             },
+             'Injuries': {
+                 None: None
+             }
+         },
+         'Surgery': {
+             None: None
+         }
+     },
+     'NewsAndPolitics': {
+         'Politics': {
+             None: None
+         }
+     },
+     'Sports': {
+         'Basketball': {
+             None: None
+         },
+         'Boxing': {
+             None: None
+         },
+         'CollegeSports': {
+             None: None,
+             'CollegeBasketball': {
+                 None: None
+             }
+         }
+     },
+     'Technology&Computing': {
+         'ConsumerElectronics': {
+             'Smartphones': {
+                 None: None
+             }
+         }
+     }
+ }
example_data/topic_list_example.txt ADDED
@@ -0,0 +1,20 @@
+ ['MedicalHealth>DiseasesAndConditions>Injuries',
+  'Sports>CollegeSports>CollegeBasketball',
+  'Sports>Basketball',
+  'Technology&Computing>ConsumerElectronics>Smartphones',
+  'Automotive>AutoSafety',
+  'MedicalHealth>DiseasesAndConditions>BoneAndJointConditions',
+  'Education>CollegeEducation>PostgraduateEducation',
+  'Automotive>AutoTechnology>AutoSafetyTechnologies',
+  'Automotive>AutoRecalls',
+  'Education>CollegeEducation>UndergraduateEducation',
+  'Sports>CollegeSports',
+  'Sports>Boxing',
+  'BusinessAndFinance>Business>BusinessAdministration',
+  'MedicalHealth>Surgery',
+  'Automotive>AutoType>DriverlessCars',
+  'MedicalHealth>DiseasesAndConditions>Ear,NoseAndThroatConditions',
+  'MedicalHealth>CosmeticMedicalServices',
+  'NewsAndPolitics>Politics',
+  'HealthyLiving>FitnessAndExercise>ParticipantSports',
+  'BusinessAndFinance>Industries>TelecommunicationsIndustry']
gettysburg10.wav ADDED
Binary file (441 kB).
 
requirements.txt ADDED
@@ -0,0 +1,5 @@
+ gradio==3.2
+ numpy==1.23.2
+ plotly==5.10.0
+ requests==2.28.1
+ scipy==1.9.1
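
With the README still empty, a likely way to run the dashboard locally (inferred from the relative paths in app/app.py, which opens styles.css and ../example_data/ from its own directory): create and activate a virtual environment, install the pinned dependencies with pip install -r requirements.txt, then run python app.py from inside the app/ directory.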