import gradio as gr
import os
import requests
import time
import pandas as pd
import io
from scipy.io.wavfile import write
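
# This app uploads an audio clip to AssemblyAI, submits a transcript job with
# several audio-intelligence features enabled, polls until the job finishes,
# and displays the transcript, summary, sentiment analysis, and topics.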
# AssemblyAI endpoints: the file is uploaded first, then a transcript job is
# submitted referencing the uploaded file's URL
transcript_endpoint = "https://api.assemblyai.com/v2/transcript"
upload_endpoint = "https://api.assemblyai.com/v2/upload"

# Authorization header; requires the ASSEMBLYAI_KEY environment variable.
# Content-Type is left to `requests`: it sets application/json for `json=`
# posts, while the upload endpoint receives raw bytes.
headers = {
    "Authorization": os.environ["ASSEMBLYAI_KEY"]
}

# Helper generator that streams a file from disk in chunks for upload
def _read_file(filename, chunk_size=5242880):
    with open(filename, "rb") as f:
        while True:
            data = f.read(chunk_size)
            if not data:
                break
            yield data

def _read_array(audio, chunk_size=5242880):
    """Like _read_file, but for a (sample_rate, np.array) tuple: writes the
    audio to an in-memory WAV "file" and streams it back in chunks."""
    sr, aud = audio
    # Write the audio data to an in-memory WAV file
    temp_file = io.BytesIO()
    write(temp_file, sr, aud)
    # Rewind to the start, otherwise nothing is read back out after writing
    temp_file.seek(0)
    while True:
        data = temp_file.read(chunk_size)
        if not data:
            break
        yield data

def get_audio_from_upload(audio):
    # Upload the raw audio bytes; AssemblyAI returns a URL it can transcribe from
    upload_response = requests.post(
        upload_endpoint,
        headers=headers,
        data=_read_array(audio))
    return upload_response.json()['upload_url']

def get_transcript_url(audio):
    url = get_audio_from_upload(audio)
    # JSON that tells the API which file to transcribe and which features to enable
    transcript_request = {
        # URL of the audio file to process
        "audio_url": url,
        # Turn on speaker labels
        "speaker_labels": True,
        # Turn on custom vocabulary
        "word_boost": ["assembly ai"],
        # Turn on custom spelling
        "custom_spelling": [
            {"from": ["assembly AI"], "to": "AssemblyAI"},
            {"from": ["assembly AI's"], "to": "AssemblyAI's"}
        ],
        # Turn on PII Redaction and specify policies
        "redact_pii": True,
        "redact_pii_policies": ["drug", "injury", "person_name"],
        "redact_pii_audio": True,
        # Turn on Auto Highlights
        "auto_highlights": True,
        # Turn on Content Moderation
        "content_safety": True,
        # Turn on Topic Detection
        "iab_categories": True,
        # Turn on Sentiment Analysis
        "sentiment_analysis": True,
        # Turn on Summarization and specify configuration
        "summarization": True,
        "summary_model": "informative",
        "summary_type": "bullets",
        # Turn on Entity Detection
        "entity_detection": True,
    }
    response = requests.post(
        transcript_endpoint,
        json=transcript_request,
        headers=headers  # Authorization to link this transcription with your account
    )
    # Poll the transcript until the job completes or errors out
    polling_endpoint = f"https://api.assemblyai.com/v2/transcript/{response.json()['id']}"
    while True:
        transcription_result = requests.get(polling_endpoint, headers=headers).json()
        if transcription_result['status'] == 'completed':
            break
        elif transcription_result['status'] == 'error':
            raise RuntimeError(f"Transcription failed: {transcription_result['error']}")
        else:
            time.sleep(3)
    # Sentiment analysis: keep the text, label, and confidence columns
    res = transcription_result['sentiment_analysis_results']
    df = pd.DataFrame(res)
    df = df.loc[:, ["text", "sentiment", "confidence"]]
    # Topic detection: flatten the "a>b>c" topic paths into a readable table
    topic = transcription_result['iab_categories_result']['summary']
    topics = []
    for k in topic:
        topic_dict = {}
        topic_dict["Topic"] = " > ".join(k.split(">"))
        topic_dict["Relevance"] = topic[k]
        topics.append(topic_dict)
    df_topic = pd.DataFrame(topics)
    return transcription_result['text'], transcription_result['summary'], df, df_topic.head()
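
# Minimal usage sketch outside Gradio (hypothetical "clip.wav"):
# scipy.io.wavfile.read returns the same (sample_rate, np.array) tuple
# that gr.Audio passes to get_transcript_url.
#
#   from scipy.io.wavfile import read
#   text, summary, df_sentiment, df_topics = get_transcript_url(read("clip.wav"))
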
audio_intelligence_list = [
    "Summarization",
    "Sentiment Analysis"
]
title = """<h1 align="center">🔥Conformer-1 API </h1>"""
description = """
### In this demo, you can explore the outputs of a Conformer-1 Speech Recognition Model from AssemblyAI.
"""
with gr.Blocks() as demo:
    gr.HTML(title)
    gr.Markdown(description)
    with gr.Column(elem_id="col_container"):
        #audio_intelligence_options = gr.CheckboxGroup(audio_intelligence_list, label="Audio Intelligence Options")
        inputs = gr.Audio(source="upload", label="Upload the input Audio file")
        b1 = gr.Button('Process Audio')
        # All result tabs are kept visible, since the CheckboxGroup that was
        # meant to toggle them is disabled above
        with gr.Tabs():
            with gr.TabItem('Transcript') as transcript_tab:
                transcript = gr.Textbox(label="Transcript Result")
            with gr.TabItem('Summary') as summary_tab:
                summary = gr.Textbox(label="Summary Result")
            with gr.TabItem('Sentiment Analysis') as sentiment_tab:
                sentiment_analysis = gr.Dataframe(label="Sentiment Analysis Result")
            with gr.TabItem('Topic Detection') as topic_detection_tab:
                topic_detection = gr.Dataframe(label="Topic Detection Result")
        b1.click(get_transcript_url, [inputs], [transcript, summary, sentiment_analysis, topic_detection])
        examples = gr.Examples(examples=[["audio.mp3"]], inputs=inputs, outputs=[transcript, summary, sentiment_analysis, topic_detection], cache_examples=True, fn=get_transcript_url)

demo.queue().launch(debug=True)
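
# To run locally: set the ASSEMBLYAI_KEY environment variable and run
# `python app.py`; on Hugging Face Spaces the key is typically provided
# as a repository secret.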