Spaces:

marianna13
/

annotate-audio

Runtime error

File size: 2,967 Bytes

89d4656
f97c0ed
 
 
 
 
 
 
7ccf883
f97c0ed
4766c38
89d4656
dd657ca
2b05ce7
f97c0ed
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
4766c38
f97c0ed
 
 
 
4766c38
 
 
 
f97c0ed
 
89d4656
f97c0ed
 
 
 
4766c38
 
 
55edc68
 
 
 
 
 
 
 
 
 
 
 
 
f97c0ed
89d4656
 
f97c0ed
89d4656
 
4766c38
f97c0ed
 
5ff5aa7
f97c0ed
 
 
89d4656
f97c0ed


import gradio as gr
import json
import spacy
import re
import string
import pandas as pd
import os
import subprocess
import sys

# Fetch the spaCy English model at startup, before spacy.load() is called
# further down. sys.executable guarantees the model is installed into the
# interpreter that is running this script (a bare `python` on PATH may be a
# different one), and the list form avoids going through a shell.
# check=False keeps the download best-effort, as before: if it fails,
# spacy.load() reports the missing model.
subprocess.run(
    [sys.executable, '-m', 'spacy', 'download', 'en_core_web_sm'],
    check=False,
)
import requests
from textwrap import wrap
import uuid
import gspread

# Shared spaCy pipeline used below to split the transcript into sentences.
nlp = spacy.load(name="en_core_web_sm")
# NOTE(review): en_core_web_sm presumably already assigns sentence boundaries
# through its parser; confirm the extra rule-based sentencizer is needed.
nlp.add_pipe(factory_name="sentencizer")


def download_and_save_file(URL, audio_dir, timeout=30):
    """Download ``URL`` and save the response body inside ``audio_dir``.

    The file name is the last path segment of ``URL`` with any query string
    removed.

    Args:
        URL: Address of the file to download.
        audio_dir: Existing directory the file is written into.
        timeout: Seconds to wait for the server before giving up, so a
            stalled host cannot hang the app forever.

    Returns:
        The path of the written file, ``'{audio_dir}/{file_name}'``.

    Raises:
        requests.HTTPError: If the server answers with a 4xx/5xx status.
        requests.Timeout: If the server does not respond within ``timeout``.
    """
    # Browser-like headers are sent with the request.
    # NOTE(review): the cookie value looks like it was copied from an
    # unrelated browser session -- confirm it is actually required.
    headers = {
        'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/72.0.3626.121 Safari/537.36',
        'accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8',
        'referer': 'https://www.google.com/',
        'accept-encoding': 'gzip, deflate, br',
        'accept-language': 'en-US,en;q=0.9,',
        'cookie': 'prov=6bb44cc9-dfe4-1b95-a65d-5250b3b4c9fb; _ga=GA1.2.1363624981.1550767314; __qca=P0-1074700243-1550767314392; notice-ctt=4%3B1550784035760; _gid=GA1.2.1415061800.1552935051; acct=t=4CnQ70qSwPMzOe6jigQlAR28TSW%2fMxzx&s=32zlYt1%2b3TBwWVaCHxH%2bl5aDhLjmq4Xr',
    }
    doc = requests.get(URL, headers=headers, timeout=timeout)
    # Fail loudly instead of saving an HTML error page as if it were audio.
    doc.raise_for_status()
    file_name = URL.split('/')[-1].split('?')[0]
    audio_path = f'{audio_dir}/{file_name}'
    with open(audio_path, 'wb') as f:
        f.write(doc.content)
    return audio_path



# The service-account key arrives as a JSON string in the CREDENTIALS
# environment variable; it is parsed (strict=False tolerates control
# characters inside strings) and written to credentials.json, which is the
# file gspread is pointed at further down.
credentials = os.environ['CREDENTIALS']
data = json.loads(credentials, strict=False)
with open('credentials.json', 'w') as f:
    f.write(json.dumps(data))




# Directory the downloaded audio clip is stored in.
audio_dir = 'AUDIO'
os.makedirs(audio_dir, exist_ok=True)

# Texts shown at the top of the Gradio page.
title = '🎵 Annotate audio'
description = (
    "Choose a sentence that describes audio the best "
    "if there's no such sentence please choose `No audio description`"
)



# Load the whole annotation sheet into a DataFrame; `df` and `worksheet` are
# reused by audio_demo() to write the annotation back.
gc = gspread.service_account(filename='credentials.json')
sh = gc.open('Annotated CC Audio')
worksheet = sh.sheet1
df = pd.DataFrame(worksheet.get_all_records())

# Pick one random row that has no caption yet.
unannotated = df[df['caption'] == '']
if unannotated.empty:
    raise ValueError('No unannotated rows left in the worksheet')
sample_df = unannotated.sample(1)

# Only the audio URL and the full text are used below (the row is unpacked
# positionally, so this relies on the sheet having exactly seven columns).
url, audio_url, _, _, full_text, _, _ = sample_df.values[0]
audio_path = download_and_save_file(audio_url, audio_dir)

# Remove the ###audio###N### markers BEFORE stripping punctuation: '#' is
# itself punctuation, so doing it the other way round turns each marker into
# a stray "audioN" word that the regex can no longer match.
full_text = re.sub(r'###audio###\d###', '', full_text)
full_text = full_text.translate(str.maketrans('', '', string.punctuation))
# NOTE(review): sentences are segmented on punctuation-free text, which
# presumably hurts segmentation quality -- confirm this order is intended.
sents = ['\n'.join(wrap(s.text, width=70)) for s in nlp(full_text).sents]
sents.append('No audio description')

def audio_demo(cap, audio, annotator, audio_url):
    """Store the chosen caption for one audio clip and push it to the sheet.

    Args:
        cap: Caption selected by the annotator.
        audio: Value of the audio component; not used here, it is only part
            of the signature because the interface passes every input.
        annotator: Annotator name; a random UUID string is used when empty.
        audio_url: URL identifying the row(s) of ``df`` to update.

    Returns:
        The string ``'success!'``.
    """
    annotator = annotator if annotator else str(uuid.uuid4())

    # Assign through a single df.loc[...] call. The chained form
    # df[col].loc[mask] = value may write to a temporary copy and leave df
    # untouched (always so under pandas copy-on-write).
    row_mask = df['audio_url'] == audio_url
    df.loc[row_mask, 'caption'] = cap
    df.loc[row_mask, 'annotator'] = annotator

    # The sheet is rewritten in full: header row followed by every data row.
    # NOTE(review): df is the snapshot loaded at startup, so changes made to
    # the sheet by others since then would presumably be overwritten -- confirm.
    worksheet.update([df.columns.values.tolist()] + df.values.tolist())
    return 'success!'

# audio_path, audio_url and sents are already defined at module level from
# the sampled row. The former `sample_df()` call here tried to call a
# DataFrame and crashed the app at startup with a TypeError, so it is gone.

# One-page form: pick a description, listen to the clip, optionally give a
# name. The clip's URL travels in a hidden textbox so audio_demo() knows
# which sheet row to update.
iface = gr.Interface(
    audio_demo,
    inputs=[
        gr.Radio(sents, label='audio description'),
        gr.Audio(audio_path, type="filepath"),
        gr.Textbox(label='please enter your name'),
        gr.Textbox(value=audio_url, visible=False),
    ],
    outputs=[gr.Textbox(label="output")],
    allow_flagging="never",
    title=title,
    description=description,
)

if __name__ == "__main__":
    iface.launch(show_error=True, debug=True)