Spaces:

marianna13
/

annotate-audio

Runtime error

File size: 2,639 Bytes

f97c0ed
 
 
 
 
 
 
 
4766c38
2b05ce7
f97c0ed
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
4766c38
f97c0ed
 
 
 
4766c38
 
 
 
f97c0ed
 
4766c38
 
 
 
 
f97c0ed
 
 
 
4766c38
 
 
 
 
f97c0ed
4766c38
f97c0ed
 
4766c38
f97c0ed
4766c38
 
f97c0ed
 
 
 
 
4766c38
f97c0ed

import json
import os
import re
import string
from textwrap import wrap

import gradio as gr
import gspread
import pandas as pd
import requests
import spacy

# Shared spaCy pipeline: load the "en_core_web_sm" model once at import time
# and append the "sentencizer" component to it.
nlp = spacy.load('en_core_web_sm')
nlp.add_pipe("sentencizer")


def download_and_save_file(URL, audio_dir, timeout=30):
    """Download ``URL`` and store the response body inside ``audio_dir``.

    The file name is the last path segment of ``URL`` with any query string
    removed.

    Args:
        URL: Address of the file to download.
        audio_dir: Existing directory the file is written into.
        timeout: Seconds to wait for the server before giving up, so a
            stalled host cannot hang the caller forever.

    Returns:
        The path of the saved file, ``f'{audio_dir}/{file_name}'``.

    Raises:
        ValueError: If no file name can be derived from ``URL`` (for example
            when it ends with ``/``).
        requests.HTTPError: If the server answers with a 4xx/5xx status.
        requests.RequestException: On connection problems or timeout.
    """
    # Derive and validate the target name first so a bad URL fails before
    # any network traffic happens.
    file_name = URL.split('/')[-1].split('?')[0]
    if not file_name:
        raise ValueError(f'Cannot derive a file name from URL: {URL!r}')
    audio_path = f'{audio_dir}/{file_name}'

    # Browser-like headers sent with the request.
    # NOTE(review): the hard-coded 'cookie' value is sent to every host that
    # is downloaded from -- confirm it is still needed and not sensitive.
    # NOTE(review): 'br' is advertised in accept-encoding; requests only
    # decodes Brotli when a brotli package is installed -- confirm.
    headers = {
        'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/72.0.3626.121 Safari/537.36',
        'accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8',
        'referer': 'https://www.google.com/',
        'accept-encoding': 'gzip, deflate, br',
        'accept-language': 'en-US,en;q=0.9,',
        'cookie': 'prov=6bb44cc9-dfe4-1b95-a65d-5250b3b4c9fb; _ga=GA1.2.1363624981.1550767314; __qca=P0-1074700243-1550767314392; notice-ctt=4%3B1550784035760; _gid=GA1.2.1415061800.1552935051; acct=t=4CnQ70qSwPMzOe6jigQlAR28TSW%2fMxzx&s=32zlYt1%2b3TBwWVaCHxH%2bl5aDhLjmq4Xr',
    }

    response = requests.get(URL, headers=headers, timeout=timeout)
    # Without this check an HTML error page would be saved as if it were audio.
    response.raise_for_status()

    with open(audio_path, 'wb') as f:
        f.write(response.content)
    return audio_path



# --- Google Sheets access ---------------------------------------------------
# The service-account key arrives as a JSON string in the CREDENTIALS
# environment variable and is written to credentials.json for gspread.
# NOTE(review): this leaves the secret on disk in the working directory.
credentials = os.environ['CREDENTIALS']
data = json.loads(credentials, strict=False)
with open('credentials.json', 'w') as f:
    json.dump(data, f)


gc = gspread.service_account(filename='credentials.json')
sh = gc.open('Annotated CC Audio')
worksheet = sh.sheet1
df = pd.DataFrame(worksheet.get_all_records())

# Pick one random row that has no caption yet.  Fail with a clear message
# when everything is already annotated instead of pandas' generic sampling
# error.
unannotated = df[df['caption'] == '']
if unannotated.empty:
    raise ValueError('No rows with an empty caption are left to annotate.')
sample_df = unannotated.sample(1)

title = '🎵 Annotate audio'
description = '''Choose a sentence that describes audio the best if there's no such sentence please choose `No audio description`'''

audio_dir = 'AUDIO'
os.makedirs(audio_dir, exist_ok=True)

# assumes the sheet has exactly four columns ordered id, url, text, caption
# -- TODO confirm against the spreadsheet.
audio_id, audio_url, full_text, _ = sample_df.values[0]
audio_path = download_and_save_file(audio_url, audio_dir)

# Remove the ###audio###N### markers BEFORE stripping punctuation: '#' is part
# of string.punctuation, so doing it afterwards can never match and leaves
# "audioN" residue inside the candidate sentences.
full_text = re.sub(r'###audio###\d+###', '', full_text)
full_text = full_text.translate(str.maketrans('', '', string.punctuation))

# One dropdown choice per detected sentence, wrapped at 70 characters;
# whitespace-only sentences are dropped so no blank choice is offered.
sents = [
    '\n'.join(wrap(s.text, width=70))
    for s in nlp(full_text).sents
    if s.text.strip()
]
sents.append('No audio description')

def audio_demo(cap, audio, audio_id):
    """Store the chosen caption for one audio clip in the Google Sheet.

    Args:
        cap: Caption selected in the dropdown.
        audio: Value of the audio component; not used by this function.
        audio_id: 1-based id of the clip; ``int(audio_id) - 1`` is its row
            label in the module-level ``df``.

    Returns:
        A short status message for the output textbox.
    """
    if not cap:
        # Nothing was selected: leave the sheet untouched.
        return 'Please choose a description first.'

    row_index = int(audio_id) - 1
    df.at[row_index, 'caption'] = cap

    # Write only the affected cell.  Re-uploading the whole start-up snapshot
    # would overwrite any edits made to the sheet since it was loaded.
    # Sheet rows are 1-based and row 1 holds the header that
    # get_all_records() consumed, hence the +2; sheet columns are 1-based.
    sheet_row = row_index + 2
    sheet_col = df.columns.get_loc('caption') + 1
    worksheet.update_cell(sheet_row, sheet_col, cap)
    return 'success!'


# Input widgets, listed in the positional order audio_demo receives them:
# the caption choice, the audio player, and a hidden box carrying the clip id.
_caption_choice = gr.Dropdown(sents, label='audio description')
_audio_player = gr.Audio(audio_path, type="filepath")
_hidden_audio_id = gr.Textbox(value=audio_id, visible=False)

# Single-function UI: submitting calls audio_demo and shows its status text.
iface = gr.Interface(
    audio_demo,
    inputs=[_caption_choice, _audio_player, _hidden_audio_id],
    outputs=[gr.Textbox(label="output")],
    allow_flagging="never",
    title=title,
    description=description,
)

if __name__ == "__main__":
    iface.launch(show_error=True, debug=True)