Spaces:

marianna13
/

annotate-audio

Runtime error

App Files Files Community

annotate-audio / app.py

marianna13

Update app.py

4766c38 over 2 years ago

raw

history blame

2.64 kB

	import json
	import os
	import re
	import string
	from textwrap import wrap

	import gradio as gr
	import gspread
	import pandas as pd
	import requests
	import spacy

	# Load spaCy's small English pipeline once at import time; it is used further
	# down to split the sampled text into sentences.
	nlp = spacy.load("en_core_web_sm")
	# Append the rule-based sentence-boundary component to the pipeline.
	# NOTE(review): the loaded model may already assign sentence boundaries on
	# its own -- confirm whether this extra component changes the result.
	nlp.add_pipe('sentencizer')


	def download_and_save_file(URL, audio_dir, timeout=30):
	    """Download ``URL`` and store the response body inside ``audio_dir``.

	    The file name is the last path segment of the URL with any query string
	    or fragment removed. ``'audio'`` is used when that segment is empty,
	    for example when the URL ends with a slash.

	    Args:
	        URL: Address of the file to fetch.
	        audio_dir: Directory the file is written into; created if missing.
	        timeout: Seconds to wait for the server before giving up.

	    Returns:
	        The path of the saved file.

	    Raises:
	        requests.RequestException: If the request fails, times out, or the
	            server answers with an HTTP error status.
	    """
	    # Browser-like request headers kept from the original implementation,
	    # with three corrections:
	    #   * 'accept' ends with '*/*;q=0.8' again (it read ',/;q=0.8', which is
	    #     not a valid media range and left no entry that covers audio).
	    #   * 'accept-encoding' is no longer forced: requests advertises only the
	    #     encodings it can decode, whereas claiming 'br' without a brotli
	    #     decoder installed would store still-compressed bytes.
	    #   * the hard-coded browser session cookie is intentionally not sent;
	    #     replaying one fixed user's cookie to every download host leaks it.
	    headers = {
	        'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/72.0.3626.121 Safari/537.36',
	        'accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8',
	        'referer': 'https://www.google.com/',
	        'accept-language': 'en-US,en;q=0.9,',
	    }

	    # A timeout keeps a stalled host from hanging the app at start-up.
	    response = requests.get(URL, headers=headers, timeout=timeout)
	    # Fail loudly instead of saving an HTML error page as if it were audio.
	    response.raise_for_status()

	    # Cut the query string and fragment off before taking the last path
	    # segment, so a '/' inside the query cannot be mistaken for the path.
	    url_path = URL.split('?', 1)[0].split('#', 1)[0]
	    file_name = url_path.rsplit('/', 1)[-1] or 'audio'

	    os.makedirs(audio_dir, exist_ok=True)
	    audio_path = os.path.join(audio_dir, file_name)
	    with open(audio_path, 'wb') as f:
	        f.write(response.content)
	    return audio_path



	# The service-account key is supplied as a JSON string in the CREDENTIALS
	# environment variable; a missing variable raises KeyError right here.
	credentials = os.environ['CREDENTIALS']
	# strict=False lets the parser accept raw control characters inside strings.
	data = json.loads(credentials, strict=False)
	# Write the key to disk because the client below is configured by filename.
	with open('credentials.json', 'w') as f:
	    json.dump(data, f)


	# Open the first worksheet of the annotation spreadsheet, authenticating
	# with the service-account key stored in credentials.json.
	gc = gspread.service_account(filename='credentials.json')
	sh = gc.open('Annotated CC Audio')
	worksheet = sh.sheet1
	# Mirror the whole sheet in a DataFrame (one row per record).
	df = pd.DataFrame(worksheet.get_all_records())

	# Rows whose caption is still empty are the ones waiting for annotation.
	sample_df = df[df['caption'] == '']
	if sample_df.empty:
	    # Sampling from an empty selection fails with an opaque message, so
	    # report the actual condition instead (same exception type).
	    raise ValueError('No rows without a caption are left to annotate.')
	# Pick one pending row at random for this session.
	sample_df = sample_df.sample(1)

	# Heading and help text passed to the Gradio interface below.
	title = '🎵 Annotate audio'
	description = '''Choose a sentence that describes audio the best if there's no such sentence please choose `No audio description`'''

	# Directory that receives the downloaded audio file; created if it does not
	# exist yet.
	audio_dir = 'AUDIO'
	os.makedirs(audio_dir, exist_ok=True)

	# The sampled row is unpacked by position, so the sheet must have exactly
	# four columns: id, audio URL, source text and one more value that is
	# ignored (presumably the empty caption -- confirm against the sheet).
	audio_id, audio_url, full_text, _ = sample_df.values[0]
	audio_path = download_and_save_file(audio_url, audio_dir)

	# Remove the inline '###audio###<n>###' markers BEFORE stripping punctuation.
	# '#' is itself punctuation, so doing it the other way round (as before)
	# turned every marker into 'audio<n>' and the pattern could never match.
	# '\d+' also covers marker numbers with more than one digit.
	full_text = re.sub(r'###audio###\d+###', '', str(full_text))
	full_text = full_text.translate(str.maketrans('', '', string.punctuation))
	# NOTE(review): sentences are split on text that no longer contains any
	# punctuation; consider splitting first and stripping afterwards.
	# Each sentence is wrapped to 70 columns so long options stay readable.
	sents = ['\n'.join(wrap(s.text, width=70)) for s in nlp(full_text).sents]
	# Fallback choice for clips that none of the sentences describe.
	sents.append('No audio description')

	def audio_demo(cap, audio, audio_id):
	    """Store the chosen caption for one audio clip and push it to the sheet.

	    Args:
	        cap: Description selected in the dropdown; empty or ``None`` when
	            nothing was selected.
	        audio: Value of the audio component. Unused, but the interface
	            passes one argument per input component.
	        audio_id: Identifier of the annotated row; anything ``int()``
	            accepts.

	    Returns:
	        ``'success!'`` once the sheet has been updated, or a short prompt
	        when no description was chosen (nothing is written in that case).
	    """
	    # Without this guard an empty selection was written into the table and
	    # still reported as a success.
	    if not cap:
	        return 'Please choose an audio description first.'

	    # Assumes ids are 1-based and line up with the DataFrame's default
	    # 0-based index -- TODO confirm against the sheet.
	    df.at[int(audio_id) - 1, 'caption'] = cap
	    # Rewrite the worksheet from the DataFrame: header row first, then data.
	    worksheet.update([df.columns.values.tolist()] + df.values.tolist())
	    return 'success!'


	# Annotation form: a dropdown with the candidate sentences, a player for the
	# downloaded clip, and a hidden textbox that carries the row id through to
	# audio_demo. The handler's return value is shown in a single textbox.
	iface = gr.Interface(
	    fn=audio_demo,
	    inputs=[
	        gr.Dropdown(choices=sents, label='audio description'),
	        gr.Audio(value=audio_path, type="filepath"),
	        gr.Textbox(value=audio_id, visible=False),
	    ],
	    outputs=[gr.Textbox(label="output")],
	    title=title,
	    description=description,
	    allow_flagging="never",
	)

	# Start the Gradio server only when this file is executed as a script.
	# show_error surfaces handler exceptions in the UI; debug keeps the process
	# attached and prints errors to the console.
	if __name__ == "__main__":
	    iface.launch(show_error=True, debug=True)