marianna13 committed on
Commit
9d65325
·
1 Parent(s): ad82935

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +38 -21
app.py CHANGED
@@ -1,19 +1,16 @@
1
 
2
  import gradio as gr
3
  import json
4
- import spacy
5
  import re
6
  import string
7
  import pandas as pd
8
  import os
9
- os.system('python -m spacy download en_core_web_sm')
10
  import requests
11
  from textwrap import wrap
12
  import uuid
13
  import gspread
 
14
 
15
- nlp = spacy.load("en_core_web_sm")
16
- nlp.add_pipe('sentencizer')
17
 
18
 
19
  def download_and_save_file(URL, audio_dir):
@@ -48,32 +45,52 @@ description = '''Choose a sentence that describes audio the best if there's no s
48
  audio_dir = 'AUDIO'
49
  os.makedirs(audio_dir, exist_ok=True)
50
 
51
-
52
-
53
- gc = gspread.service_account(filename='credentials.json')
54
- sh = gc.open('Annotated CC Audio')
55
- worksheet = sh.sheet1
56
- df = pd.DataFrame(worksheet.get_all_records())
57
- sample_df = df[df['caption']==''].sample(1)
58
-
59
- url, audio_url, _, _, full_text, _, _ = sample_df.values[0]
60
- audio_path = download_and_save_file(audio_url, audio_dir)
61
- full_text = full_text.translate(str.maketrans('', '', string.punctuation))
62
- sents = ['\n'.join(wrap(re.sub(r'###audio###\d###', '', s.text), width=70) )for s in nlp(full_text).sents]
63
- sents.append('No audio description')
64
-
65
- def audio_demo(cap, audio, annotator, audio_url):
 
 
 
 
 
 
 
 
 
66
  annotator = annotator if annotator else str(uuid.uuid4())
67
-
 
 
 
68
  df['caption'].loc[df['audio_url'] == audio_url] = cap
69
  df['annotator'].loc[df['audio_url'] == audio_url] = annotator
70
  worksheet.update([df.columns.values.tolist()] + df.values.tolist())
71
  return 'success!'
72
 
 
73
 
74
  iface = gr.Interface(
75
  audio_demo,
76
- inputs=[gr.Radio(sents, label='audio description'), gr.Audio(audio_path, type="filepath"), gr.Textbox(label='please enter your name'), gr.Textbox(value=audio_url, visible=False)],
 
 
 
 
 
 
 
77
  outputs=[gr.Textbox(label="output")],
78
  allow_flagging="never",
79
  title=title,
 
1
 
2
  import gradio as gr
3
  import json
 
4
  import re
5
  import string
6
  import pandas as pd
7
  import os
 
8
  import requests
9
  from textwrap import wrap
10
  import uuid
11
  import gspread
12
+ import ast
13
 
 
 
14
 
15
 
16
  def download_and_save_file(URL, audio_dir):
 
45
  audio_dir = 'AUDIO'
46
  os.makedirs(audio_dir, exist_ok=True)
47
 
48
def sample_df():
    """Pick one un-annotated row from the Google Sheet and prepare it for the UI.

    Opens the 'Annotated CC Audio' spreadsheet using the service-account
    credentials in ``credentials.json``, loads its first worksheet into a
    DataFrame, randomly selects one row whose ``caption`` cell is empty and
    downloads that row's audio with ``download_and_save_file``.

    Returns:
        A 7-tuple ``(audio_path, audio_url, sibling_elems, audio_meta,
        page_title, df, worksheet)``:

        * ``audio_path`` -- whatever ``download_and_save_file`` returns for
          the sampled URL (presumably the local file path; confirm against
          that helper).
        * ``audio_url`` -- the sampled row's ``audio_url`` cell.
        * ``sibling_elems`` -- the row's ``sibling_elems`` literal parsed into
          a list, with newlines stripped, empty entries dropped and duplicates
          removed while keeping first-seen order.
        * ``audio_meta`` -- a ``'; '``-joined string of ``key: value`` pairs
          for the displayable tags, or ``''`` when there are none.
        * ``page_title`` -- the row's ``page_title`` cell, unchanged.
        * ``df`` / ``worksheet`` -- the full sheet contents and the gspread
          worksheet handle, so the caller can write annotations back.

    Raises:
        ValueError: if no row with an empty caption is left to annotate.
    """
    gc = gspread.service_account(filename='credentials.json')
    sh = gc.open('Annotated CC Audio')
    worksheet = sh.sheet1
    df = pd.DataFrame(worksheet.get_all_records())

    unlabelled = df[df['caption'] == '']
    if unlabelled.empty:
        # Fail with a readable message instead of pandas' generic sampling error.
        raise ValueError('no rows with an empty caption are left to annotate')
    row = unlabelled.sample(1)

    audio_url, audio_meta, page_title, sibling_elems = row[
        ['audio_url', 'audio_meta', 'page_title', 'sibling_elems']
    ].values[0]
    audio_path = download_and_save_file(audio_url, audio_dir)

    # The cell stores a Python list literal; literal_eval parses it without
    # executing arbitrary code.
    cleaned = (s.replace('\n', '') for s in ast.literal_eval(sibling_elems))
    # dict.fromkeys de-duplicates while keeping a stable, first-seen order so
    # the checkbox choices do not shuffle between runs.
    sibling_elems = list(dict.fromkeys(s for s in cleaned if len(s) > 0))

    shown_tags = {'title', 'album', 'artist', 'genre', 'date', 'language'}
    tags = ast.literal_eval(audio_meta).get('tags') or {}
    # Always return a string so the caller never has to handle None or a dict.
    audio_meta = '; '.join(
        f'{key}: {value}' for key, value in tags.items()
        if key.lower() in shown_tags
    )

    return audio_path, audio_url, sibling_elems, audio_meta, page_title, df, worksheet
70
+
71
def audio_demo(siblings, page_title, audio_meta, audio, annotator, audio_url):
    """Store the annotator's selection as the caption for one audio clip.

    The selected items from the three checkbox groups are concatenated, in
    the order ``siblings``, ``page_title``, ``audio_meta``, into a single
    newline-separated caption. The caption and the annotator name are written
    into the module-level ``df`` for every row whose ``audio_url`` matches,
    and the whole table is then pushed back to the module-level ``worksheet``.

    Args:
        siblings: selected sibling-element texts (list of strings).
        page_title: selected page title entries (list of strings).
        audio_meta: selected audio-metadata entries (list of strings).
        audio: the audio input value; not used by this function.
        annotator: name typed by the user; a random UUID string is used when
            it is empty.
        audio_url: URL identifying the row(s) to update.

    Returns:
        The string ``'success!'``.
    """
    annotator = annotator if annotator else str(uuid.uuid4())

    # Collect into a fresh list so the caller's ``siblings`` is not mutated.
    parts = []
    for selection in (siblings, page_title, audio_meta):
        if selection is None:
            continue
        if isinstance(selection, str):
            # A bare string is one item; extend() would split it into characters.
            parts.append(selection)
        else:
            parts.extend(selection)
    cap = '\n'.join(s for s in parts if s)

    # Single .loc call with (row mask, column): chained ``df[col].loc[mask] = v``
    # may assign to a temporary copy and silently leave ``df`` unchanged.
    row_mask = df['audio_url'] == audio_url
    df.loc[row_mask, 'caption'] = cap
    df.loc[row_mask, 'annotator'] = annotator

    # Rewrite the sheet: header row followed by every data row.
    worksheet.update([df.columns.values.tolist()] + df.values.tolist())
    return 'success!'
81
 
82
+ audio_path, audio_url, sibling_elems, audio_meta, page_title, df, worksheet = sample_df()
83
 
84
  iface = gr.Interface(
85
  audio_demo,
86
+ inputs=[
87
+ gr.CheckboxGroup(sibling_elems, label='sibling elements text'),
88
+ gr.CheckboxGroup(label='page title', choices=[page_title]),
89
+ gr.CheckboxGroup([audio_meta], label='audio metadata'),
90
+ gr.Audio(audio_path, type="filepath"),
91
+ gr.Textbox(label='please enter your name'),
92
+ gr.Textbox(value=audio_url, visible=False)
93
+ ],
94
  outputs=[gr.Textbox(label="output")],
95
  allow_flagging="never",
96
  title=title,