Spaces:
Sleeping
Sleeping
File size: 8,209 Bytes
d5710aa 960259f d5710aa 960259f d5710aa 960259f d5710aa 960259f d5710aa e6db824 d5710aa 5f1d785 d5710aa e6db824 d5710aa e6db824 d5710aa f8b9a81 e6db824 d5710aa e6db824 d5710aa e6db824 d5710aa 960259f d5710aa 5934515 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 |
import os
import json
import random
import pymongo
import requests
import gradio as gr
from collections import defaultdict
from uuid import uuid4
# MongoDB client; the connection string is read from the DB_URL environment variable.
DB_CONN = pymongo.MongoClient(os.environ.get("DB_URL"))

# Every document of vo_data_dump.qa_video_dump that is not flagged done,
# keyed by its source_id.
line_details = {
    row["source_id"]: row
    for row in DB_CONN['vo_data_dump']['qa_video_dump'].find()
    if row.get("done") is not True
}

# The same documents grouped per video: video_id -> list of line documents.
videos_list = defaultdict(list)
for line_detail in line_details.values():
    videos_list[line_detail["video_id"]].append(line_detail)
# Functions
def render_video_details(video_id):
    """Build the values for the video-level widgets of the selected video.

    Args:
        video_id: Key into the module-level ``videos_list`` mapping. May be
            ``None`` or unknown (e.g. when the dropdown is cleared).

    Returns:
        A 7-tuple: video title, source language, target language, video
        duration, an update for the line-id dropdown, the HTML embed (or
        ``None``) and the streamable video URL (or ``None``). Exactly one of
        the last two is set, depending on whether the link starts with
        ``gs://``. For an unknown/empty video all fields are blank.

    Raises:
        requests.HTTPError: If the CF_URL service answers with an error status.
    """
    # ``videos_list`` is a defaultdict: indexing it with a missing id would
    # silently insert an empty entry, so look the id up with .get() instead.
    video_detail = videos_list.get(video_id)
    if not video_detail:
        return "", "", "", "", gr.Dropdown.update(choices=[], value=None), None, None

    # Video-level fields are read from the first line document of the video.
    first_line = video_detail[0]
    video_title = first_line["video_title"]
    source_language = first_line["source_language"]
    target_language = first_line["target_language"]
    video_duration = first_line["video_duration"]
    video_link = first_line["video_link"]
    line_ids = [v["source_id"] for v in video_detail if v.get("done") is not True]

    if video_link.startswith("gs://"):
        # gs:// links are exchanged for a playable URL by POSTing them to CF_URL;
        # the response body is used as that URL.
        response = requests.post(os.environ["CF_URL"], params={"url": video_link})
        response.raise_for_status()
        video_html_embed, video_streamable_url = None, response.text
    else:
        # Any other link is embedded in an iframe, with the watch URL rewritten
        # to its embed form.
        embed_url = video_link.replace('watch?v=', 'embed/')
        video_html_embed = f"""<iframe width="560" height="315" src="{embed_url}" title="YouTube video player" frameborder="0" allow="accelerometer; autoplay; clipboard-write; encrypted-media; gyroscope; picture-in-picture; web-share" allowfullscreen></iframe>"""
        video_streamable_url = None

    first_line_id = line_ids[0] if line_ids else None
    return (
        video_title,
        source_language,
        target_language,
        video_duration,
        gr.Dropdown.update(choices=line_ids, value=first_line_id),
        video_html_embed,
        video_streamable_url,
    )
def render_line_details(line_id):
    """Fetch the audio and texts shown for the selected line.

    Args:
        line_id: Key into the module-level ``line_details`` mapping. May be
            ``None`` or unknown (e.g. when the line dropdown is emptied).

    Returns:
        A 3-tuple: the downloaded audio bytes, the source transcript and the
        translator's translated text. For an unknown line the audio is
        ``None`` and both texts are empty strings.

    Raises:
        requests.HTTPError: If either HTTP call answers with an error status.
    """
    line_detail = line_details.get(line_id)
    if line_detail is None:
        # Nothing selected: clear the widgets instead of raising KeyError.
        return None, "", ""

    source_transcript = line_detail["source_transcript"]
    translation = line_detail["translator_translated_text"]

    # The stored audio URL is POSTed to CF_URL; the response body is the URL
    # the audio is then downloaded from (presumably a signed URL).
    response = requests.post(os.environ["CF_URL"], params={"url": line_detail["source_audio_url"]})
    # Fail loudly rather than treating an error body as a URL.
    response.raise_for_status()
    signed_uri = response.text

    audio_response = requests.get(signed_uri)
    # Likewise, do not hand an error page to the audio widget as audio data.
    audio_response.raise_for_status()
    return audio_response.content, source_transcript, translation
def dump_data_db(line_id, video_type, video_subject, video_topics, video_tone, gender, demography, total_time, source_language, target_language, comments, accent, source_transcript, translation, speaker_gender):
    """Persist the reviewed line and its video metadata, then pick the next line.

    Three writes go to the ``vo_data_dump`` database: the video record is
    upserted into ``video_data``, the line record is upserted into
    ``line_data``, and the source document in ``qa_video_dump`` is flagged
    ``done``. The line is then removed from the in-memory ``videos_list``.

    Args:
        line_id: ``source_id`` of the line being submitted.
        video_type, video_subject, video_topics, video_tone, gender,
        demography, comments: Video-level annotations stored as given.
        total_time: Video duration; converted with ``float()``.
        source_language, target_language: Stored as ``src_lang`` / ``tgt_lang``.
        accent, speaker_gender: Line-level annotations.
        source_transcript, translation: Line texts; the speaking rate is the
            word count of ``translation`` divided by the line duration.

    Returns:
        A pair of values for (line-id dropdown, video-id dropdown). While the
        current video still has lines, the line dropdown moves to the line
        that followed the removed one (or the last line if the removed one was
        last) and the video id is returned unchanged. Otherwise a random
        remaining video is selected; if none remain both dropdowns are emptied.

    Raises:
        KeyError: If ``line_id`` is not in ``line_details``.
        ValueError: If ``total_time`` cannot be converted to ``float``.
        ZeroDivisionError: If the stored line duration is 0.
    """
    line_detail = line_details[line_id]
    video_id = line_detail["video_id"]
    line_data = {
        "line_id": line_id,
        "video_id": video_id,
        "src_text": source_transcript,
        "intermediate_text": line_detail["english_transcript"],
        "tgt_text": translation,
        "accent": accent,
        "speaking_rate": round(len(translation.split()) / line_detail["line_duration"], 3),
        "src_time": line_detail["line_duration"],
        "gender": speaker_gender,
        "version": "v1",
        "done": True,
    }
    video_data = {
        "video_id": video_id,
        "type": video_type,
        "src_lang": source_language,
        "tgt_lang": target_language,
        "subject": video_subject,
        "topics": video_topics,
        "tone": video_tone,
        "gender": gender,
        "demography": demography,
        "total_time": float(total_time),
        "comments": comments,
    }

    database = DB_CONN['vo_data_dump']
    database['video_data'].update_one({'video_id': video_id}, {"$set": video_data}, upsert=True)
    database['line_data'].update_one({'line_id': line_id}, {"$set": line_data}, upsert=True)
    database['qa_video_dump'].update_one({'source_id': line_id}, {"$set": {"done": True}}, upsert=True)

    # Drop the submitted line from the in-memory list of its video. .get()
    # avoids inserting an empty entry into the defaultdict for an unknown id.
    pending_lines = videos_list.get(video_id, [])
    removed_index = next(
        (idx for idx, pending in enumerate(pending_lines) if pending["source_id"] == line_id),
        None,
    )
    if removed_index is None:
        # Line was not in the list (already removed): fall back to the last entry.
        removed_index = len(pending_lines) - 1
    else:
        del pending_lines[removed_index]

    if pending_lines:
        source_ids = [v["source_id"] for v in pending_lines]
        # After the deletion the old index points at the following line; clamp
        # it so that removing the last line does not index past the end.
        next_index = min(removed_index, len(source_ids) - 1)
        return gr.Dropdown.update(choices=source_ids, value=source_ids[next_index]), video_id

    # The video has no lines left: forget it and move to another video.
    videos_list.pop(video_id, None)
    # Skip entries with no lines so the chosen video always has one to show.
    remaining_video_ids = [vid for vid, lines in videos_list.items() if lines]
    if not remaining_video_ids:
        # Everything has been processed; empty both dropdowns instead of
        # letting random.choice() fail on an empty sequence.
        return (
            gr.Dropdown.update(choices=[], value=None),
            gr.Dropdown.update(choices=[], value=None),
        )

    new_video_id = random.choice(remaining_video_ids)
    source_ids = [v["source_id"] for v in videos_list[new_video_id] if v.get('done') is not True]
    first_source_id = source_ids[0] if source_ids else None
    return (
        gr.Dropdown.update(choices=source_ids, value=first_source_id),
        gr.Dropdown.update(choices=remaining_video_ids, value=new_video_id),
    )
# UI
with gr.Blocks() as demo:
    # NOTE(review): the indentation of this block was reconstructed from the
    # statement order; each Row is assumed to contain only the components
    # declared directly after it. Confirm against the intended layout.
    gr.Markdown("## Data Aggregation")

    # Video selection and its read-only title.
    with gr.Row():
        video_id_dropdown = gr.Dropdown(list(videos_list), label="Video IDs")
        video_title_text = gr.Textbox(label="Video Title", interactive=False)

    # Two players: render_video_details fills exactly one of them.
    with gr.Row():
        video_html = gr.HTML(label="Video")
        video_playable = gr.Video(label="Video")

    gr.Markdown("""Please ensure to fill these sections and do not leave them empty. An example of how to update `Video Type`, `Video Subject` and `Video Topics`:
- If the video is of a `Biology lecture`, **Video Type**: _learning_, **Video Subject**: _science_, **Video Topics**: _biology_
- If the video is of `Cooking`, **Video Type**: _cooking_, **Video Subject**: _cooking_, **Video Topics**: _indian recipe_""")

    # Video-level annotations.
    with gr.Row():
        video_type_dropdown = gr.Dropdown(
            ["learning", "audiobook", "podcast", "vlog", "news", "cooking", "review"],
            label="Video Type",
            value="learning",
            allow_custom_value=True,
            info="Feel free to add new item if none of them is right.",
        )
        video_subject_dropdown = gr.Dropdown(
            ["tech", "science", "lifestyle", "cooking", "travel", "finance", "politics"],
            label="Video Subject",
            value="tech",
            allow_custom_value=True,
            info="Feel free to add new item if none of them is right.",
        )
        video_topics_text = gr.Textbox(label="Video Topics")

    with gr.Row():
        video_tone_dropdown = gr.Dropdown(
            ["casual", "semi-formal", "formal"],
            label="Video Tone",
            value="semi-formal",
            info="Tone used in the video.",
        )
        gender_dropdown = gr.Dropdown(
            ["male", "female", "non-binary"],
            label="Gender",
            value="female",
            info="Gender used by the speaker in the video as whole.",
        )
        demography_text = gr.Textbox(
            label="Demography",
            info="Demography like normal, old, young, toddler, etc.",
        )

    # Hidden carriers for the language pair; filled by render_video_details
    # and passed on to dump_data_db.
    with gr.Row():
        source_language_text = gr.Textbox(label="Source Language", interactive=False, visible=False)
        target_language_text = gr.Textbox(label="Target Language", interactive=False, visible=False)

    with gr.Row():
        total_time_text = gr.Textbox(label="Video Duration", interactive=False)
        comments_text = gr.Textbox(
            label="Comments",
            info="Any extra comments regarding the nature of video.",
        )

    # Line-level annotations.
    gr.Markdown("Video Line Information")
    line_id_dropdown = gr.Dropdown([], label="Line IDs")

    with gr.Row():
        audio_item = gr.Audio(label="Source Audio", type="filepath")
        accent_text = gr.Textbox(
            label="Accent",
            value="indian",
            info="Accent of the person speaking. For example, indian, american, british.",
        )
        speaker_gender_dropdown = gr.Dropdown(
            ["female", "male"],
            label="Audio Gender",
            value="female",
            info="Gender of the speaker",
        )

    with gr.Row():
        source_transcript_text = gr.Textbox(
            label="Source Transcript",
            info="Transcription of the above rendered audio. Please ensure correct punctuations based on the audio.",
        )
        translation_text = gr.Textbox(label="Translation", interactive=False, visible=False)

    update_button = gr.Button("Update")

    # Actions
    # Selecting a video refreshes the video widgets and the line-id choices.
    video_id_dropdown.change(
        fn=render_video_details,
        inputs=video_id_dropdown,
        outputs=[
            video_title_text,
            source_language_text,
            target_language_text,
            total_time_text,
            line_id_dropdown,
            video_html,
            video_playable,
        ],
    )
    # Selecting a line loads its audio, transcript and translation.
    line_id_dropdown.change(
        fn=render_line_details,
        inputs=line_id_dropdown,
        outputs=[audio_item, source_transcript_text, translation_text],
    )
    # "Update" stores the annotations and advances both dropdowns.
    update_button.click(
        fn=dump_data_db,
        inputs=[
            line_id_dropdown,
            video_type_dropdown,
            video_subject_dropdown,
            video_topics_text,
            video_tone_dropdown,
            gender_dropdown,
            demography_text,
            total_time_text,
            source_language_text,
            target_language_text,
            comments_text,
            accent_text,
            source_transcript_text,
            translation_text,
            speaker_gender_dropdown,
        ],
        outputs=[line_id_dropdown, video_id_dropdown],
    )
if __name__ == "__main__":
    # Enable request queuing and start the app behind a login prompt; the
    # credentials are read from the USERNAME and PASSWORD environment variables.
    demo.queue().launch(auth=(os.environ.get("USERNAME"), os.environ.get("PASSWORD")))