datasciencedojo's picture
Update app.py
89c5210
raw
history blame
2.53 kB
from urllib.parse import parse_qs, urlparse

import gradio as gr
from google_trans_new import google_translator
from youtube_transcript_api import YouTubeTranscriptApi as yta
# Language names offered in the target-language dropdown. The selected name is
# handed to the translator unchanged as its target language.
# Each name appears exactly once so the dropdown has no duplicate choices.
opts = [
    "afrikaans", "albanian", "amharic", "arabic", "armenian", "azerbaijani",
    "basque", "belarusian", "bengali", "bosnian", "bulgarian",
    "catalan", "cebuano", "chichewa", "chinese (simplified)",
    "chinese (traditional)", "corsican", "croatian", "czech",
    "danish", "dutch",
    "english", "esperanto", "estonian",
    "filipino", "finnish", "french", "frisian",
    "galician", "georgian", "german", "greek", "gujarati",
    "haitian creole", "hausa", "hawaiian", "hebrew", "hindi", "hmong",
    "hungarian",
    "icelandic", "igbo", "indonesian", "irish", "italian",
    "japanese", "javanese",
    "kannada", "kazakh", "khmer", "korean", "kurdish (kurmanji)", "kyrgyz",
    "lao", "latin", "latvian", "lithuanian", "luxembourgish",
    "macedonian", "malagasy", "malay", "malayalam", "maltese", "maori",
    "marathi", "mongolian", "myanmar (burmese)",
    "nepali", "norwegian",
    "odia",
    "pashto", "persian", "polish", "portuguese", "punjabi",
    "romanian", "russian",
    "samoan", "scots gaelic", "serbian", "sesotho", "shona", "sindhi",
    "sinhala", "slovak", "slovenian", "somali", "spanish", "sundanese",
    "swahili", "swedish",
    "tajik", "tamil", "telugu", "thai", "turkish",
    "ukrainian", "urdu", "uyghur", "uzbek",
    "vietnamese",
    "welsh",
    "xhosa",
    "yiddish", "yoruba",
    "zulu",
]
def _extract_video_id(link):
    """Return the YouTube video ID found in *link*, or ``""`` if there is none."""
    url = str(link).strip()
    # A link pasted without a scheme would be parsed as a bare path, hiding
    # the host name from the checks below; "//" makes urlparse see the host.
    parsed = urlparse(url if "://" in url else "//" + url)

    # Standard watch URLs: the ID is the ``v`` query parameter, wherever it
    # sits and whatever other parameters (``t``, ``list``, ...) accompany it.
    v_values = parse_qs(parsed.query).get("v")
    if v_values:
        return v_values[0]

    path = parsed.path
    # Short links keep the ID as the first path segment.
    if parsed.netloc.lower().endswith("youtu.be"):
        return path.strip("/").partition("/")[0]
    for prefix in ("/embed/", "/shorts/", "/live/"):
        if path.startswith(prefix):
            return path[len(prefix):].partition("/")[0]

    # Legacy fallback: everything after the first "=", so any input the
    # previous implementation accepted still resolves to the same ID.
    return url.partition("=")[2]


def transcript_generator(link, option):
    """Fetch the transcript of a YouTube video and translate it.

    Args:
        link: URL of the YouTube video (converted with ``str``).
        option: Target language, passed unchanged to the translator as
            ``lang_tgt``.

    Returns:
        A ``(transcript, translation)`` tuple. ``transcript`` is the text of
        all transcript entries joined with single spaces.

    Raises:
        ValueError: If no video ID can be extracted from *link*.
    """
    yt_id = _extract_video_id(link)
    if not yt_id:
        # Fail before any network call, with a message that names the input.
        raise ValueError(f"Could not find a YouTube video ID in link: {link!r}")

    data = yta.get_transcript(yt_id)
    # Join with a space: concatenating entries directly fuses the last word
    # of one caption to the first word of the next.
    transcript = " ".join(entry["text"] for entry in data if "text" in entry)

    translator = google_translator()
    result = translator.translate(transcript, lang_tgt=option)
    # NOTE(review): depending on the translator library in use, the result is
    # either the translated string itself or an object exposing it as
    # ``.text`` -- accept both instead of assuming ``.text`` exists.
    translation = getattr(result, "text", result)
    return transcript, translation
# Build the Gradio UI: a link box and a language dropdown feed
# transcript_generator, whose two results are shown in separate tabs.
# NOTE(review): the nesting below is reconstructed -- confirm that the button
# and the examples belong outside the tabs.
with gr.Blocks() as demo:
    input1 = gr.Textbox(label="Enter YouTube Link", value='', lines=1)
    input2 = gr.Dropdown(opts)
    with gr.Tab("Transcript"):
        output1 = gr.Textbox(label="Transcript", lines=10)
    with gr.Tab("Translated Transcript"):
        output2 = gr.Textbox(label="Translation", lines=10)
    btn = gr.Button(value="Submit")
    btn.click(
        transcript_generator,
        inputs=[input1, input2],
        outputs=[output1, output2],
    )
    # Keyword arguments on purpose: the third positional parameter of
    # gr.Examples is ``outputs``, so passing the function positionally would
    # register it as an output component instead of as ``fn``.
    gr.Examples(
        examples=[
            ["https://www.youtube.com/watch?v=47dtFZ8CFo8", 'chinese (simplified)'],
            ["https://www.youtube.com/watch?v=hT_nvWreIhg", 'french'],
            ['https://www.youtube.com/watch?v=PMs76lrqiA4', 'urdu'],
        ],
        inputs=[input1, input2],
        outputs=[output1, output2],
        fn=transcript_generator,
        # Clicking an example only fills the inputs; nothing is fetched or
        # translated until the user presses Submit.
        cache_examples=False,
    )

# Start the web server only when this file is run as a script.
if __name__ == "__main__":
    demo.launch()