"""Gradio app that fetches a YouTube video's transcript and translates it."""

import re
from urllib.parse import parse_qs, urlparse

from youtube_transcript_api import YouTubeTranscriptApi as yta
import gradio as gr
from deep_translator import GoogleTranslator

# Target languages offered in the dropdown (names understood by GoogleTranslator).
opts = [
    'afrikaans', 'albanian', 'amharic', 'arabic', 'armenian', 'azerbaijani',
    'basque', 'belarusian', 'bengali', 'bosnian', 'bulgarian', 'catalan',
    'cebuano', 'chichewa', 'chinese (simplified)', 'chinese (traditional)',
    'corsican', 'croatian', 'czech', 'danish', 'dutch', 'english',
    'esperanto', 'estonian', 'filipino', 'finnish', 'french', 'frisian',
    'galician', 'georgian', 'german', 'greek', 'gujarati', 'haitian creole',
    'hausa', 'hawaiian', 'hebrew', 'hindi', 'hmong', 'hungarian',
    'icelandic', 'igbo', 'indonesian', 'irish', 'italian', 'japanese',
    'javanese', 'kannada', 'kazakh', 'khmer', 'korean', 'kurdish (kurmanji)',
    'kyrgyz', 'lao', 'latin', 'latvian', 'lithuanian', 'luxembourgish',
    'macedonian', 'malagasy', 'malay', 'malayalam', 'maltese', 'maori',
    'marathi', 'mongolian', 'myanmar (burmese)', 'nepali', 'norwegian',
    'odia', 'pashto', 'persian', 'polish', 'portuguese', 'punjabi',
    'romanian', 'russian', 'samoan', 'scots gaelic', 'serbian', 'sesotho',
    'shona', 'sindhi', 'sinhala', 'slovak', 'slovenian', 'somali', 'spanish',
    'sundanese', 'swahili', 'swedish', 'tajik', 'tamil', 'telugu', 'thai',
    'turkish', 'ukrainian', 'urdu', 'uyghur', 'uzbek', 'vietnamese', 'welsh',
    'xhosa', 'yiddish', 'yoruba', 'zulu',
]

# GoogleTranslator rejects inputs of 5000 characters or more, so long
# transcripts are translated in pieces that stay safely below that limit.
_MAX_TRANSLATE_CHARS = 4500

# Shape of a bare YouTube video id (11 URL-safe characters).
_VIDEO_ID_RE = re.compile(r"[A-Za-z0-9_-]{11}")

# Path prefixes whose following path component is the video id.
_ID_PATH_PREFIXES = {"embed", "shorts", "live", "v"}


def _extract_video_id(link):
    """Return the video id contained in *link*, or '' if none can be found.

    Handles ``watch?v=ID`` URLs (with or without extra query parameters),
    ``youtu.be/ID`` short links, ``/embed/ID``, ``/shorts/ID``, ``/live/ID``
    paths, and a bare 11-character id.
    """
    text = str(link).strip()
    if _VIDEO_ID_RE.fullmatch(text):
        return text

    # urlparse only recognises the host when a scheme is present.
    parsed = urlparse(text if "://" in text else "https://" + text)

    from_query = parse_qs(parsed.query).get("v")
    if from_query:
        return from_query[0]

    host = (parsed.hostname or "").lower()
    path_parts = [part for part in parsed.path.split("/") if part]
    if host.endswith("youtu.be") and path_parts:
        return path_parts[0]
    if len(path_parts) >= 2 and path_parts[0] in _ID_PATH_PREFIXES:
        return path_parts[1]

    # Last resort: keep the historical "everything after the first '='" rule.
    return text.partition("=")[2]


def _chunk_segments(segments, limit=_MAX_TRANSLATE_CHARS):
    """Group *segments* into space-joined chunks of at most *limit* characters."""
    chunks = []
    current = []
    size = 0
    for segment in segments:
        # A single oversized segment cannot be kept whole; hard-split it.
        while len(segment) > limit:
            if current:
                chunks.append(" ".join(current))
                current, size = [], 0
            chunks.append(segment[:limit])
            segment = segment[limit:]

        added = len(segment) + (1 if current else 0)  # +1 for the joining space
        if current and size + added > limit:
            chunks.append(" ".join(current))
            current, size = [], 0
            added = len(segment)
        current.append(segment)
        size += added

    if current:
        chunks.append(" ".join(current))
    return chunks


def transcript_generator(link, option):
    """Fetch the transcript of a YouTube video and translate it.

    Args:
        link: YouTube URL (or bare video id) entered by the user.
        option: Target language name, one of ``opts``.

    Returns:
        A ``(transcript, translation)`` tuple of strings.

    Raises:
        gr.Error: If no video id can be extracted from *link* or no target
            language was selected.
        Any exception raised by ``youtube_transcript_api`` or
        ``deep_translator`` (e.g. transcripts disabled) propagates unchanged.
    """
    yt_id = _extract_video_id(link)
    if not yt_id:
        raise gr.Error("Please enter a valid YouTube link.")
    if not option:
        raise gr.Error("Please select a target language.")

    data = yta.get_transcript(yt_id)

    # Each entry is a dict with a 'text' field; join the pieces with spaces so
    # the last word of one caption does not run into the first of the next.
    segments = [
        text
        for text in (str(entry.get('text', '')).strip() for entry in data)
        if text
    ]
    transcript = " ".join(segments)
    if not transcript:
        return '', ''

    translator = GoogleTranslator(source='auto', target=option)
    translated_chunks = [
        translator.translate(chunk) or ''
        for chunk in _chunk_segments(segments)
    ]
    translation = " ".join(translated_chunks)
    return transcript, translation


# Raw string: the CSS selector below contains a literal backslash ("\:").
css = r"""
footer {display:none !important}
.output-markdown{display:none !important}
footer {visibility: hidden}
#component-4 textarea[data-testid="textbox"] { height: 178px !important}
.max-h-[30rem] {max-height: 18rem !important;}
.hover\:bg-orange-50:hover {
    --tw-bg-opacity: 1 !important;
    background-color: rgb(229,225,255) !important;
}
"""

# NOTE: the CSS above targets "#component-4", so the order in which components
# are created inside the Blocks context must not change.
with gr.Blocks(title="YouTube Transcript Generator | Data Science Dojo", css=css) as demo:
    input1 = gr.Textbox(label="Enter YouTube Link", value='', lines=1)
    input2 = gr.Dropdown(opts)
    with gr.Tab("Transcript"):
        output1 = gr.Textbox(label="Transcript", lines=10)
    with gr.Tab("Translated Transcript"):
        output2 = gr.Textbox(label="Translation", lines=10)
    btn = gr.Button(value="Submit")
    btn.click(transcript_generator, inputs=[input1, input2], outputs=[output1, output2])
    gr.Examples(
        [
            ["https://www.youtube.com/watch?v=47dtFZ8CFo8", 'chinese (simplified)'],
            ["https://www.youtube.com/watch?v=hT_nvWreIhg", 'french'],
            ['https://www.youtube.com/watch?v=PMs76lrqiA4', 'urdu'],
        ],
        fn=transcript_generator,
        inputs=[input1, input2],
        outputs=[output1, output2],
        cache_examples=True,
    )

if __name__ == "__main__":
    demo.launch()