File size: 3,006 Bytes
272f692
 
ba9fb78
22216e9
4496fc2
 
272f692
 
 
 
 
 
 
 
 
 
 
 
ba9fb78
 
4496fc2
272f692
ef2021d
 
 
 
 
 
 
 
 
 
 
 
 
272f692
 
4496fc2
 
 
 
 
 
272f692
4496fc2
140ee0b
3b8dc0a
4496fc2
530b49f
 
7b71e07
51fd608
272f692
 
4558b86
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
from youtube_transcript_api import YouTubeTranscriptApi as yta
import gradio as gr
from deep_translator import GoogleTranslator

# Target languages offered in the translation dropdown. Each entry is passed
# unchanged as the ``target`` argument of GoogleTranslator, so the spelling of
# every name must stay exactly as the translator expects it.
# NOTE: the list is kept alphabetical and free of duplicates ('hebrew' used to
# be listed twice, which produced a duplicate dropdown entry).
opts = [
    'afrikaans', 'albanian', 'amharic', 'arabic', 'armenian', 'azerbaijani',
    'basque', 'belarusian', 'bengali', 'bosnian', 'bulgarian',
    'catalan', 'cebuano', 'chichewa', 'chinese (simplified)', 'chinese (traditional)', 'corsican', 'croatian', 'czech',
    'danish', 'dutch',
    'english', 'esperanto', 'estonian',
    'filipino', 'finnish', 'french', 'frisian',
    'galician', 'georgian', 'german', 'greek', 'gujarati',
    'haitian creole', 'hausa', 'hawaiian', 'hebrew', 'hindi', 'hmong', 'hungarian',
    'icelandic', 'igbo', 'indonesian', 'irish', 'italian',
    'japanese', 'javanese',
    'kannada', 'kazakh', 'khmer', 'korean', 'kurdish (kurmanji)', 'kyrgyz',
    'lao', 'latin', 'latvian', 'lithuanian', 'luxembourgish',
    'macedonian', 'malagasy', 'malay', 'malayalam', 'maltese', 'maori', 'marathi', 'mongolian', 'myanmar (burmese)',
    'nepali', 'norwegian',
    'odia',
    'pashto', 'persian', 'polish', 'portuguese', 'punjabi',
    'romanian', 'russian',
    'samoan', 'scots gaelic', 'serbian', 'sesotho', 'shona', 'sindhi', 'sinhala', 'slovak', 'slovenian', 'somali', 'spanish', 'sundanese', 'swahili', 'swedish',
    'tajik', 'tamil', 'telugu', 'thai', 'turkish',
    'ukrainian', 'urdu', 'uyghur', 'uzbek',
    'vietnamese',
    'welsh',
    'xhosa',
    'yiddish', 'yoruba',
    'zulu',
]
def _extract_video_id(link):
    """Return the YouTube video ID contained in *link*, or '' if none is found.

    Recognised forms:
      * ``.../watch?v=ID`` (any extra query parameters are ignored)
      * ``youtu.be/ID``
      * ``.../embed/ID``, ``.../shorts/ID``, ``.../live/ID``, ``.../v/ID``

    For anything else the historical behaviour is kept: everything after the
    first ``=`` is returned.
    """
    # Imported locally so this block does not depend on a file-level import.
    from urllib.parse import parse_qs, urlparse

    link = link.strip()
    # urlparse only populates the host when a scheme is present, so add one
    # for inputs such as "youtu.be/ID" or "www.youtube.com/watch?v=ID".
    parsed = urlparse(link if "://" in link else "https://" + link)
    host = (parsed.hostname or "").lower()
    segments = [part for part in parsed.path.split("/") if part]

    if host == "youtu.be" or host.endswith(".youtu.be"):
        return segments[0] if segments else ""
    if len(segments) >= 2 and segments[0] in ("embed", "shorts", "live", "v"):
        return segments[1]

    ids = parse_qs(parsed.query).get("v")
    if ids:
        return ids[0]

    # Legacy fallback: whatever follows the first "=" (empty if there is none).
    return link.partition("=")[2]


def _split_for_translation(text, limit=4500):
    """Split *text* into pieces of at most *limit* characters.

    Text that already fits is returned as a single unchanged piece; longer
    text is cut at whitespace so that words stay intact. A single word longer
    than *limit* is hard-split. Empty text yields an empty list.
    """
    if len(text) <= limit:
        return [text] if text else []

    chunks = []
    current = []
    size = 0
    for word in text.split():
        # Pathological case: one "word" that cannot fit in any chunk.
        while len(word) > limit:
            if current:
                chunks.append(" ".join(current))
                current, size = [], 0
            chunks.append(word[:limit])
            word = word[limit:]

        extra = len(word) + (1 if current else 0)  # +1 for the joining space
        if size + extra > limit:
            chunks.append(" ".join(current))
            current, size = [], 0
            extra = len(word)
        current.append(word)
        size += extra

    if current:
        chunks.append(" ".join(current))
    return chunks


def transcript_generator(link, option):
    """Fetch a YouTube video's transcript and translate it.

    Args:
        link: URL of the YouTube video (converted with ``str()``).
        option: Target language name handed to ``GoogleTranslator`` as
            ``target``. When empty/None, no translation is attempted.

    Returns:
        A ``(transcript, translation)`` tuple of strings. ``translation`` is
        ``''`` when no language was chosen or the transcript is empty.

    Raises:
        ValueError: if no video ID can be extracted from *link*.
        Exceptions raised by ``yta.get_transcript`` or by the translator are
        propagated unchanged.
    """
    yt_id = _extract_video_id(str(link))
    if not yt_id:
        # Fail early with a readable message instead of sending an empty ID
        # to the transcript service.
        raise ValueError(f"Could not find a YouTube video ID in {link!r}")

    data = yta.get_transcript(yt_id)

    # Each entry is a mapping whose 'text' key holds one caption segment.
    # Join with a space so the last word of one segment does not fuse with
    # the first word of the next.
    transcript = " ".join(entry["text"] for entry in data if entry.get("text"))

    if not option or not transcript:
        return transcript, ""

    # The translator rejects over-long payloads (deep_translator documents a
    # 5000-character limit for GoogleTranslator), so translate piece by piece.
    translator = GoogleTranslator(source='auto', target=option)
    translation = " ".join(
        translator.translate(chunk) or ""  # defensive: tolerate a falsy result
        for chunk in _split_for_translation(transcript)
    )

    return transcript, translation

# Custom stylesheet passed to gr.Blocks: hides the footer and the
# .output-markdown element, fixes the height of one textbox, caps the height
# of the .max-h-[30rem] element and recolours the hover state of
# .hover:bg-orange-50 items.
# Raw string on purpose: the selector `.hover\:bg-orange-50` needs a literal
# backslash, and `\:` in a normal string literal is an invalid escape
# sequence (a warning today, an error in a future Python).
css = r"""
footer {display:none !important}
.output-markdown{display:none !important}
footer {visibility: hidden} 
#component-4 textarea[data-testid="textbox"] { height: 178px !important}
.max-h-[30rem] {max-height: 18rem !important;}
.hover\:bg-orange-50:hover {
    --tw-bg-opacity: 1 !important;
    background-color: rgb(229,225,255) !important;
}
"""

# Build the Gradio UI: a link textbox and a language dropdown feed
# transcript_generator, whose two results are shown in separate tabs.
# The page title previously read "Wikipedia Article Scrape", which does not
# describe this app; it now names what the app actually does.
# NOTE(review): the stylesheet targets "#component-4", which presumably
# depends on the order in which components are created — keep that order
# unchanged unless the CSS is updated too.
with gr.Blocks(title="YouTube Transcript Translator | Data Science Dojo", css=css) as demo:

    input1 = gr.Textbox(label="Enter YouTube Link", value='', lines=1)
    # Labelled so users can tell what the dropdown selects.
    input2 = gr.Dropdown(opts, label="Select Translation Language")
    with gr.Tab("Transcript"):
        output1 = gr.Textbox(label="Transcript", lines=10)
    with gr.Tab("Translated Transcript"):
        output2 = gr.Textbox(label="Translation", lines=10)
    btn = gr.Button(value="Submit")
    btn.click(transcript_generator, inputs=[input1, input2], outputs=[output1, output2])

    # Clickable sample inputs; with cache_examples=True their outputs are
    # computed by calling transcript_generator on each example.
    gr.Examples(
        [
            ["https://www.youtube.com/watch?v=47dtFZ8CFo8", 'chinese (simplified)'],
            ["https://www.youtube.com/watch?v=hT_nvWreIhg", 'french'],
            ['https://www.youtube.com/watch?v=PMs76lrqiA4', 'urdu'],
        ],
        fn=transcript_generator,
        inputs=[input1, input2],
        outputs=[output1, output2],
        cache_examples=True,
    )

# Start the web server only when this file is executed as a script.
if __name__ == "__main__":
    demo.launch()