ccxccc committed on
Commit
eb2166e
·
verified ·
1 Parent(s): 6f37fff

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +37 -28
app.py CHANGED
@@ -1,41 +1,50 @@
1
  from transformers import pipeline
2
  from youtube_transcript_api import YouTubeTranscriptApi
3
- #from IPython.display import YouTubeVideo
4
  import streamlit as st
 
5
 
6
  def summarize(result):
7
  summarizer = pipeline('summarization')
8
  num_iters = int(len(result)/1000)
9
  summarized_text = []
10
  for i in range(0, num_iters + 1):
11
- start = 0
12
- start = i * 1000
13
- end = (i + 1) * 1000
14
- print("input text \n" + result[start:end])
15
- out = summarizer(result[start:end])
16
- out = out[0]
17
- out = out['summary_text']
18
- print("Summarized text\n"+out)
19
- summarized_text.append(out)
20
  st.write(summarized_text)
21
 
22
- def get_transcript(video_id):
23
- #youtube_video = "https://www.youtube.com/watch?v=A4OmtyaBHFE"
24
- #video_id = youtube_video.split("=")[1]
25
- #video_id
26
- #st.write(YouTubeVideo(video_id))
27
- transcript = YouTubeTranscriptApi.get_transcript(video_id)
28
- print(transcript[0:5])
29
-
30
- result = ""
31
- for i in transcript:
32
- result += ' ' + i['text']
33
- #print(result)
34
- print(len(result))
35
- st.write(result)
 
 
 
 
 
 
 
 
36
 
37
  def main():
38
- video_id=st.text_input("enter video id ")
39
- if video_id:
40
- get_transcript(video_id)
41
- main()
 
 
1
  from transformers import pipeline
2
  from youtube_transcript_api import YouTubeTranscriptApi
 
3
  import streamlit as st
4
+ import re
5
 
6
def summarize(result):
    """Summarize *result* in fixed-size pieces and display the summaries.

    The text is cut into consecutive slices of at most 1000 characters, each
    slice is passed to a ``transformers`` summarization pipeline, and the list
    of per-slice summaries is written to the Streamlit page.

    Args:
        result: The full text to summarize (here, a video transcript).
    """
    chunk_size = 1000

    # Step through the text by chunk_size so that no empty slice is produced.
    # The previous ``range(0, int(len/1000) + 1)`` form always added one extra
    # iteration, which was an empty string whenever the text was empty or its
    # length was an exact multiple of 1000.
    chunks = [
        result[start:start + chunk_size]
        for start in range(0, len(result), chunk_size)
    ]

    summarized_text = []
    if chunks:
        # Build the pipeline only when there is something to summarize.
        summarizer = pipeline('summarization')
        for chunk in chunks:
            print("input text \n" + chunk)
            out = summarizer(chunk)[0]['summary_text']
            print("Summarized text\n" + out)
            summarized_text.append(out)

    st.write(summarized_text)
21
 
22
def get_transcript(video_link):
    """Fetch, display and summarize the transcript for a YouTube link.

    The video ID is pulled out of *video_link* with ``extract_video_id``. If
    no ID is found, an "invalid link" message is written to the page and
    nothing else happens. Otherwise the transcript is fetched, its text
    segments are concatenated, the combined text is shown, and it is handed
    to ``summarize``.

    Args:
        video_link: The link text entered by the user.
    """
    video_id = extract_video_id(video_link)
    if not video_id:
        st.write("Invalid YouTube video link")
        return

    segments = YouTubeTranscriptApi.get_transcript(video_id)
    # Each segment is prefixed with a space, so the combined text starts with
    # one as well.
    result = "".join(' ' + segment['text'] for segment in segments)
    print(len(result))
    st.write(result)
    # Summarize the transcript that was just displayed.
    summarize(result)
35
+
36
# Matches the 11-character video ID in the common YouTube URL shapes:
# watch?v=<id>, /v/<id>, /e/<id>, /embed/<id>, /shorts/<id>, /live/<id>,
# nested paths ending in the ID, and youtu.be/<id>. Compiled once at import.
_VIDEO_ID_PATTERN = re.compile(
    r"(?:https?:\/\/)?(?:www\.)?"
    r"(?:youtube\.com\/(?:[^\/\n\s]+\/\S+\/|(?:v|e(?:mbed)?|shorts|live)\/|\S*?[?&]v=)"
    r"|youtu\.be\/)"
    r"([a-zA-Z0-9_-]{11})"
)


def extract_video_id(video_link):
    """Return the YouTube video ID found in *video_link*, or ``None``.

    Args:
        video_link: Text expected to contain a YouTube URL.

    Returns:
        The 11-character video ID of the first URL-like match, or ``None``
        when *video_link* is not a string or contains no recognizable link.
    """
    # A non-string argument cannot contain a link; report "no ID" rather than
    # letting the regex engine raise TypeError.
    if not isinstance(video_link, str):
        return None

    match = _VIDEO_ID_PATTERN.search(video_link)
    return match.group(1) if match else None
44
 
45
def main():
    """Ask for a YouTube link and process it once one has been entered."""
    entered_link = st.text_input("Enter YouTube video link:")
    # The input is empty until the user types something; do nothing until then.
    if not entered_link:
        return
    get_transcript(entered_link)


main()