Spaces:
Runtime error
Runtime error
Update util.py
Browse files
util.py
CHANGED
@@ -37,6 +37,7 @@ def get_subtitles(video_url):
|
|
37 |
video_id = video_url.split("v=")[1]
|
38 |
transcript = YouTubeTranscriptApi.get_transcript(video_id, languages=['en'])
|
39 |
subs = " ".join(entry['text'] for entry in transcript)
|
|
|
40 |
|
41 |
return transcript, subs
|
42 |
|
@@ -95,14 +96,12 @@ def split_into_chunks(text, max_words=800, overlap_sentences=2):
|
|
95 |
else:
|
96 |
if len(current_chunk) >= overlap_sentences:
|
97 |
overlap = current_chunk[-overlap_sentences:]
|
98 |
-
print(f"Overlapping sentences: {' '.join(overlap)}")
|
99 |
chunks.append(' '.join(current_chunk))
|
100 |
current_chunk = current_chunk[-overlap_sentences:] + [sentence]
|
101 |
current_word_count = sum(len(sent.split()) for sent in current_chunk)
|
102 |
if current_chunk:
|
103 |
if len(current_chunk) >= overlap_sentences:
|
104 |
overlap = current_chunk[-overlap_sentences:]
|
105 |
-
print(f"Overlapping sentences: {' '.join(overlap)}")
|
106 |
chunks.append(' '.join(current_chunk))
|
107 |
|
108 |
return chunks
|
@@ -130,7 +129,7 @@ def pipeline(url, model, tokenizer):
|
|
130 |
chunks = split_into_chunks(vie_sub, 700, 2)
|
131 |
sum_para = []
|
132 |
for i in chunks:
|
133 |
-
tmp = summarize(i, model, tokenizer, num_beams=
|
134 |
sum_para.append(tmp)
|
135 |
sum = ''.join(sum_para)
|
136 |
del sub, vie_sub, sum_para, chunks
|
|
|
37 |
video_id = video_url.split("v=")[1]
|
38 |
transcript = YouTubeTranscriptApi.get_transcript(video_id, languages=['en'])
|
39 |
subs = " ".join(entry['text'] for entry in transcript)
|
40 |
+
print(sub)
|
41 |
|
42 |
return transcript, subs
|
43 |
|
|
|
96 |
else:
|
97 |
if len(current_chunk) >= overlap_sentences:
|
98 |
overlap = current_chunk[-overlap_sentences:]
|
|
|
99 |
chunks.append(' '.join(current_chunk))
|
100 |
current_chunk = current_chunk[-overlap_sentences:] + [sentence]
|
101 |
current_word_count = sum(len(sent.split()) for sent in current_chunk)
|
102 |
if current_chunk:
|
103 |
if len(current_chunk) >= overlap_sentences:
|
104 |
overlap = current_chunk[-overlap_sentences:]
|
|
|
105 |
chunks.append(' '.join(current_chunk))
|
106 |
|
107 |
return chunks
|
|
|
129 |
chunks = split_into_chunks(vie_sub, 700, 2)
|
130 |
sum_para = []
|
131 |
for i in chunks:
|
132 |
+
tmp = summarize(i, model, tokenizer, num_beams=3)
|
133 |
sum_para.append(tmp)
|
134 |
sum = ''.join(sum_para)
|
135 |
del sub, vie_sub, sum_para, chunks
|