# NOTE: this file was scraped from a web page; the original "Spaces:" /
# "Runtime error" banner lines were page artifacts, not part of the code.
# How to use: YTVideoToText("https://www.youtube.com/watch?v=jQL0ZeHtXFc")
def YTVideoToText(video_link):
    """Fetch a YouTube video's transcript and return an abstractive summary.

    Parameters
    ----------
    video_link : str
        Full YouTube watch URL containing a ``v=<video_id>`` query parameter.

    Returns
    -------
    str
        Space-joined summaries of ~1000-character transcript chunks.

    Raises
    ------
    youtube_transcript_api exceptions when no transcript is available.
    """
    # Imports are kept local so the heavy dependencies load only when used.
    from urllib.parse import parse_qs, urlparse

    from transformers import pipeline
    from youtube_transcript_api import YouTubeTranscriptApi

    # Parse the video id out of the query string instead of naively splitting
    # on "=", which breaks when the URL carries additional parameters
    # (e.g. "...watch?v=abc&t=42s" would yield "abc&t" with the old code).
    query = parse_qs(urlparse(video_link).query)
    video_id = query.get("v", [video_link.split("=")[-1]])[0]

    transcript = YouTubeTranscriptApi.get_transcript(video_id)
    # Join all caption fragments into one blob (str.join avoids the
    # quadratic cost of repeated += concatenation).
    result = " ".join(entry["text"] for entry in transcript)

    # Summarize in ~1000-character windows; BART has a bounded input length.
    summarizerfb = pipeline("summarization", model="facebook/bart-large-cnn")
    summarized_text = []
    for start in range(0, len(result), 1000):
        chunk = result[start:start + 1000]
        if not chunk.strip():
            # Skip an empty trailing window instead of crashing the pipeline.
            continue
        out = summarizerfb(chunk, max_length=130, min_length=30, do_sample=False)
        summarized_text.append(out[0]["summary_text"])

    # Returning summary
    return " ".join(summarized_text)
# How to use: postSummaryWithBart("https://ethereum.org/en/what-is-ethereum/")
def postSummaryWithBart(blog_link):
    """Scrape a blog post and return a summary built chunk-by-chunk.

    Parameters
    ----------
    blog_link : str
        URL of the article to fetch and summarize.

    Returns
    -------
    str
        Space-joined summaries of sentence-packed chunks.

    Raises
    ------
    requests.HTTPError
        If the page fetch fails.
    """
    # Importing libraries locally so heavy dependencies load on demand.
    from transformers import pipeline
    from bs4 import BeautifulSoup
    import requests

    # Loading the default summarization pipeline.
    summarizer = pipeline("summarization")

    # Getting our blog post; fail loudly on HTTP errors instead of silently
    # summarizing an error page.
    r = requests.get(blog_link)
    r.raise_for_status()
    soup = BeautifulSoup(r.text, 'html.parser')
    results = soup.find_all(['h1', 'p'])
    article = ' '.join(tag.text for tag in results)

    # Tag sentence boundaries with '<eos>' so we can split without losing
    # punctuation. The original code replaced '.' with '.' (a no-op) and then
    # called split('') — which raises "ValueError: empty separator" — so the
    # function could never run; the markers fix both bugs.
    article = article.replace('.', '.<eos>')
    article = article.replace('?', '?<eos>')
    article = article.replace('!', '!<eos>')
    sentences = article.split('<eos>')

    # Greedily pack sentences into chunks of at most max_chunk words, since
    # the model cannot accept arbitrarily long inputs.
    max_chunk = 500
    current_chunk = 0
    chunks = []
    for sentence in sentences:
        words = sentence.split(' ')
        if len(chunks) == current_chunk + 1:
            if len(chunks[current_chunk]) + len(words) <= max_chunk:
                chunks[current_chunk].extend(words)
            else:
                current_chunk += 1
                chunks.append(words)
        else:
            # First sentence starts the first chunk.
            chunks.append(words)
    for chunk_id in range(len(chunks)):
        chunks[chunk_id] = ' '.join(chunks[chunk_id])

    # Summarizing each chunk; join with a space so adjacent summaries'
    # words do not fuse together (the original joined with '').
    res = summarizer(chunks, max_length=70, min_length=30, do_sample=False)
    return ' '.join(summ['summary_text'] for summ in res)
# How to use: abstractiveSummaryWithPegasus("""Sample text to be summarized""")
def abstractiveSummaryWithPegasus(words):
    """Summarize *words* with google/pegasus-xsum; print and return the result.

    Parameters
    ----------
    words : str
        The text to summarize (truncated to the model's maximum length).

    Returns
    -------
    str
        The decoded summary text.
    """
    # Importing & loading the model (local imports keep the dependency lazy).
    from transformers import PegasusForConditionalGeneration, PegasusTokenizer
    tokenizer = PegasusTokenizer.from_pretrained("google/pegasus-xsum")
    model = PegasusForConditionalGeneration.from_pretrained("google/pegasus-xsum")

    # Perform summarization.
    tokens = tokenizer(words, truncation=True, padding="longest", return_tensors="pt")
    summary_ids = model.generate(**tokens)
    # skip_special_tokens drops sentinel tokens such as </s> from the output.
    actual_summ = tokenizer.decode(summary_ids[0], skip_special_tokens=True)

    # The original only printed the summary and returned None despite the
    # "returning summary" comment; keep the print for backward compatibility
    # but also return the value so callers can use it.
    print(actual_summ)
    return actual_summ
import gradio as gr


def process(context, question):
    """Placeholder question-answering handler for the Gradio demo.

    Parameters
    ----------
    context : str
        The passage to answer from.
    question : str
        The question to answer.

    Returns
    -------
    tuple[str, str]
        One value per declared output component.
    """
    # TODO: implement a real question-answering model here.
    # The original stub returned None, which fails Gradio's output mapping
    # because the interface declares TWO outputs; return a pair instead.
    return "", ""


gr.Interface(fn=process, inputs=["text", "text"], outputs=["textbox", "text"]).launch()