# SpeedWatch / app.py
# Last change: "Update app.py" by mbCrypto (commit 609c3ee); file size 3.56 kB.
# How to use: YTVideoToText("https://www.youtube.com/watch?v=jQL0ZeHtXFc")
def YTVideoToText(video_link):
    """Summarize the transcript of a YouTube video.

    The transcript is fetched with ``youtube_transcript_api``, split into
    pieces of at most 1000 characters (on whitespace, so words stay intact),
    and each piece is summarized with ``facebook/bart-large-cnn``.

    Args:
        video_link: URL of the video, e.g.
            ``https://www.youtube.com/watch?v=jQL0ZeHtXFc``. Extra query
            parameters and ``youtu.be`` / ``embed`` / ``shorts`` style links
            are accepted as well.

    Returns:
        The per-piece summaries joined with single spaces, or ``""`` when the
        transcript contains no text.

    Raises:
        ValueError: if no video ID can be found in ``video_link``.
    """
    import textwrap

    # Resolve the ID first so a malformed link fails fast, before the heavy
    # third-party imports and the model load below.
    video_id = _extract_video_id(video_link)

    from transformers import pipeline
    from youtube_transcript_api import YouTubeTranscriptApi

    # fetching video transcript and gluing all caption texts together
    transcript = YouTubeTranscriptApi.get_transcript(video_id)
    full_text = " ".join(entry["text"] for entry in transcript)

    # textwrap.wrap returns [] for empty/whitespace-only text, so the model is
    # never asked to summarize an empty piece.
    pieces = textwrap.wrap(full_text, width=1000, break_on_hyphens=False)
    if not pieces:
        return ""

    # summarize text piece by piece
    summarizerfb = pipeline("summarization", model="facebook/bart-large-cnn")
    summaries = [
        summarizerfb(piece, max_length=130, min_length=30, do_sample=False)[0]["summary_text"]
        for piece in pieces
    ]
    return " ".join(summaries)


def _extract_video_id(video_link):
    """Return the YouTube video ID found in *video_link* or raise ValueError."""
    from urllib.parse import parse_qs, urlparse

    link = (video_link or "").strip()
    parsed = urlparse(link)

    # Regular watch URLs carry the ID in the "v" query parameter; parse_qs
    # separates it from any other parameters such as "t" or "list".
    video_id = next(iter(parse_qs(parsed.query).get("v", [])), "")

    if not video_id:
        host = (parsed.hostname or "").lower()
        segments = [part for part in parsed.path.split("/") if part]
        if host.endswith("youtu.be") and segments:
            video_id = segments[0]
        elif len(segments) >= 2 and segments[0] in ("embed", "shorts", "live", "v"):
            video_id = segments[1]

    if not video_id and "=" in link:
        # Fallback that mirrors the previous behavior (text after the first
        # "="), minus any trailing "&..." parameters.
        video_id = link.split("=", 1)[1].split("&", 1)[0]

    if not video_id:
        raise ValueError(f"Could not find a YouTube video ID in {video_link!r}")
    return video_id
# How to use: postSummaryWithBart("https://ethereum.org/en/what-is-ethereum/")
def postSummaryWithBart(blog_link):
    """Summarize the text of a blog post / web article.

    The page is downloaded, the text of its ``<h1>`` and ``<p>`` elements is
    split into sentences, the sentences are packed into chunks of at most 500
    words, and every chunk is summarized with the default ``transformers``
    summarization pipeline.

    Args:
        blog_link: URL of the page, e.g.
            ``https://ethereum.org/en/what-is-ethereum/``.

    Returns:
        The chunk summaries joined with single spaces, or ``""`` when the
        page has no ``<h1>``/``<p>`` text.

    Raises:
        requests.RequestException: if the page cannot be fetched (network
            error, timeout, or an HTTP error status).
    """
    # importing libraries
    import re

    from transformers import pipeline
    from bs4 import BeautifulSoup
    import requests

    # loading summarization pipeline
    summarizer = pipeline("summarization")

    # getting our blog post; fail loudly instead of summarizing an error page
    response = requests.get(blog_link, timeout=30)
    response.raise_for_status()
    soup = BeautifulSoup(response.text, "html.parser")
    article = " ".join(tag.text for tag in soup.find_all(["h1", "p"]))

    # Split after sentence-ending punctuation that is followed by whitespace.
    # (The previous split on an empty separator always raised ValueError.)
    sentences = re.split(r"(?<=[.?!])\s+", article)

    # chunking text: each chunk is a list of words, at most max_chunk long
    # unless a single sentence alone exceeds that limit
    max_chunk = 500
    chunks = []
    for sentence in sentences:
        sentence_words = sentence.split()
        if not sentence_words:
            continue
        if chunks and len(chunks[-1]) + len(sentence_words) <= max_chunk:
            chunks[-1].extend(sentence_words)
        else:
            chunks.append(sentence_words)

    if not chunks:
        return ""
    chunk_texts = [" ".join(chunk_words) for chunk_words in chunks]

    # summarizing text
    res = summarizer(chunk_texts, max_length=70, min_length=30, do_sample=False)

    # returning summary; strip + single-space join keeps summaries separated
    return " ".join(summ["summary_text"].strip() for summ in res)
# How to use: abstractiveSummaryWithPegasus("""Sample text to be summarized""")
def abstractiveSummaryWithPegasus(words):
    """Generate an abstractive summary of *words* with ``google/pegasus-xsum``.

    Args:
        words: The text to summarize. It is tokenized with truncation
            enabled, so input beyond the tokenizer's limit is cut off.

    Returns:
        The generated summary as plain text. The summary is also printed,
        as before.
    """
    # importing & loading model
    from transformers import PegasusForConditionalGeneration, PegasusTokenizer

    tokenizer = PegasusTokenizer.from_pretrained("google/pegasus-xsum")
    model = PegasusForConditionalGeneration.from_pretrained("google/pegasus-xsum")

    # perform summarization
    tokens = tokenizer(words, truncation=True, padding="longest", return_tensors="pt")
    summary_ids = model.generate(**tokens)
    # skip_special_tokens drops padding / end-of-sequence markers that would
    # otherwise appear verbatim in the decoded text.
    actual_summ = tokenizer.decode(summary_ids[0], skip_special_tokens=True)

    # returning summary (previously it was only printed and None was returned)
    print(actual_summ)
    return actual_summ
import gradio as gr
def process(context, question):
    """Callback for the Gradio interface: answer *question* about *context*.

    This is still a placeholder. The interface in this file declares two
    output components, so the callback must return two values; returning
    ``None`` makes every submission fail.

    Args:
        context: Text the question refers to.
        question: The question to answer.

    Returns:
        A ``(answer, status)`` tuple of strings, one per output component.
        Until a model is wired in, ``answer`` is empty and ``status`` says so.
    """
    # TODO: Implement your question-answering model here...
    answer = ""
    status = "Question answering is not implemented yet."
    return answer, status
# Build the web UI around `process` (two text inputs, two text outputs)
# and start serving it as soon as this module is executed.
gr.Interface(
    fn=process,
    inputs=["text", "text"],
    outputs=["textbox", "text"],
).launch()