# Captured from a Hugging Face Space file view (Space status at capture: Runtime error).
# File size: 3,560 bytes. Revision hashes shown alongside the listing: 609c3ee, 0d48c9b.
# How to use: YTVideoToText("https://www.youtube.com/watch?v=jQL0ZeHtXFc")
def YTVideoToText(video_link):
    """Summarize the transcript of a YouTube video.

    The transcript is fetched with ``youtube_transcript_api``, concatenated
    into one string, cut into 1000-character slices, and each slice is
    summarized with the ``facebook/bart-large-cnn`` summarization pipeline.

    Args:
        video_link: URL of the video. The id is read from the ``v`` query
            parameter, from a ``youtu.be/<id>`` path, or from an
            ``/embed/<id>``, ``/shorts/<id>``, ``/live/<id>`` or ``/v/<id>``
            path.

    Returns:
        The per-slice summaries joined with single spaces, or an empty
        string when the transcript contains no text.

    Raises:
        ValueError: if no video id can be found in ``video_link``.
    """
    # Resolve the id first so a malformed link fails fast, before the
    # third-party libraries are imported.
    video_id = _extract_video_id(video_link)

    from transformers import pipeline
    from youtube_transcript_api import YouTubeTranscriptApi

    # Each transcript entry is indexed by its 'text' key.
    transcript = YouTubeTranscriptApi.get_transcript(video_id)
    full_text = " ".join(entry["text"] for entry in transcript)
    if not full_text.strip():
        # Nothing to summarize; avoid loading the model for an empty input.
        return ""

    summarizer = pipeline("summarization", model="facebook/bart-large-cnn")

    chunk_size = 1000
    summaries = []
    # Stepping by chunk_size never yields an empty trailing slice, unlike
    # iterating len // chunk_size + 1 times.
    for start in range(0, len(full_text), chunk_size):
        piece = full_text[start:start + chunk_size]
        if not piece.strip():
            continue
        output = summarizer(piece, max_length=130, min_length=30, do_sample=False)
        summaries.append(output[0]["summary_text"])

    return " ".join(summaries)


def _extract_video_id(video_link):
    """Return the video id found in *video_link*, or raise ValueError."""
    from urllib.parse import parse_qs, urlparse

    parsed = urlparse(video_link.strip())

    # Watch URLs: take the "v" parameter wherever it sits in the query, so
    # extra parameters such as "&t=10s" or "list=..." do not leak into the id.
    query_ids = parse_qs(parsed.query).get("v")
    if query_ids:
        return query_ids[0]

    segments = [part for part in parsed.path.split("/") if part]
    host = (parsed.hostname or "").lower()
    if host == "youtu.be" or host.endswith(".youtu.be"):
        # Short links keep the id as the first path segment.
        if segments:
            return segments[0]
    elif len(segments) >= 2 and segments[0] in ("embed", "shorts", "live", "v"):
        return segments[1]

    raise ValueError(f"Could not find a YouTube video id in {video_link!r}")
# How to use: postSummaryWithBart("https://ethereum.org/en/what-is-ethereum/")
def postSummaryWithBart(blog_link):
    """Summarize the headings and paragraphs of a web page.

    The page is downloaded with ``requests``, the text of every ``<h1>`` and
    ``<p>`` element is collected with BeautifulSoup, the text is split into
    sentences after each ``.``, ``?`` or ``!``, the sentences are packed into
    chunks of at most 500 words, and every chunk is summarized with the
    default ``transformers`` summarization pipeline.

    Args:
        blog_link: URL of the page to summarize.

    Returns:
        The chunk summaries joined with single spaces, or an empty string
        when the page yields no text.

    Raises:
        requests.HTTPError: if the server answers with an error status.
    """
    from transformers import pipeline
    from bs4 import BeautifulSoup
    import requests

    # Fetch before loading the model so a bad URL fails quickly.
    response = requests.get(blog_link, timeout=30)
    response.raise_for_status()
    soup = BeautifulSoup(response.text, 'html.parser')
    article = ' '.join(tag.text for tag in soup.find_all(['h1', 'p']))

    chunks = _chunk_article(article, max_chunk=500)
    if not chunks:
        return ''

    summarizer = pipeline("summarization")
    results = summarizer(chunks, max_length=70, min_length=30, do_sample=False)
    return ' '.join(item['summary_text'] for item in results)


def _chunk_article(article, max_chunk=500):
    """Split *article* into sentence-aligned strings of at most *max_chunk* words."""
    import re

    # Split right after each sentence terminator. The previous approach of
    # inserting a marker and splitting on it had lost the marker text, which
    # left an empty separator and made str.split raise ValueError.
    sentences = re.split(r'(?<=[.?!])', article)

    chunks = []
    for sentence in sentences:
        words = sentence.split()
        if not words:
            continue
        # Extend the current chunk while it stays within the word budget;
        # otherwise start a new one (a single over-long sentence gets its own).
        if chunks and len(chunks[-1]) + len(words) <= max_chunk:
            chunks[-1].extend(words)
        else:
            chunks.append(words)

    return [' '.join(chunk) for chunk in chunks]
# How to use: abstractiveSummaryWithPegasus("""Sample text to be summarized""")
def abstractiveSummaryWithPegasus(words):
    """Summarize *words* with the ``google/pegasus-xsum`` model.

    Args:
        words: Text to summarize; passed to the tokenizer with truncation
            enabled and tensors returned in PyTorch ("pt") format.

    Returns:
        The decoded summary string. It is also printed to stdout, which was
        the only output of the previous version.
    """
    from transformers import PegasusForConditionalGeneration, PegasusTokenizer

    model_name = "google/pegasus-xsum"
    tokenizer = PegasusTokenizer.from_pretrained(model_name)
    model = PegasusForConditionalGeneration.from_pretrained(model_name)

    tokens = tokenizer(words, truncation=True, padding="longest", return_tensors="pt")
    summary_ids = model.generate(**tokens)
    # Drop special tokens so markers such as padding / end-of-sequence do not
    # end up in the summary text.
    actual_summ = tokenizer.decode(summary_ids[0], skip_special_tokens=True)

    print(actual_summ)
    return actual_summ
import gradio as gr
def process(context, question):
    """Placeholder handler for the Gradio interface.

    Args:
        context: Text entered in the first input box.
        question: Text entered in the second input box.

    Returns:
        A 2-tuple of strings, one per declared output component. Both are
        empty until a real model is wired in.
    """
    # TODO: Implement your question-answering model here...
    # The interface declares two outputs, so two values must be returned;
    # returning None (the previous stub) does not satisfy that.
    answer = ""
    details = ""
    return answer, details
# Build the UI around `process` (two text inputs, two text outputs) and launch it.
gr.Interface(
    fn=process,
    inputs=["text", "text"],
    outputs=["textbox", "text"],
).launch()