File size: 3,560 Bytes
609c3ee
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
0d48c9b
609c3ee
 
 
 
 
0d48c9b
609c3ee
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
0d48c9b
609c3ee
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
# How to use: YTVideoToText("https://www.youtube.com/watch?v=jQL0ZeHtXFc")
def YTVideoToText(video_link):
    """Summarize the transcript of a YouTube video.

    The transcript is fetched with ``youtube_transcript_api``, cut into
    1000-character pieces, and every piece is summarized on its own with the
    ``facebook/bart-large-cnn`` summarization pipeline.

    Args:
        video_link: URL of the video. ``.../watch?v=<id>`` links (with or
            without further query parameters such as ``&t=10s``) and
            ``https://youtu.be/<id>`` short links are understood.

    Returns:
        The per-piece summaries joined with single spaces, or an empty
        string when the transcript contains no text.

    Raises:
        IndexError: If no video id can be located in ``video_link``.
    """
    # installing & importing libraries
    from urllib.parse import parse_qs, urlparse

    from transformers import pipeline
    from youtube_transcript_api import YouTubeTranscriptApi

    # fetching video transcript
    parsed = urlparse(video_link)
    v_values = parse_qs(parsed.query).get("v")
    short_id = parsed.path.strip("/")
    if v_values:
        # Take only the "v" parameter so "&t=..." / "&list=..." are not
        # glued onto the id.
        video_id = v_values[0]
    elif parsed.hostname in ("youtu.be", "www.youtu.be") and short_id:
        video_id = short_id
    else:
        # Previous behaviour, kept for inputs the URL parser cannot interpret.
        video_id = video_link.split("=")[1]
    transcript = YouTubeTranscriptApi.get_transcript(video_id)

    # adding all text together (each segment keeps its leading space so the
    # 1000-character boundaries fall exactly where they used to)
    result = "".join(' ' + segment['text'] for segment in transcript)
    if not result.strip():
        # Nothing to summarize; skip loading the model altogether.
        return ""

    # summarize text
    summarizerfb = pipeline("summarization", model="facebook/bart-large-cnn")

    chunk_size = 1000
    summarized_text = []
    # Stepping by chunk_size never yields a trailing empty slice, which the
    # old "len / 1000 + 1 iterations" loop produced whenever the transcript
    # length was an exact multiple of 1000.
    for start in range(0, len(result), chunk_size):
        piece = result[start:start + chunk_size]
        if not piece.strip():
            continue
        out = summarizerfb(piece, max_length=130, min_length=30, do_sample=False)
        summarized_text.append(out[0]['summary_text'])

    # returning summary
    return ' '.join(summarized_text)


# How to use: postSummaryWithBart("https://ethereum.org/en/what-is-ethereum/")
def postSummaryWithBart(blog_link):
    """Summarize the ``<h1>`` and ``<p>`` text of a web page.

    The page is downloaded, its heading and paragraph text is split into
    sentences, the sentences are packed into chunks of at most 500
    space-separated words, and every chunk is summarized with the default
    ``transformers`` summarization pipeline.

    Args:
        blog_link: URL of the page to summarize.

    Returns:
        The chunk summaries joined with single spaces, or an empty string
        when the page has no heading or paragraph text.

    Raises:
        requests.RequestException: If the page cannot be downloaded, the
            request times out, or the server answers with an HTTP error
            status.
    """
    # importing libraries
    from transformers import pipeline
    from bs4 import BeautifulSoup
    import requests

    # getting our blog post
    # A timeout keeps an unresponsive server from blocking forever, and the
    # status check avoids summarizing an error page.
    r = requests.get(blog_link, timeout=30)
    r.raise_for_status()
    soup = BeautifulSoup(r.text, 'html.parser')
    results = soup.find_all(['h1', 'p'])
    ARTICLE = ' '.join(result.text for result in results)

    # replacing punctuations with end-of-sentence tags
    # The "<eos>" marker had been lost, which turned the replacements into
    # no-ops and made split('') raise "ValueError: empty separator".
    for mark in ('.', '?', '!'):
        ARTICLE = ARTICLE.replace(mark, mark + '<eos>')
    sentences = [s for s in ARTICLE.split('<eos>') if s.strip()]

    # chunking text
    max_chunk = 500
    chunks = []
    for sentence in sentences:
        words = sentence.split(' ')
        # Grow the latest chunk while it stays within max_chunk words;
        # otherwise (or for the very first sentence) open a new chunk.
        if chunks and len(chunks[-1]) + len(words) <= max_chunk:
            chunks[-1].extend(words)
        else:
            chunks.append(words)
    chunks = [' '.join(chunk) for chunk in chunks]
    if not chunks:
        # No text found on the page; skip loading the model altogether.
        return ''

    # loading summarization pipeline
    summarizer = pipeline("summarization")

    # summarizing text
    res = summarizer(chunks, max_length=70, min_length=30, do_sample=False)
    # Strip each summary and join with a space so consecutive summaries do
    # not run into each other.
    text = ' '.join(summ['summary_text'].strip() for summ in res)

    # returning summary
    return text


# How to use: abstractiveSummaryWithPegasus("""Sample text to be summarized""")
def abstractiveSummaryWithPegasus(words):
    """Generate an abstractive summary with ``google/pegasus-xsum``.

    Args:
        words: Text to summarize. It is tokenized with truncation enabled.
            Only the first generated sequence is decoded.

    Returns:
        The decoded summary string. The summary is also printed, as before.
    """
    # importing & loading model
    from transformers import PegasusForConditionalGeneration, PegasusTokenizer
    tokenizer = PegasusTokenizer.from_pretrained("google/pegasus-xsum")
    model = PegasusForConditionalGeneration.from_pretrained("google/pegasus-xsum")

    # perform summarization
    tokens = tokenizer(words, truncation=True, padding="longest", return_tensors="pt")
    summary = model.generate(**tokens)
    # skip_special_tokens drops the tokenizer's special tokens (padding /
    # end-of-sequence markers) so only the summary text remains.
    actual_summ = tokenizer.decode(summary[0], skip_special_tokens=True)

    # returning summary (the print is kept for callers that relied on it)
    print(actual_summ)
    return actual_summ


import gradio as gr
def process(context, question):
    """Placeholder callback passed to the Gradio interface.

    No question-answering model is implemented yet: both arguments are
    ignored and the function returns ``None``.
    """
    # TODO: Implement your question-answering model here...
    return None

# Expose `process` through a Gradio UI with two text inputs and two text
# outputs, then start the app.
gr.Interface(
    fn=process,
    inputs=["text", "text"],
    outputs=["textbox", "text"],
).launch()