File size: 2,214 Bytes
7fbd2c2
 
9c7c7dd
272d571
7fbd2c2
 
4cf87c9
7fbd2c2
9c7c7dd
 
272d571
7fbd2c2
272d571
 
9c7c7dd
272d571
 
9c7c7dd
 
272d571
 
 
9c7c7dd
 
 
30e894a
272d571
 
 
9c7c7dd
 
272d571
 
 
 
 
 
 
 
 
 
9c7c7dd
d379fcc
272d571
 
d379fcc
30e894a
 
 
 
 
d379fcc
9c7c7dd
 
272d571
30e894a
7fbd2c2
272d571
 
 
 
 
 
 
 
 
 
1dc4ba4
9c1161e
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
import gradio as gr
from transformers import pipeline
import os
import pandas as pd

# Load the text summarization pipeline once, at import time, so that every
# call to summarize_text() reuses the same "astro21/bart-cls" model object.
summarizer = pipeline("summarization", model="astro21/bart-cls")

# Module-level chunk count; summarize_text() writes it via `global`.
chunk_counter = 0


def summarize_text(input_text, max_chunk_size=1024, output_path="summarized.txt"):
    """Summarize *input_text* chunk by chunk and save the merged summary.

    The text is cut into consecutive slices of at most ``max_chunk_size``
    characters, each slice is passed to the module-level ``summarizer``
    pipeline, and the per-chunk summaries are joined with newlines.

    Args:
        input_text: The full text to summarize.
        max_chunk_size: Maximum number of characters per chunk (must be > 0).
        output_path: File the merged summary (without chunk labels) is
            written to; it is overwritten on every call.

    Returns:
        A 3-tuple ``(labelled_summary, chunk_df, output_path)`` where
        ``labelled_summary`` is the summaries prefixed with ``"Chunk N:"``,
        ``chunk_df`` is a DataFrame with columns ``'Chunk Number'`` and
        ``'Chunk Length'``, and ``output_path`` is the path of the written file.

    Raises:
        ValueError: If ``max_chunk_size`` is not a positive integer.
    """
    global chunk_counter

    if max_chunk_size <= 0:
        raise ValueError("max_chunk_size must be a positive integer")

    chunks = [
        input_text[start:start + max_chunk_size]
        for start in range(0, len(input_text), max_chunk_size)
    ]

    labelled_summaries = []
    plain_summaries = []
    chunk_lengths = []

    # Number the chunks with a local counter: the module-level counter is
    # shared by every caller, so incrementing it per chunk would let
    # overlapping calls corrupt each other's labels and table size.
    for number, chunk in enumerate(chunks, start=1):
        result = summarizer(chunk, max_length=128, min_length=64, do_sample=False)
        summary = result[0]['summary_text']
        labelled_summaries.append(f"Chunk {number}:\n{summary}")
        plain_summaries.append(summary)
        chunk_lengths.append(len(chunk))

    summarized_text = "\n".join(labelled_summaries)
    summarized_text_only = "\n".join(plain_summaries)

    # Save the merged summary to a file. The encoding is explicit so that
    # non-ASCII characters in a summary cannot fail on a non-UTF-8 locale.
    with open(output_path, "w", encoding="utf-8") as output_file:
        output_file.write(summarized_text_only)

    # Both columns are derived from the same list, so their lengths always match.
    chunk_df = pd.DataFrame({
        'Chunk Number': range(1, len(chunk_lengths) + 1),
        'Chunk Length': chunk_lengths,
    })

    # Still publish the count for any code that reads the module-level value.
    chunk_counter = len(chunks)

    return summarized_text, chunk_df, output_path


def read_file(file):
    """Return the text content of an uploaded file.

    Args:
        file: Either a list/tuple whose first element is the upload (only
            that first element is read), a single upload object exposing a
            ``name`` attribute holding its path, or a plain path
            (``str`` / ``os.PathLike``).

    Returns:
        The whole file decoded as UTF-8 text.

    Raises:
        IndexError: If an empty list/tuple is passed.
        OSError: If the file cannot be opened.
        UnicodeDecodeError: If the file is not valid UTF-8.
    """
    first = file[0] if isinstance(file, (list, tuple)) else file

    # Plain paths are used directly; anything else is expected to carry its
    # on-disk location in ``.name`` (as temporary-file wrappers do).
    if isinstance(first, (str, os.PathLike)):
        path = first
    else:
        path = first.name

    print(path)
    # Decode explicitly as UTF-8 instead of relying on the platform locale.
    with open(path, 'r', encoding="utf-8") as file_:
        return file_.read()


def summarize_text_file(file):
    """Summarize the text contained in an uploaded file.

    Args:
        file: The upload to summarize, in any form accepted by
            ``read_file``; ``None`` or an empty list means "no file".

    Returns:
        The 3-tuple produced by ``summarize_text`` (summary text, chunk
        DataFrame, path of the saved summary). When no file is supplied,
        ``(None, None, None)`` is returned so there is still exactly one
        value per output component.
    """
    # A bare ``None`` cannot be spread over the three outputs the interface
    # declares, so return one empty value per output instead.
    if not file:
        return None, None, None

    return summarize_text(read_file(file))


# Upload widget that feeds summarize_text_file.
input_type = gr.inputs.File("text")

# Output widgets, labelled and listed in the same order as the values returned
# by the handler: summary text, chunk table, downloadable summary file.
output_components = [
    gr.Textbox(label="Summarized Text"),
    gr.Dataframe(label="Chunk Information", type="pandas"),
    gr.File(label="Download Summarized Text", type="file", live=False),
]

demo = gr.Interface(
    fn=summarize_text_file,
    inputs=input_type,
    outputs=output_components,
    title="Text Summarization",
    description="Summarize text using BART",
    theme="huggingface",
    allow_flagging="never",
    live=True,
)

demo.launch(share=True)