Spaces:
Sleeping
Sleeping
File size: 2,214 Bytes
7fbd2c2 9c7c7dd 272d571 7fbd2c2 4cf87c9 7fbd2c2 9c7c7dd 272d571 7fbd2c2 272d571 9c7c7dd 272d571 9c7c7dd 272d571 9c7c7dd 30e894a 272d571 9c7c7dd 272d571 9c7c7dd d379fcc 272d571 d379fcc 30e894a d379fcc 9c7c7dd 272d571 30e894a 7fbd2c2 272d571 1dc4ba4 9c1161e |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 |
import gradio as gr
from transformers import pipeline
import os
import pandas as pd
# Build the summarization pipeline once at import time; summarize_text reuses
# this single instance for every chunk it processes.
summarizer = pipeline(task="summarization", model="astro21/bart-cls")

# Module-level chunk count; summarize_text rebinds it via ``global`` on each call.
chunk_counter = 0
def summarize_text(input_text, max_chunk_size=1024, output_path="summarized.txt"):
    """Summarize ``input_text`` chunk by chunk and save the merged summary.

    The text is cut into consecutive slices of at most ``max_chunk_size``
    characters. Each slice is summarized independently with the module-level
    ``summarizer`` pipeline, and the per-chunk summaries are joined with
    newlines.

    Args:
        input_text: Text to summarize. ``None`` is treated as empty text.
        max_chunk_size: Maximum number of characters per chunk. Must be a
            positive integer. Defaults to 1024.
        output_path: File that receives the merged summary (without the
            "Chunk N:" labels). Defaults to ``"summarized.txt"``.

    Returns:
        A 3-tuple ``(summarized_text, chunk_df, output_path)`` where
        ``summarized_text`` is the labelled per-chunk summary text,
        ``chunk_df`` is a DataFrame with the columns ``Chunk Number`` and
        ``Chunk Length``, and ``output_path`` is the file that was written.

    Raises:
        ValueError: If ``max_chunk_size`` is smaller than 1.
    """
    global chunk_counter

    if max_chunk_size < 1:
        raise ValueError("max_chunk_size must be a positive integer")
    if input_text is None:
        input_text = ""

    chunks = [
        input_text[start:start + max_chunk_size]
        for start in range(0, len(input_text), max_chunk_size)
    ]

    # Chunk numbers are derived from local state only. Reading them back from
    # the shared global while looping lets overlapping calls corrupt the
    # labels and the DataFrame index.
    summaries = [
        summarizer(chunk, max_length=128, min_length=64, do_sample=False)[0]['summary_text']
        for chunk in chunks
    ]
    labelled = [
        f"Chunk {number}:\n{summary}"
        for number, summary in enumerate(summaries, start=1)
    ]

    summarized_text = "\n".join(labelled)
    summarized_text_only = "\n".join(summaries)

    # Explicit encoding so non-ASCII summaries are written the same way on
    # every platform.
    with open(output_path, "w", encoding="utf-8") as output_file:
        output_file.write(summarized_text_only)

    chunk_df = pd.DataFrame({
        'Chunk Number': list(range(1, len(chunks) + 1)),
        'Chunk Length': [len(chunk) for chunk in chunks],
    })

    # Still publish the final count for any code that reads the module global.
    chunk_counter = len(chunks)
    return summarized_text, chunk_df, output_path
def read_file(file):
    """Return the text content of an uploaded file.

    Args:
        file: The upload to read. May be a list or tuple of uploads (only
            the first entry is read), a single upload object exposing its
            path as ``.name``, or a filesystem path (``str`` or
            ``os.PathLike``).

    Returns:
        The whole file content as a string, decoded as UTF-8.

    Raises:
        IndexError: If ``file`` is an empty list or tuple.
        OSError: If the file cannot be opened.
        UnicodeDecodeError: If the file is not valid UTF-8.
    """
    upload = file[0] if isinstance(file, (list, tuple)) else file
    # Path-like values are used directly; checking them first matters because
    # pathlib paths also have a ``.name`` attribute (the basename only).
    if isinstance(upload, (str, os.PathLike)):
        path = upload
    else:
        path = upload.name
    print(path)
    with open(path, 'r', encoding="utf-8") as handle:
        return handle.read()
def summarize_text_file(file):
    """Summarize the content of an uploaded text file.

    Args:
        file: The uploaded file value, in any form accepted by
            ``read_file``. ``None`` or an empty list/tuple/string means
            nothing was uploaded.

    Returns:
        The ``(summarized_text, chunk_df, output_path)`` tuple produced by
        ``summarize_text``. When nothing was uploaded, returns
        ``("", None, None)`` so the caller always receives three values.
    """
    nothing_uploaded = file is None or (
        isinstance(file, (list, tuple, str)) and len(file) == 0
    )
    if nothing_uploaded:
        # Three placeholders, one per declared output, instead of a bare None.
        return "", None, None
    return summarize_text(read_file(file))
# File-upload input component; whatever it delivers is passed to
# summarize_text_file as its single argument.
input_type = gr.inputs.File("text")

# Wire the upload to three labelled outputs: the labelled summary text, the
# per-chunk table, and the saved summary file offered for download.
demo = gr.Interface(
    fn=summarize_text_file,
    inputs=input_type,
    outputs=[
        gr.Textbox(label="Summarized Text"),
        gr.Dataframe(label="Chunk Information", type="pandas"),
        gr.File(label="Download Summarized Text", type="file", live=False),
    ],
    title="Text Summarization",
    description="Summarize text using BART",
    theme="huggingface",
    allow_flagging="never",
    live=True,
)

# NOTE(review): share=True presumably requests a public link — confirm against
# the Gradio version this app is pinned to.
demo.launch(share=True)
|