astro21 committed on
Commit
8824c6a
·
1 Parent(s): a27bf44

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +61 -0
app.py ADDED
@@ -0,0 +1,61 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ from transformers import pipeline
3
+ import pandas as pd
4
+
5
# Streamlit re-executes this script from top to bottom on every user
# interaction, so the pipeline is built behind ``st.cache_resource`` to avoid
# reloading the model on each rerun.
@st.cache_resource
def _load_summarizer():
    """Build the summarization pipeline for the ``astro21/bart-cls`` model."""
    return pipeline("summarization", model="astro21/bart-cls")


# Load the text summarization pipeline
summarizer = _load_summarizer()

# Number of chunks handled by the most recent ``summarize_text`` call.
chunk_counter = 0
9
+
10
+
11
def summarize_text(input_text, max_chunk_size=1024):
    """Summarize ``input_text`` chunk by chunk and save the merged summary.

    The text is sliced into consecutive pieces of at most ``max_chunk_size``
    characters, each piece is passed to the module-level ``summarizer``, and
    the per-chunk summaries are joined with newlines.  The merged summary
    (without the "Chunk N:" labels) is written to ``summarized.txt`` in the
    current working directory.

    Args:
        input_text: The text to summarize.
        max_chunk_size: Maximum number of characters per chunk.  Slicing is
            character-based.  NOTE(review): the model's input limit is
            presumably measured in tokens, not characters -- confirm that
            1024 characters always fits.

    Returns:
        A 3-tuple ``(summarized_text, chunk_df, path)`` where
        ``summarized_text`` is the labelled summary ("Chunk N:" followed by
        that chunk's summary), ``chunk_df`` is a DataFrame with the columns
        'Chunk Number' and 'Chunk Length', and ``path`` is the name of the
        file the unlabelled summary was written to.

    Raises:
        ValueError: If ``max_chunk_size`` is not positive.
    """
    global chunk_counter

    if max_chunk_size <= 0:
        raise ValueError("max_chunk_size must be a positive integer")

    chunks = [
        input_text[start:start + max_chunk_size]
        for start in range(0, len(input_text), max_chunk_size)
    ]

    summaries = []
    # The loop target is the module-level counter, so it keeps reflecting how
    # many chunks have been processed, as it did before.
    chunk_counter = 0
    for chunk_counter, chunk in enumerate(chunks, start=1):
        result = summarizer(chunk, max_length=128, min_length=64, do_sample=False)
        summaries.append(result[0]['summary_text'])

    summarized_text = "\n".join(
        f"Chunk {number}:\n{summary}"
        for number, summary in enumerate(summaries, start=1)
    )
    summarized_text_only = "\n".join(summaries)

    # Save the merged summary to a file.  The encoding is explicit so that
    # non-ASCII characters in a summary do not depend on the platform default.
    output_path = "summarized.txt"
    with open(output_path, "w", encoding="utf-8") as output_file:
        output_file.write(summarized_text_only)

    chunk_df = pd.DataFrame({
        'Chunk Number': range(1, len(chunks) + 1),
        'Chunk Length': [len(chunk) for chunk in chunks],
    })

    return summarized_text, chunk_df, output_path
40
+
41
+
42
def summarize_text_file(file):
    """Read an uploaded text file and summarize its content.

    Args:
        file: A file-like object exposing ``read()``, or ``None``.

    Returns:
        The result of ``summarize_text`` for the file's content, or ``None``
        when ``file`` is ``None``.
    """
    if file is None:
        return None

    content = file.read()
    # Binary file objects return bytes from read(); the summarizer and the
    # character-based chunking need str.  "utf-8-sig" also drops a leading
    # byte-order mark, and undecodable bytes are replaced instead of aborting
    # the whole request.
    if isinstance(content, (bytes, bytearray)):
        content = bytes(content).decode("utf-8-sig", errors="replace")

    return summarize_text(content)
46
+
47
+
48
# --- Streamlit page: upload a .txt file, show its summary, offer a download ---
st.title("Text Summarization")
st.write("Summarize text using BART")

uploaded_file = st.file_uploader("Upload a text file", type=["txt"])

if uploaded_file is not None:
    summary_text, chunk_table, summary_path = summarize_text_file(uploaded_file)

    st.subheader("Summarized Text")
    st.write(summary_text)

    st.subheader("Chunk Information")
    st.write(chunk_table)

    # The file is read back as bytes so the download carries exactly what was
    # written to disk, whatever encoding was used to write it.
    with open(summary_path, "rb") as summary_file:
        st.download_button(
            label="Download Summarized Text",
            data=summary_file.read(),
            file_name="summarized.txt",
            mime="text/plain",
        )