File size: 2,582 Bytes
caf0283
d769310
756850c
d769310
 
53545b3
d769310
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
6895495
d769310
 
 
 
756850c
d769310
 
 
 
756850c
d769310
 
 
756850c
d769310
 
 
756850c
d769310
 
 
 
6895495
d769310
 
 
6895495
d769310
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
import streamlit as st
from transformers import pipeline
import os
# Export the Hugging Face cache location through TRANSFORMERS_CACHE:
# use HF_HOME when it is defined, otherwise the per-user default hub cache.
# NOTE(review): this assignment executes after `transformers` has already been
# imported above; the library appears to read its cache variables at import
# time, so this may not influence where models are stored — confirm.
_fallback_cache_dir = os.path.expanduser('~/.cache/huggingface/hub')
os.environ['TRANSFORMERS_CACHE'] = os.environ.get('HF_HOME', _fallback_cache_dir)

# Build the three summarization pipelines (BART, T5, Pegasus)
@st.cache_resource
def load_models():
    """Create one summarization pipeline per pre-trained checkpoint.

    The function is wrapped in ``st.cache_resource``, so Streamlit reuses the
    returned objects instead of rebuilding them on every script rerun.

    Returns:
        A 3-tuple ``(bart, t5, pegasus)`` of summarization pipelines, in that
        order.
    """
    # Order matters: callers unpack the result positionally.
    checkpoints = (
        "facebook/bart-large-cnn",
        "t5-large",
        "google/pegasus-cnn_dailymail",
    )
    return tuple(pipeline("summarization", model=name) for name in checkpoints)

# ---- Page header ----
st.title("Text Summarization with Pre-trained Models: BART, T5, Pegasus")

# ---- Models ----
# Fetch the three pipelines while a spinner is displayed, then unpack them in
# the order load_models() returns them.
with st.spinner("Loading models..."):
    _loaded_pipelines = load_models()
bart_model, t5_model, pegasus_model = _loaded_pipelines

# ---- User inputs ----
# Free-form text box holding the document to summarize
text_input = st.text_area("Enter text to summarize:")

# Slider choosing the fraction of the input word count used as the summary
# length budget (0.1 to 0.5 in steps of 0.05, starting at 0.3)
_slider_settings = dict(
    min_value=0.1,
    max_value=0.5,
    value=0.3,
    step=0.05,
    help="Adjust how much of the original text to keep in the summary",
)
compression_rate = st.slider("Summary Compression Rate", **_slider_settings)

# Summarize the entered text with every model and show the results.
# Whitespace-only input is treated as empty so blank text is never sent to
# the models (a plain truthiness check would accept "   " with 0 words).
if text_input.strip():
    word_count = len(text_input.split())
    st.write(f"**Input Word Count:** {word_count}")

    if st.button("Generate Summaries"):
        with st.spinner("Generating summaries..."):
            # Summary length budget: the input word count scaled by the chosen
            # compression rate, floored at 50 so it always exceeds min_length.
            # NOTE(review): the budget is computed from words, while the
            # pipelines interpret max_length in tokens — confirm intended unit.
            max_length = max(50, int(word_count * compression_rate))

            # Shared generation settings for all three models.
            # truncation=True clips inputs longer than a model's maximum input
            # length instead of letting the model fail on over-long text.
            generation_kwargs = dict(
                max_length=max_length,
                min_length=30,
                num_beams=4,
                early_stopping=True,
                truncation=True,
            )

            summarizers = (
                ("BART", bart_model),
                ("T5", t5_model),
                ("Pegasus", pegasus_model),
            )

            # Generate every summary first, then render, so nothing is
            # displayed unless all three models succeed.
            summaries = [
                (label, model(text_input, **generation_kwargs)[0]['summary_text'])
                for label, model in summarizers
            ]

            # Display each summary with its own word count
            for label, summary in summaries:
                st.subheader(f"{label} Summary")
                st.write(summary)
                st.write(f"**Word Count:** {len(summary.split())}")
else:
    st.warning("Please enter text to summarize.")