Spaces:

DhanushSGowda
/

Text_summarization_using_llm

Sleeping

App Files Files Community

Dhanush S Gowda committed on Dec 17, 2024

Commit

6895495

verified ·

1 Parent(s): 401ffed

Update app.py

Browse files

Files changed (1) hide show

app.py +134 -66

app.py CHANGED Viewed

@@ -1,75 +1,143 @@
 import streamlit as st
 from transformers import pipeline
 import os
-# Set Hugging Face cache directory
-os.environ['TRANSFORMERS_CACHE'] = os.getenv('HF_HOME', os.path.expanduser('~/.cache/huggingface/hub'))
-# Function to load all three models
 @st.cache_resource
-def load_models():
-    bart_summarizer = pipeline("summarization", model="facebook/bart-large-cnn")
-    t5_summarizer = pipeline("summarization", model="t5-large")
-    pegasus_summarizer = pipeline("summarization", model="google/pegasus-cnn_dailymail")
-    return bart_summarizer, t5_summarizer, pegasus_summarizer
-# Streamlit app layout
-st.title("Text Summarization with Pre-trained Models: BART, T5, Pegasus")
-# Load models
-with st.spinner("Loading models..."):
-    bart_model, t5_model, pegasus_model = load_models()
-# Input text
-text_input = st.text_area("Enter text to summarize:")
-# User input for min and max words
-st.sidebar.header("Summary Length Settings")
-min_words = st.sidebar.slider("Minimum words in summary:", 10, 100, 50, step=5)
-max_words = st.sidebar.slider("Maximum words in summary:", min_words + 10, 300, 150, step=10)
-if text_input:
-    word_count = len(text_input.split())
-    st.write(f"**Input Word Count:** {word_count}")
-    if st.button("Generate Summaries"):
-        with st.spinner("Generating summaries..."):
-            # Generate summaries with dynamic length constraints
-            bart_summary = bart_model(
-                text_input,
-                max_length=max_words,
-                min_length=min_words,
-                num_beams=4,
-                early_stopping=True
-            )[0]['summary_text']
-            t5_summary = t5_model(
-                text_input,
-                max_length=max_words,
-                min_length=min_words,
-                num_beams=4,
-                early_stopping=True
-            )[0]['summary_text']
-            pegasus_summary = pegasus_model(
-                text_input,
-                max_length=max_words,
-                min_length=min_words,
-                num_beams=4,
-                early_stopping=True
-            )[0]['summary_text']
-        # Display summaries
-        st.subheader("BART Summary")
-        st.write(bart_summary)
-        st.write(f"**Word Count:** {len(bart_summary.split())}")
-        st.subheader("T5 Summary")
-        st.write(t5_summary)
-        st.write(f"**Word Count:** {len(t5_summary.split())}")
-        st.subheader("Pegasus Summary")
-        st.write(pegasus_summary)
-        st.write(f"**Word Count:** {len(pegasus_summary.split())}")
-else:
-    st.warning("Please enter text to summarize.")

 import streamlit as st
+import multiprocessing
 from transformers import pipeline
 import os
+import torch
+# Optimize model loading and caching
 @st.cache_resource
+def load_model(model_name):
+    """Efficiently load a summarization model."""
+    try:
+        # Use GPU if available
+        device = 'cuda' if torch.cuda.is_available() else 'cpu'
+        return pipeline("summarization", model=model_name, device=device)
+    except Exception as e:
+        st.error(f"Error loading model {model_name}: {e}")
+        return None
+def generate_summary(model, text, length_percentage=0.3):
+    """
+    Generate summary with intelligent length control.
+    Args:
+        model: Hugging Face summarization pipeline
+        text: Input text to summarize
+        length_percentage: Percentage of original text to use for summary
+    Returns:
+        Generated summary
+    """
+    # Intelligent length calculation
+    word_count = len(text.split())
+    max_length = max(50, int(word_count * length_percentage))
+    min_length = max(30, int(word_count * 0.1))
+    try:
+        summary = model(
+            text,
+            max_length=max_length,
+            min_length=min_length,
+            num_beams=4,
+            early_stopping=True
+        )[0]['summary_text']
+        return summary
+    except Exception as e:
+        st.error(f"Summarization error: {e}")
+        return "Could not generate summary."
+def parallel_summarize(text, length_percentage=0.3):
+    """
+    Generate summaries in parallel using multiprocessing.
+    Args:
+        text: Input text to summarize
+        length_percentage: Percentage of original text to use for summary
+    Returns:
+        Dictionary of summaries from different models
+    """
+    model_configs = [
+        ("facebook/bart-large-cnn", "BART"),
+        ("t5-large", "T5"),
+        ("google/pegasus-cnn_dailymail", "Pegasus")
+    ]
+    with multiprocessing.Pool(processes=min(len(model_configs), os.cpu_count())) as pool:
+        args = [(load_model(model_name), text, length_percentage)
+                for model_name, _ in model_configs]
+        results = pool.starmap(generate_summary, args)
+    return {name: summary for (_, name), summary in zip(model_configs, results)}
+def main():
+    st.set_page_config(
+        page_title="Multi-Model Text Summarization",
+        page_icon="📝",
+        layout="wide"
+    )
+    # Title and Description
+    st.title("🤖 Advanced Text Summarization")
+    st.markdown("""
+    Generate concise summaries using multiple state-of-the-art models.
+    Intelligently adapts summary length based on input text.
+    """)
+    # Text Input
+    text_input = st.text_area(
+        "Paste your text here:",
+        height=250,
+        help="Enter the text you want to summarize"
+    )
+    # Length Control
+    length_control = st.slider(
+        "Summary Compression Rate",
+        min_value=0.1,
+        max_value=0.5,
+        value=0.3,
+        step=0.05,
+        help="Adjust how much of the original text to keep in the summary"
+    )
+    if st.button("Generate Summaries", type="primary"):
+        if not text_input:
+            st.warning("Please enter some text to summarize.")
+            return
+        progress_text = st.empty()
+        progress_bar = st.progress(0)
+        stages = ["Initializing Models", "Running BART", "Running T5", "Running Pegasus", "Completed"]
+        try:
+            for i, stage in enumerate(stages[:-1], 1):
+                progress_text.info(stage)
+                progress_bar.progress(i * 20)
+                if i == 2:
+                    summaries = parallel_summarize(text_input, length_control)
+            progress_text.success("Summarization Complete!")
+            progress_bar.progress(100)
+            st.subheader("📝 Generated Summaries")
+            cols = st.columns(3)
+            for (col, (model, summary)) in zip(cols, summaries.items()):
+                with col:
+                    st.markdown(f"### {model} Summary")
+                    st.write(summary)
+                    st.caption(f"Word Count: {len(summary.split())}")
+        except Exception as e:
+            st.error(f"An error occurred: {e}")
+        finally:
+            progress_text.empty()
+            progress_bar.empty()
+# Call main() only when this file is executed as the entry script, not when it is imported.
+if __name__ == "__main__":
+    main()