Dhanush S Gowda committed on
Commit 756850c · verified · 1 Parent(s): 6895495

Update app.py

Files changed (1)
  1. app.py +177 -104
app.py CHANGED
@@ -1,143 +1,216 @@
 import streamlit as st
-import multiprocessing
-from transformers import pipeline
-import os
 import torch

-# Optimize model loading and caching
-@st.cache_resource
-def load_model(model_name):
-    """Efficiently load a summarization model."""
-    try:
-        # Use GPU if available
-        device = 'cuda' if torch.cuda.is_available() else 'cpu'
-        return pipeline("summarization", model=model_name, device=device)
-    except Exception as e:
-        st.error(f"Error loading model {model_name}: {e}")
-        return None
-
-def generate_summary(model, text, length_percentage=0.3):
-    """
-    Generate summary with intelligent length control.
-
-    Args:
-        model: Hugging Face summarization pipeline
-        text: Input text to summarize
-        length_percentage: Percentage of original text to use for summary
-
-    Returns:
-        Generated summary
-    """
-    # Intelligent length calculation
-    word_count = len(text.split())
-    max_length = max(50, int(word_count * length_percentage))
-    min_length = max(30, int(word_count * 0.1))
-
-    try:
-        summary = model(
-            text,
-            max_length=max_length,
-            min_length=min_length,
-            num_beams=4,
-            early_stopping=True
-        )[0]['summary_text']
-        return summary
-    except Exception as e:
-        st.error(f"Summarization error: {e}")
-        return "Could not generate summary."
-
-def parallel_summarize(text, length_percentage=0.3):
-    """
-    Generate summaries in parallel using multiprocessing.
-
-    Args:
-        text: Input text to summarize
-        length_percentage: Percentage of original text to use for summary
-
-    Returns:
-        Dictionary of summaries from different models
-    """
-    model_configs = [
-        ("facebook/bart-large-cnn", "BART"),
-        ("t5-large", "T5"),
-        ("google/pegasus-cnn_dailymail", "Pegasus")
-    ]
-
-    with multiprocessing.Pool(processes=min(len(model_configs), os.cpu_count())) as pool:
-        args = [(load_model(model_name), text, length_percentage)
-                for model_name, _ in model_configs]
-
-        results = pool.starmap(generate_summary, args)
-
-        return {name: summary for (_, name), summary in zip(model_configs, results)}
-
 def main():
     st.set_page_config(
-        page_title="Multi-Model Text Summarization",
-        page_icon="📝",
         layout="wide"
     )

-    # Title and Description
-    st.title("🤖 Advanced Text Summarization")
-    st.markdown("""
-    Generate concise summaries using multiple state-of-the-art models.
-    Intelligently adapts summary length based on input text.
-    """)

-    # Text Input
     text_input = st.text_area(
-        "Paste your text here:",
-        height=250,
-        help="Enter the text you want to summarize"
     )

-    # Length Control
-    length_control = st.slider(
-        "Summary Compression Rate",
-        min_value=0.1,
-        max_value=0.5,
-        value=0.3,
-        step=0.05,
-        help="Adjust how much of the original text to keep in the summary"
-    )

-    if st.button("Generate Summaries", type="primary"):
         if not text_input:
-            st.warning("Please enter some text to summarize.")
             return

-        progress_text = st.empty()
         progress_bar = st.progress(0)
-
-        stages = ["Initializing Models", "Running BART", "Running T5", "Running Pegasus", "Completed"]

         try:
-            for i, stage in enumerate(stages[:-1], 1):
-                progress_text.info(stage)
-                progress_bar.progress(i * 20)
-
-                if i == 2:
-                    summaries = parallel_summarize(text_input, length_control)

-            progress_text.success("Summarization Complete!")
             progress_bar.progress(100)

-            st.subheader("📝 Generated Summaries")
             cols = st.columns(3)
-
             for (col, (model, summary)) in zip(cols, summaries.items()):
                 with col:
-                    st.markdown(f"### {model} Summary")
                     st.write(summary)
-                    st.caption(f"Word Count: {len(summary.split())}")

         except Exception as e:
-            st.error(f"An error occurred: {e}")

         finally:
-            progress_text.empty()
             progress_bar.empty()

 if __name__ == "__main__":
     main()
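A note on the `multiprocessing` design removed above: `Pool.starmap` pickles every argument into the worker processes, so this version would ship a full copy of each loaded pipeline across a process boundary, and a CUDA-backed model generally cannot cross a fork/spawn at all; the thread-based rewrite below avoids both problems. A minimal sketch of the serialization cost (illustrative only; the small distilled checkpoint here is not part of this commit, and pickling may fail outright for GPU or compiled models):

import pickle
from transformers import pipeline

# Load a small summarization pipeline purely to measure its pickled size.
summarizer = pipeline("summarization", model="sshleifer/distilbart-cnn-12-6")

# This is roughly what Pool.starmap would do to each argument tuple: for a
# CPU pipeline it usually succeeds, but the blob is the size of the weights.
blob = pickle.dumps(summarizer)
print(f"pickled pipeline: ~{len(blob) / 1e6:.0f} MB")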
 
 import streamlit as st
 import torch
+from transformers import AutoModelForSeq2SeqLM, AutoTokenizer
+import concurrent.futures
+import numpy as np
+import psutil
+import os

+class UltraOptimizedSummarizer:
+    def __init__(self):
+        # Advanced caching and memory management
+        self.models = {}
+        self.tokenizers = {}
+        self.device = self._get_optimal_device()
+
+    def _get_optimal_device(self):
+        """Intelligently select the best computational device."""
+        if torch.cuda.is_available():
+            # Find the GPU with most free memory
+            gpu_memory = [torch.cuda.memory_allocated(i) for i in range(torch.cuda.device_count())]
+            best_gpu = np.argmin(gpu_memory)
+            return torch.device(f'cuda:{best_gpu}')
+        elif torch.backends.mps.is_available():
+            return torch.device('mps')
+        return torch.device('cpu')
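One caveat on `_get_optimal_device`: `torch.cuda.memory_allocated(i)` reports only memory held by the current process's caching allocator, so on a machine shared with other jobs every GPU can look equally empty. A hedged alternative sketch that asks the driver for actual free memory via `torch.cuda.mem_get_info` (available in recent PyTorch releases):

import torch

def freest_gpu() -> torch.device:
    """Pick the CUDA device with the most driver-reported free memory."""
    if not torch.cuda.is_available():
        return torch.device('cpu')
    # mem_get_info(i) returns (free_bytes, total_bytes) for device i.
    free = [torch.cuda.mem_get_info(i)[0] for i in range(torch.cuda.device_count())]
    return torch.device(f'cuda:{free.index(max(free))}')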
+    def _load_model(self, model_name):
+        """
+        Optimized model loading with advanced memory management.
+        Uses half-precision (float16) for reduced memory footprint.
+        """
+        if model_name in self.models:
+            return self.models[model_name], self.tokenizers[model_name]
+
+        try:
+            # Load tokenizer
+            tokenizer = AutoTokenizer.from_pretrained(model_name, cache_dir='/tmp/huggingface_cache')
+
+            # Load model with optimization
+            model = AutoModelForSeq2SeqLM.from_pretrained(
+                model_name,
+                cache_dir='/tmp/huggingface_cache',
+                torch_dtype=torch.float16 if self.device.type == 'cuda' else torch.float32,
+                low_cpu_mem_usage=True
+            ).to(self.device)
+
+            # Optional: Model compilation for additional speed (PyTorch 2.0+)
+            if hasattr(torch, 'compile'):
+                model = torch.compile(model)
+
+            self.models[model_name] = model
+            self.tokenizers[model_name] = tokenizer
+
+            return model, tokenizer
+
+        except Exception as e:
+            st.error(f"Model loading error for {model_name}: {e}")
+            return None, None
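Since `_load_model` memoizes into `self.models` and `self.tokenizers`, the cache lives only as long as the instance, and Streamlit re-executes the whole script (re-instantiating the class in `main()` below) on every widget interaction. A common fix, sketched here but not part of this commit, is to cache the instance itself with the same `st.cache_resource` decorator the old version used:

import streamlit as st

@st.cache_resource
def get_summarizer():
    # Built once per server process and reused across reruns, so the
    # models/tokenizers dictionaries actually persist between clicks.
    return UltraOptimizedSummarizer()  # class defined in app.py above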
+    def summarize(self, text, model_name, max_length=150, min_length=50):
+        """
+        Ultra-optimized summarization with intelligent truncation.
+        """
+        model, tokenizer = self._load_model(model_name)
+        if not model or not tokenizer:
+            return "Summarization failed."
+
+        try:
+            # Intelligent text truncation
+            inputs = tokenizer(
+                text,
+                max_length=1024,  # Prevent OOM errors
+                truncation=True,
+                return_tensors='pt'
+            ).to(self.device)
+
+            # Generate summary with optimized parameters
+            summary_ids = model.generate(
+                inputs['input_ids'],
+                num_beams=4,
+                max_length=max_length,
+                min_length=min_length,
+                early_stopping=True,
+                no_repeat_ngram_size=2,
+                do_sample=False
+            )
+
+            # Decode summary
+            summary = tokenizer.decode(
+                summary_ids[0],
+                skip_special_tokens=True
+            )
+
+            return summary
+
+        except Exception as e:
+            st.error(f"Summarization error for {model_name}: {e}")
+            return "Could not generate summary."
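One small point on `summarize`: recent `transformers` releases already run `model.generate` under `torch.no_grad`, so no autograd graph is built during beam search; wrapping the call in `torch.inference_mode()` would be an optional belt-and-braces measure rather than a required fix. A self-contained illustration of what that context manager guarantees:

import torch

x = torch.ones(2, requires_grad=True)
with torch.inference_mode():
    y = x * 2  # no autograd history is recorded inside this block
print(y.requires_grad)  # False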
+    def parallel_summarize(self, text, max_length=150, min_length=50):
+        """
+        Concurrent summarization with advanced thread pooling.
+        """
+        model_configs = [
+            "facebook/bart-large-cnn",
+            "t5-large",
+            "google/pegasus-cnn_dailymail"
+        ]
+
+        # Dynamic thread count based on system resources
+        max_workers = min(
+            len(model_configs),
+            psutil.cpu_count(logical=False),  # Physical cores
+            4  # Cap at 4 to prevent resource exhaustion
+        )
+
+        # Use concurrent futures for true parallel processing
+        with concurrent.futures.ThreadPoolExecutor(max_workers=max_workers) as executor:
+            # Submit summarization tasks
+            future_to_model = {
+                executor.submit(
+                    self.summarize,
+                    text,
+                    model,
+                    max_length,
+                    min_length
+                ): model for model in model_configs
+            }
+
+            # Collect results as they complete
+            summaries = {}
+            for future in concurrent.futures.as_completed(future_to_model):
+                model = future_to_model[future]
+                try:
+                    summaries[model] = future.result()
+                except Exception as e:
+                    summaries[model] = f"Error: {e}"
+
+        return summaries
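Two notes on `parallel_summarize`: threads genuinely overlap here because PyTorch releases the GIL inside its C++ kernels, and `psutil.cpu_count(logical=False)` is documented to return `None` when the physical-core count cannot be determined, which would make the `min()` above raise a `TypeError`. A hedged hardening sketch for the worker count:

import psutil

# Fall back to logical cores, then to 1, if the physical count is unknown.
physical = psutil.cpu_count(logical=False) or psutil.cpu_count() or 1
max_workers = min(3, physical, 4)  # 3 model configs, capped at 4 workers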
 def main():
     st.set_page_config(
+        page_title="Ultra-Optimized Summarization",
+        page_icon="🚀",
         layout="wide"
     )

+    st.title("🔬 Hyper-Optimized Text Summarization")

+    # Initialize optimized summarizer
+    summarizer = UltraOptimizedSummarizer()
+
+    # Input and processing
     text_input = st.text_area(
+        "Enter text for advanced summarization:",
+        height=300
     )

+    # Advanced compression control
+    col1, col2 = st.columns(2)
+    with col1:
+        max_length = st.slider(
+            "Max Summary Length",
+            min_value=50,
+            max_value=300,
+            value=150
+        )

+    with col2:
+        compression_rate = st.slider(
+            "Compression Aggressiveness",
+            min_value=0.1,
+            max_value=0.5,
+            value=0.3,
+            step=0.05
+        )
+
+    if st.button("Generate Hyper-Optimized Summaries"):
         if not text_input:
+            st.warning("Please provide text to summarize.")
             return

+        # Progress tracking
         progress_bar = st.progress(0)
+        status_text = st.empty()

         try:
+            # Perform parallel summarization
+            status_text.info("Initializing ultra-optimized summarization...")
+            progress_bar.progress(20)
+
+            summaries = summarizer.parallel_summarize(
+                text_input,
+                max_length=max_length,
+                min_length=int(max_length * 0.5)
+            )

             progress_bar.progress(100)
+            status_text.success("Summarization Complete!")

+            # Display results
             cols = st.columns(3)
             for (col, (model, summary)) in zip(cols, summaries.items()):
                 with col:
+                    st.subheader(model.split('/')[-1].upper())
                     st.write(summary)

         except Exception as e:
+            st.error(f"Optimization failed: {e}")

         finally:
             progress_bar.empty()
+            status_text.empty()

 if __name__ == "__main__":
     main()
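Two closing observations: the `compression_rate` slider's value is read but never passed into `parallel_summarize` (`min_length` is derived from `max_length` instead), and the class is easy to smoke-test outside Streamlit. A usage sketch, assuming the file above is saved as `app.py` on the import path and `article.txt` is any long-form text file (both names illustrative; `main()` is guarded, so importing only runs the top-level imports):

from app import UltraOptimizedSummarizer

summarizer = UltraOptimizedSummarizer()
with open("article.txt") as f:
    article = f.read()

# Runs all three checkpoints concurrently and prints each model's summary.
for model_name, summary in summarizer.parallel_summarize(
        article, max_length=120, min_length=60).items():
    print(f"== {model_name} ==\n{summary}\n")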