Dhanush S Gowda committed on
Commit d769310 · verified · 1 Parent(s): 756850c

Update app.py

Files changed (1): app.py +60 -206
app.py CHANGED
@@ -1,216 +1,70 @@
 import streamlit as st
-import torch
-from transformers import AutoModelForSeq2SeqLM, AutoTokenizer
-import concurrent.futures
-import numpy as np
-import psutil
 import os
 
-class UltraOptimizedSummarizer:
-    def __init__(self):
-        # Advanced caching and memory management
-        self.models = {}
-        self.tokenizers = {}
-        self.device = self._get_optimal_device()
-
-    def _get_optimal_device(self):
-        """Intelligently select the best computational device."""
-        if torch.cuda.is_available():
-            # Find the GPU with most free memory
-            gpu_memory = [torch.cuda.memory_allocated(i) for i in range(torch.cuda.device_count())]
-            best_gpu = np.argmin(gpu_memory)
-            return torch.device(f'cuda:{best_gpu}')
-        elif torch.backends.mps.is_available():
-            return torch.device('mps')
-        return torch.device('cpu')
-
-    def _load_model(self, model_name):
-        """
-        Optimized model loading with advanced memory management.
-        Uses half-precision (float16) for reduced memory footprint.
-        """
-        if model_name in self.models:
-            return self.models[model_name], self.tokenizers[model_name]
-
-        try:
-            # Load tokenizer
-            tokenizer = AutoTokenizer.from_pretrained(model_name, cache_dir='/tmp/huggingface_cache')
-
-            # Load model with optimization
-            model = AutoModelForSeq2SeqLM.from_pretrained(
-                model_name,
-                cache_dir='/tmp/huggingface_cache',
-                torch_dtype=torch.float16 if self.device.type == 'cuda' else torch.float32,
-                low_cpu_mem_usage=True
-            ).to(self.device)
-
-            # Optional: Model compilation for additional speed (PyTorch 2.0+)
-            if hasattr(torch, 'compile'):
-                model = torch.compile(model)
-
-            self.models[model_name] = model
-            self.tokenizers[model_name] = tokenizer
-
-            return model, tokenizer
-
-        except Exception as e:
-            st.error(f"Model loading error for {model_name}: {e}")
-            return None, None
-
-    def summarize(self, text, model_name, max_length=150, min_length=50):
-        """
-        Ultra-optimized summarization with intelligent truncation.
-        """
-        model, tokenizer = self._load_model(model_name)
-        if not model or not tokenizer:
-            return "Summarization failed."
-
-        try:
-            # Intelligent text truncation
-            inputs = tokenizer(
-                text,
-                max_length=1024,  # Prevent OOM errors
-                truncation=True,
-                return_tensors='pt'
-            ).to(self.device)
-
-            # Generate summary with optimized parameters
-            summary_ids = model.generate(
-                inputs['input_ids'],
-                num_beams=4,
-                max_length=max_length,
-                min_length=min_length,
-                early_stopping=True,
-                no_repeat_ngram_size=2,
-                do_sample=False
-            )
-
-            # Decode summary
-            summary = tokenizer.decode(
-                summary_ids[0],
-                skip_special_tokens=True
-            )
-
-            return summary
-
-        except Exception as e:
-            st.error(f"Summarization error for {model_name}: {e}")
-            return "Could not generate summary."
-
-    def parallel_summarize(self, text, max_length=150, min_length=50):
-        """
-        Concurrent summarization with advanced thread pooling.
-        """
-        model_configs = [
-            "facebook/bart-large-cnn",
-            "t5-large",
-            "google/pegasus-cnn_dailymail"
-        ]
-
-        # Dynamic thread count based on system resources
-        max_workers = min(
-            len(model_configs),
-            psutil.cpu_count(logical=False),  # Physical cores
-            4  # Cap at 4 to prevent resource exhaustion
-        )
-
-        # Use concurrent futures for true parallel processing
-        with concurrent.futures.ThreadPoolExecutor(max_workers=max_workers) as executor:
-            # Submit summarization tasks
-            future_to_model = {
-                executor.submit(
-                    self.summarize,
-                    text,
-                    model,
-                    max_length,
-                    min_length
-                ): model for model in model_configs
-            }
-
-            # Collect results as they complete
-            summaries = {}
-            for future in concurrent.futures.as_completed(future_to_model):
-                model = future_to_model[future]
-                try:
-                    summaries[model] = future.result()
-                except Exception as e:
-                    summaries[model] = f"Error: {e}"
-
-            return summaries
-
-def main():
-    st.set_page_config(
-        page_title="Ultra-Optimized Summarization",
-        page_icon="🚀",
-        layout="wide"
-    )
-
-    st.title("🔬 Hyper-Optimized Text Summarization")
-
-    # Initialize optimized summarizer
-    summarizer = UltraOptimizedSummarizer()
-
-    # Input and processing
-    text_input = st.text_area(
-        "Enter text for advanced summarization:",
-        height=300
-    )
-
-    # Advanced compression control
-    col1, col2 = st.columns(2)
-    with col1:
-        max_length = st.slider(
-            "Max Summary Length",
-            min_value=50,
-            max_value=300,
-            value=150
-        )
-
-    with col2:
-        compression_rate = st.slider(
-            "Compression Aggressiveness",
-            min_value=0.1,
-            max_value=0.5,
-            value=0.3,
-            step=0.05
-        )
-
-    if st.button("Generate Hyper-Optimized Summaries"):
-        if not text_input:
-            st.warning("Please provide text to summarize.")
-            return
-
-        # Progress tracking
-        progress_bar = st.progress(0)
-        status_text = st.empty()
-
-        try:
-            # Perform parallel summarization
-            status_text.info("Initializing ultra-optimized summarization...")
-            progress_bar.progress(20)
-
-            summaries = summarizer.parallel_summarize(
-                text_input,
-                max_length=max_length,
-                min_length=int(max_length * 0.5)
-            )
-
-            progress_bar.progress(100)
-            status_text.success("Summarization Complete!")
-
-            # Display results
-            cols = st.columns(3)
-            for (col, (model, summary)) in zip(cols, summaries.items()):
-                with col:
-                    st.subheader(model.split('/')[-1].upper())
-                    st.write(summary)
-
-        except Exception as e:
-            st.error(f"Optimization failed: {e}")
-
-        finally:
-            progress_bar.empty()
-            status_text.empty()
-
-if __name__ == "__main__":
-    main()
 import streamlit as st
+from transformers import pipeline
 import os
+# Set Hugging Face cache directory
+os.environ['TRANSFORMERS_CACHE'] = os.getenv('HF_HOME', os.path.expanduser('~/.cache/huggingface/hub'))
+
+# Function to load all three models
+@st.cache_resource
+def load_models():
+    bart_summarizer = pipeline("summarization", model="facebook/bart-large-cnn")
+    t5_summarizer = pipeline("summarization", model="t5-large")
+    pegasus_summarizer = pipeline("summarization", model="google/pegasus-cnn_dailymail")
+    return bart_summarizer, t5_summarizer, pegasus_summarizer
+
+# Streamlit app layout
+st.title("Text Summarization with Pre-trained Models: BART, T5, Pegasus")
+
+# Load models
+with st.spinner("Loading models..."):
+    bart_model, t5_model, pegasus_model = load_models()
+
+# Input text
+text_input = st.text_area("Enter text to summarize:")
+
+# Compression rate slider
+compression_rate = st.slider(
+    "Summary Compression Rate",
+    min_value=0.1,
+    max_value=0.5,
+    value=0.3,
+    step=0.05,
+    help="Adjust how much of the original text to keep in the summary"
+)
+
+if text_input:
+    word_count = len(text_input.split())
+    st.write(f"**Input Word Count:** {word_count}")
+
+    if st.button("Generate Summaries"):
+        with st.spinner("Generating summaries..."):
+            # Calculate dynamic max length based on compression rate
+            max_length = max(50, int(word_count * compression_rate))
+
+            # Generate summaries
+            bart_summary = bart_model(
+                text_input, max_length=max_length, min_length=30, num_beams=4, early_stopping=True
+            )[0]['summary_text']
+
+            t5_summary = t5_model(
+                text_input, max_length=max_length, min_length=30, num_beams=4, early_stopping=True
+            )[0]['summary_text']
+
+            pegasus_summary = pegasus_model(
+                text_input, max_length=max_length, min_length=30, num_beams=4, early_stopping=True
+            )[0]['summary_text']
+
+            # Display summaries
+            st.subheader("BART Summary")
+            st.write(bart_summary)
+            st.write(f"**Word Count:** {len(bart_summary.split())}")
+
+            st.subheader("T5 Summary")
+            st.write(t5_summary)
+            st.write(f"**Word Count:** {len(t5_summary.split())}")
+
+            st.subheader("Pegasus Summary")
+            st.write(pegasus_summary)
+            st.write(f"**Word Count:** {len(pegasus_summary.split())}")
+else:
+    st.warning("Please enter text to summarize.")