import streamlit as st
import torch
from transformers import AutoModelForSeq2SeqLM, AutoTokenizer
import concurrent.futures
import numpy as np
import psutil


class UltraOptimizedSummarizer:
    def __init__(self):
        # Per-instance caches for loaded models and tokenizers
        self.models = {}
        self.tokenizers = {}
        self.device = self._get_optimal_device()

    def _get_optimal_device(self):
        """Select the best available computational device."""
        if torch.cuda.is_available():
            # Pick the GPU with the least memory already allocated by this
            # process (a rough proxy for the freest device)
            gpu_memory = [
                torch.cuda.memory_allocated(i)
                for i in range(torch.cuda.device_count())
            ]
            best_gpu = int(np.argmin(gpu_memory))
            return torch.device(f'cuda:{best_gpu}')
        elif torch.backends.mps.is_available():
            return torch.device('mps')
        return torch.device('cpu')

    def _load_model(self, model_name):
        """
        Load and cache a model/tokenizer pair.
        Uses half precision (float16) on CUDA for a reduced memory footprint.
        """
        if model_name in self.models:
            return self.models[model_name], self.tokenizers[model_name]

        try:
            tokenizer = AutoTokenizer.from_pretrained(
                model_name,
                cache_dir='/tmp/huggingface_cache'
            )

            # Load the model with memory-friendly settings
            model = AutoModelForSeq2SeqLM.from_pretrained(
                model_name,
                cache_dir='/tmp/huggingface_cache',
                torch_dtype=torch.float16 if self.device.type == 'cuda' else torch.float32,
                low_cpu_mem_usage=True
            ).to(self.device)

            # Optional: compile the model for additional speed (PyTorch 2.0+)
            if hasattr(torch, 'compile'):
                model = torch.compile(model)

            self.models[model_name] = model
            self.tokenizers[model_name] = tokenizer
            return model, tokenizer
        except Exception as e:
            st.error(f"Model loading error for {model_name}: {e}")
            return None, None

    def summarize(self, text, model_name, max_length=150, min_length=50):
        """
        Summarize `text` with the given model, truncating long inputs.
        """
        model, tokenizer = self._load_model(model_name)
        if not model or not tokenizer:
            return "Summarization failed."

        try:
            # Truncate the input to the model's context window to prevent OOM errors
            inputs = tokenizer(
                text,
                max_length=1024,
                truncation=True,
                return_tensors='pt'
            ).to(self.device)

            # Generate the summary with beam search
            summary_ids = model.generate(
                inputs['input_ids'],
                attention_mask=inputs['attention_mask'],
                num_beams=4,
                max_length=max_length,
                min_length=min_length,
                early_stopping=True,
                no_repeat_ngram_size=2,
                do_sample=False
            )

            summary = tokenizer.decode(
                summary_ids[0],
                skip_special_tokens=True
            )
            return summary
        except Exception as e:
            st.error(f"Summarization error for {model_name}: {e}")
            return "Could not generate summary."

    def parallel_summarize(self, text, max_length=150, min_length=50):
        """
        Concurrent summarization across all models via a thread pool.
""" model_configs = [ "facebook/bart-large-cnn", "t5-large", "google/pegasus-cnn_dailymail" ] # Dynamic thread count based on system resources max_workers = min( len(model_configs), psutil.cpu_count(logical=False), # Physical cores 4 # Cap at 4 to prevent resource exhaustion ) # Use concurrent futures for true parallel processing with concurrent.futures.ThreadPoolExecutor(max_workers=max_workers) as executor: # Submit summarization tasks future_to_model = { executor.submit( self.summarize, text, model, max_length, min_length ): model for model in model_configs } # Collect results as they complete summaries = {} for future in concurrent.futures.as_completed(future_to_model): model = future_to_model[future] try: summaries[model] = future.result() except Exception as e: summaries[model] = f"Error: {e}" return summaries def main(): st.set_page_config( page_title="Ultra-Optimized Summarization", page_icon="🚀", layout="wide" ) st.title("🔬 Hyper-Optimized Text Summarization") # Initialize optimized summarizer summarizer = UltraOptimizedSummarizer() # Input and processing text_input = st.text_area( "Enter text for advanced summarization:", height=300 ) # Advanced compression control col1, col2 = st.columns(2) with col1: max_length = st.slider( "Max Summary Length", min_value=50, max_value=300, value=150 ) with col2: compression_rate = st.slider( "Compression Aggressiveness", min_value=0.1, max_value=0.5, value=0.3, step=0.05 ) if st.button("Generate Hyper-Optimized Summaries"): if not text_input: st.warning("Please provide text to summarize.") return # Progress tracking progress_bar = st.progress(0) status_text = st.empty() try: # Perform parallel summarization status_text.info("Initializing ultra-optimized summarization...") progress_bar.progress(20) summaries = summarizer.parallel_summarize( text_input, max_length=max_length, min_length=int(max_length * 0.5) ) progress_bar.progress(100) status_text.success("Summarization Complete!") # Display results cols = st.columns(3) for (col, (model, summary)) in zip(cols, summaries.items()): with col: st.subheader(model.split('/')[-1].upper()) st.write(summary) except Exception as e: st.error(f"Optimization failed: {e}") finally: progress_bar.empty() status_text.empty() if __name__ == "__main__": main()