import streamlit as st
import torch
from transformers import AutoModelForSeq2SeqLM, AutoTokenizer
import concurrent.futures
import numpy as np
import psutil
import os


class UltraOptimizedSummarizer:
    def __init__(self):
        # Cache loaded models and tokenizers so each checkpoint is loaded only once
        self.models = {}
        self.tokenizers = {}
        self.device = self._get_optimal_device()

    def _get_optimal_device(self):
        """Select the best available computational device."""
        if torch.cuda.is_available():
            # Pick the GPU with the least memory already allocated by this process
            gpu_memory = [
                torch.cuda.memory_allocated(i)
                for i in range(torch.cuda.device_count())
            ]
            best_gpu = int(np.argmin(gpu_memory))
            return torch.device(f'cuda:{best_gpu}')
        elif torch.backends.mps.is_available():
            return torch.device('mps')
        return torch.device('cpu')

    def _load_model(self, model_name):
        """
        Load and cache a model/tokenizer pair.
        Uses half precision (float16) on CUDA to reduce the memory footprint.
        """
        if model_name in self.models:
            return self.models[model_name], self.tokenizers[model_name]
        try:
            # Load tokenizer
            tokenizer = AutoTokenizer.from_pretrained(
                model_name, cache_dir='/tmp/huggingface_cache'
            )
            # Load model in half precision on CUDA, full precision elsewhere
            model = AutoModelForSeq2SeqLM.from_pretrained(
                model_name,
                cache_dir='/tmp/huggingface_cache',
                torch_dtype=torch.float16 if self.device.type == 'cuda' else torch.float32,
                low_cpu_mem_usage=True
            ).to(self.device)
            # Optional: compile the model for additional speed (PyTorch 2.0+)
            if hasattr(torch, 'compile'):
                model = torch.compile(model)
            self.models[model_name] = model
            self.tokenizers[model_name] = tokenizer
            return model, tokenizer
        except Exception as e:
            st.error(f"Model loading error for {model_name}: {e}")
            return None, None

    def summarize(self, text, model_name, max_length=150, min_length=50):
        """
        Summarize text with a single model, truncating overly long inputs.
        """
        model, tokenizer = self._load_model(model_name)
        if model is None or tokenizer is None:
            return "Summarization failed."
        try:
            # T5 checkpoints expect a task prefix before the input text
            if 't5' in model_name.lower():
                text = "summarize: " + text
            # Truncate to the encoder limit to prevent OOM errors
            inputs = tokenizer(
                text,
                max_length=1024,
                truncation=True,
                return_tensors='pt'
            ).to(self.device)
            # Generate the summary with beam search; gradients are not needed
            with torch.no_grad():
                summary_ids = model.generate(
                    inputs['input_ids'],
                    num_beams=4,
                    max_length=max_length,
                    min_length=min_length,
                    early_stopping=True,
                    no_repeat_ngram_size=2,
                    do_sample=False
                )
            # Decode the generated token IDs back to text
            summary = tokenizer.decode(
                summary_ids[0],
                skip_special_tokens=True
            )
            return summary
        except Exception as e:
            st.error(f"Summarization error for {model_name}: {e}")
            return "Could not generate summary."

    def parallel_summarize(self, text, max_length=150, min_length=50):
        """
        Run all configured models concurrently in a thread pool.
        """
        model_configs = [
            "facebook/bart-large-cnn",
            "t5-large",
            "google/pegasus-cnn_dailymail"
        ]
        # Size the pool from system resources; cpu_count can return None
        max_workers = min(
            len(model_configs),
            psutil.cpu_count(logical=False) or 1,  # Physical cores
            4  # Cap at 4 to prevent resource exhaustion
        )
        # Submit one summarization task per model
        with concurrent.futures.ThreadPoolExecutor(max_workers=max_workers) as executor:
            future_to_model = {
                executor.submit(
                    self.summarize,
                    text,
                    model,
                    max_length,
                    min_length
                ): model for model in model_configs
            }
            # Collect results as they complete
            summaries = {}
            for future in concurrent.futures.as_completed(future_to_model):
                model = future_to_model[future]
                try:
                    summaries[model] = future.result()
                except Exception as e:
                    summaries[model] = f"Error: {e}"
        return summaries


def main():
    st.set_page_config(
        page_title="Ultra-Optimized Summarization",
        page_icon="🚀",
        layout="wide"
    )
    st.title("🔬 Hyper-Optimized Text Summarization")
    # Initialize optimized summarizer
    summarizer = UltraOptimizedSummarizer()
    # Input and processing
    text_input = st.text_area(
        "Enter text for advanced summarization:",
        height=300
    )
    # Advanced compression control
    col1, col2 = st.columns(2)
    with col1:
        max_length = st.slider(
            "Max Summary Length",
            min_value=50,
            max_value=300,
            value=150
        )
    with col2:
        compression_rate = st.slider(
            "Compression Aggressiveness",
            min_value=0.1,
            max_value=0.5,
            value=0.3,
            step=0.05
        )
    if st.button("Generate Hyper-Optimized Summaries"):
        if not text_input:
            st.warning("Please provide text to summarize.")
            return
        # Progress tracking
        progress_bar = st.progress(0)
        status_text = st.empty()
        try:
            # Perform parallel summarization
            status_text.info("Initializing ultra-optimized summarization...")
            progress_bar.progress(20)
            summaries = summarizer.parallel_summarize(
                text_input,
                max_length=max_length,
                # Map the compression slider onto the minimum length: a more
                # aggressive setting allows a shorter summary
                min_length=int(max_length * (1 - compression_rate))
            )
            progress_bar.progress(100)
            status_text.success("Summarization Complete!")
            # Display results, one column per model
            cols = st.columns(3)
            for col, (model, summary) in zip(cols, summaries.items()):
                with col:
                    st.subheader(model.split('/')[-1].upper())
                    st.write(summary)
        except Exception as e:
            st.error(f"Optimization failed: {e}")
        finally:
            progress_bar.empty()
            status_text.empty()


if __name__ == "__main__":
    main()
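
# ---------------------------------------------------------------------------
# Usage sketch. The file name app.py and the package list below are
# illustrative assumptions, not part of the original project; adjust them
# to your environment:
#
#   pip install streamlit torch transformers sentencepiece psutil numpy
#   streamlit run app.py
#
# On first use the three checkpoints (facebook/bart-large-cnn, t5-large,
# google/pegasus-cnn_dailymail) are downloaded to /tmp/huggingface_cache,
# so the initial request can take several minutes and a few GB of disk/RAM.
# ---------------------------------------------------------------------------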