Spaces:
Running
on
Zero
Running
on
Zero
Update app.py
Browse files
app.py
CHANGED
@@ -40,9 +40,9 @@ class ModelManager:
|
|
40 |
self.whisper_model = None
|
41 |
self._initialized = True
|
42 |
|
43 |
-
@spaces.GPU(duration=
|
44 |
def initialize_models(self):
|
45 |
-
"""Initialize models with
|
46 |
try:
|
47 |
# Get HuggingFace token
|
48 |
HUGGINGFACE_TOKEN = os.environ.get('HUGGINGFACE_TOKEN')
|
@@ -52,18 +52,19 @@ class ModelManager:
|
|
52 |
logger.info("Starting model initialization...")
|
53 |
model_name = "meta-llama/Llama-2-7b-chat-hf"
|
54 |
|
55 |
-
# Load tokenizer
|
56 |
logger.info("Loading tokenizer...")
|
57 |
self.tokenizer = AutoTokenizer.from_pretrained(
|
58 |
model_name,
|
59 |
token=HUGGINGFACE_TOKEN,
|
60 |
-
use_fast=
|
|
|
61 |
)
|
62 |
if self.tokenizer is None:
|
63 |
raise RuntimeError("Failed to initialize tokenizer")
|
64 |
self.tokenizer.pad_token = self.tokenizer.eos_token
|
65 |
|
66 |
-
# Load model with
|
67 |
logger.info("Loading model...")
|
68 |
self.model = AutoModelForCausalLM.from_pretrained(
|
69 |
model_name,
|
@@ -71,12 +72,13 @@ class ModelManager:
|
|
71 |
torch_dtype=torch.float16,
|
72 |
device_map="auto",
|
73 |
low_cpu_mem_usage=True,
|
74 |
-
max_memory={0: "
|
|
|
75 |
)
|
76 |
if self.model is None:
|
77 |
raise RuntimeError("Failed to initialize model")
|
78 |
|
79 |
-
# Create pipeline
|
80 |
logger.info("Creating pipeline...")
|
81 |
self.news_generator = pipeline(
|
82 |
"text-generation",
|
@@ -84,18 +86,24 @@ class ModelManager:
|
|
84 |
tokenizer=self.tokenizer,
|
85 |
device_map="auto",
|
86 |
torch_dtype=torch.float16,
|
87 |
-
|
88 |
do_sample=True,
|
89 |
temperature=0.7,
|
90 |
top_p=0.95,
|
91 |
-
repetition_penalty=1.2
|
|
|
|
|
92 |
)
|
93 |
if self.news_generator is None:
|
94 |
raise RuntimeError("Failed to initialize news generator pipeline")
|
95 |
|
96 |
-
# Load Whisper model
|
97 |
logger.info("Loading Whisper model...")
|
98 |
-
self.whisper_model = whisper.load_model(
|
|
|
|
|
|
|
|
|
99 |
if self.whisper_model is None:
|
100 |
raise RuntimeError("Failed to initialize Whisper model")
|
101 |
|
@@ -108,15 +116,25 @@ class ModelManager:
|
|
108 |
raise
|
109 |
|
110 |
def reset_models(self):
|
111 |
-
"""Reset all models
|
112 |
-
|
113 |
-
|
114 |
-
|
115 |
-
|
116 |
-
|
117 |
-
|
118 |
-
|
119 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
120 |
|
121 |
def check_models_initialized(self):
|
122 |
"""Check if all models are properly initialized"""
|
@@ -184,7 +202,7 @@ def preprocess_audio(audio_file):
|
|
184 |
logger.error(f"Error preprocessing audio: {str(e)}")
|
185 |
raise
|
186 |
|
187 |
-
@spaces.GPU(duration=
|
188 |
def transcribe_audio(file):
|
189 |
"""Transcribe an audio or video file."""
|
190 |
try:
|
@@ -262,7 +280,7 @@ def process_social_content(url):
|
|
262 |
logger.error(f"Error processing social content: {str(e)}")
|
263 |
return None
|
264 |
|
265 |
-
@spaces.GPU(duration=
|
266 |
def generate_news(instructions, facts, size, tone, *args):
|
267 |
try:
|
268 |
# Get initialized models
|
@@ -371,18 +389,21 @@ Follow these requirements:
|
|
371 |
- Do not invent information
|
372 |
- Be rigorous with the provided facts [/INST]"""
|
373 |
|
374 |
-
#
|
|
|
|
|
|
|
375 |
with torch.inference_mode():
|
376 |
outputs = news_generator(
|
377 |
prompt,
|
378 |
-
max_new_tokens=
|
379 |
-
return_full_text=False,
|
380 |
-
pad_token_id=tokenizer.eos_token_id,
|
381 |
num_return_sequences=1,
|
382 |
do_sample=True,
|
383 |
temperature=0.7,
|
384 |
top_p=0.95,
|
385 |
-
repetition_penalty=1.2
|
|
|
|
|
386 |
)
|
387 |
|
388 |
news_article = outputs[0]['generated_text']
|
|
|
40 |
self.whisper_model = None
|
41 |
self._initialized = True
|
42 |
|
43 |
+
@spaces.GPU(duration=120)
|
44 |
def initialize_models(self):
|
45 |
+
"""Initialize models with optimized settings"""
|
46 |
try:
|
47 |
# Get HuggingFace token
|
48 |
HUGGINGFACE_TOKEN = os.environ.get('HUGGINGFACE_TOKEN')
|
|
|
52 |
logger.info("Starting model initialization...")
|
53 |
model_name = "meta-llama/Llama-2-7b-chat-hf"
|
54 |
|
55 |
+
# Load tokenizer with optimized settings
|
56 |
logger.info("Loading tokenizer...")
|
57 |
self.tokenizer = AutoTokenizer.from_pretrained(
|
58 |
model_name,
|
59 |
token=HUGGINGFACE_TOKEN,
|
60 |
+
use_fast=True,
|
61 |
+
model_max_length=512
|
62 |
)
|
63 |
if self.tokenizer is None:
|
64 |
raise RuntimeError("Failed to initialize tokenizer")
|
65 |
self.tokenizer.pad_token = self.tokenizer.eos_token
|
66 |
|
67 |
+
# Load model with optimized memory settings
|
68 |
logger.info("Loading model...")
|
69 |
self.model = AutoModelForCausalLM.from_pretrained(
|
70 |
model_name,
|
|
|
72 |
torch_dtype=torch.float16,
|
73 |
device_map="auto",
|
74 |
low_cpu_mem_usage=True,
|
75 |
+
max_memory={0: "6GiB"},
|
76 |
+
load_in_8bit=True
|
77 |
)
|
78 |
if self.model is None:
|
79 |
raise RuntimeError("Failed to initialize model")
|
80 |
|
81 |
+
# Create pipeline with optimized settings
|
82 |
logger.info("Creating pipeline...")
|
83 |
self.news_generator = pipeline(
|
84 |
"text-generation",
|
|
|
86 |
tokenizer=self.tokenizer,
|
87 |
device_map="auto",
|
88 |
torch_dtype=torch.float16,
|
89 |
+
max_new_tokens=512,
|
90 |
do_sample=True,
|
91 |
temperature=0.7,
|
92 |
top_p=0.95,
|
93 |
+
repetition_penalty=1.2,
|
94 |
+
num_return_sequences=1,
|
95 |
+
early_stopping=True
|
96 |
)
|
97 |
if self.news_generator is None:
|
98 |
raise RuntimeError("Failed to initialize news generator pipeline")
|
99 |
|
100 |
+
# Load Whisper model with optimized settings
|
101 |
logger.info("Loading Whisper model...")
|
102 |
+
self.whisper_model = whisper.load_model(
|
103 |
+
"tiny",
|
104 |
+
device="cuda",
|
105 |
+
download_root="/tmp/whisper"
|
106 |
+
)
|
107 |
if self.whisper_model is None:
|
108 |
raise RuntimeError("Failed to initialize Whisper model")
|
109 |
|
|
|
116 |
raise
|
117 |
|
118 |
def reset_models(self):
|
119 |
+
"""Reset all models and clear GPU memory"""
|
120 |
+
try:
|
121 |
+
del self.tokenizer
|
122 |
+
del self.model
|
123 |
+
del self.news_generator
|
124 |
+
del self.whisper_model
|
125 |
+
|
126 |
+
self.tokenizer = None
|
127 |
+
self.model = None
|
128 |
+
self.news_generator = None
|
129 |
+
self.whisper_model = None
|
130 |
+
|
131 |
+
# Clear CUDA cache
|
132 |
+
if torch.cuda.is_available():
|
133 |
+
torch.cuda.empty_cache()
|
134 |
+
torch.cuda.synchronize()
|
135 |
+
|
136 |
+
except Exception as e:
|
137 |
+
logger.error(f"Error during model reset: {str(e)}")
|
138 |
|
139 |
def check_models_initialized(self):
|
140 |
"""Check if all models are properly initialized"""
|
|
|
202 |
logger.error(f"Error preprocessing audio: {str(e)}")
|
203 |
raise
|
204 |
|
205 |
+
@spaces.GPU(duration=120)
|
206 |
def transcribe_audio(file):
|
207 |
"""Transcribe an audio or video file."""
|
208 |
try:
|
|
|
280 |
logger.error(f"Error processing social content: {str(e)}")
|
281 |
return None
|
282 |
|
283 |
+
@spaces.GPU(duration=120)
|
284 |
def generate_news(instructions, facts, size, tone, *args):
|
285 |
try:
|
286 |
# Get initialized models
|
|
|
389 |
- Do not invent information
|
390 |
- Be rigorous with the provided facts [/INST]"""
|
391 |
|
392 |
+
# Optimize size and max tokens
|
393 |
+
max_tokens = min(int(size * 1.5), 512)
|
394 |
+
|
395 |
+
# Generate article with optimized settings
|
396 |
with torch.inference_mode():
|
397 |
outputs = news_generator(
|
398 |
prompt,
|
399 |
+
max_new_tokens=max_tokens,
|
|
|
|
|
400 |
num_return_sequences=1,
|
401 |
do_sample=True,
|
402 |
temperature=0.7,
|
403 |
top_p=0.95,
|
404 |
+
repetition_penalty=1.2,
|
405 |
+
early_stopping=True,
|
406 |
+
pad_token_id=tokenizer.eos_token_id
|
407 |
)
|
408 |
|
409 |
news_article = outputs[0]['generated_text']
|