Commit aaa2fc9
Parent: 58454eb

refactor: remove FLASHINFER environment variable and update LLM initialization for batch processing

generate_summaries_uv.py CHANGED (+20 -6)
@@ -20,7 +20,6 @@ from typing import Optional
 
 # Set environment variables to speed up model loading
 os.environ["HF_HUB_ENABLE_HF_TRANSFER"] = "1"
-# os.environ["VLLM_ATTENTION_BACKEND"] = "FLASHINFER"
 
 import polars as pl
 from datasets import Dataset, load_dataset
@@ -112,7 +111,10 @@ def generate_summaries(
 
     # Initialize model and tokenizer from local path
     logger.info(f"Initializing vLLM model from local path: {local_model_path}")
-    llm = LLM(
+    llm = LLM(
+        model=local_model_path,
+        max_model_len=4096,  # Adjust based on model capabilities
+    )
     tokenizer = AutoTokenizer.from_pretrained(local_model_path)
     sampling_params = SamplingParams(
         temperature=temperature,
@@ -131,10 +133,22 @@ def generate_summaries(
     logger.info(f"Generating summaries for {len(prompts)} items")
     all_outputs = []
 
-    for i in tqdm(range(0, len(prompts), batch_size), desc="Generating summaries"):
-        batch_prompts = prompts[i : i + batch_size]
-        outputs = llm.generate(batch_prompts, sampling_params)
-        all_outputs.extend(outputs)
+    # for i in tqdm(range(0, len(prompts), batch_size), desc="Generating summaries"):
+    #     batch_prompts = prompts[i : i + batch_size]
+    #     outputs = llm.generate(batch_prompts, sampling_params)
+    #     all_outputs.extend(outputs)
+    # try directly doing whole dataset
+    all_outputs = llm.generate(
+        prompts,
+        sampling_params,
+        batch_size=batch_size,
+        max_batch_size=batch_size,
+    )
+    logger.info(f"Generated {len(all_outputs)} summaries")
+    if len(all_outputs) != len(prompts):
+        logger.warning(
+            f"Generated {len(all_outputs)} summaries, but expected {len(prompts)}. Some prompts may have failed."
+        )
 
     # Extract clean results
     clean_results = [output.outputs[0].text.strip() for output in all_outputs]