Spaces:
Paused
Paused
Commit
·
1cf1484
1
Parent(s):
6485751
Fixing LLM init v7
Browse files — main/routes.py (+4 −4)
main/routes.py
CHANGED
@@ -263,15 +263,15 @@ async def initialize_model(request: InitializeRequest):
     logger.info(f"Using model path: {model_path}")

     # Load the model
-
+    logger.info("Loading model")
     llm_instance = LLM.load(
         model=model_path,
         distribute=None if request.precision or request.quantize else "auto"
     )
-
+    logger.info("Done loading model")

     # If manual distribution is needed
-
+    logger.info("Distributing model")
     if request.precision or request.quantize:
         llm_instance.distribute(
             accelerator="cuda" if request.mode == "gpu" else "cpu",
@@ -279,7 +279,7 @@ async def initialize_model(request: InitializeRequest):
             precision=request.precision,
             quantize=request.quantize
         )
-
+    logger.info("Done distributing model")

     logger.info(
         f"Model initialized successfully with config:\n"