Spaces:
Running
Running
Commit
·
21e03a6
1
Parent(s):
1f07207
add more logs
Browse files
vms/ui/project/services/training.py
CHANGED
@@ -1495,10 +1495,16 @@ class TrainingService:
|
|
1495 |
# Check in lora_weights directory
|
1496 |
lora_weights_dir = self.app.output_path / "lora_weights"
|
1497 |
if lora_weights_dir.exists():
|
|
|
|
|
|
|
|
|
1498 |
lora_safetensors = lora_weights_dir / "pytorch_lora_weights.safetensors"
|
1499 |
if lora_safetensors.exists():
|
1500 |
logger.info(f"Found weights in lora_weights directory: {lora_safetensors}")
|
1501 |
return str(lora_safetensors)
|
|
|
|
|
1502 |
|
1503 |
# If not found in root or lora_weights, log the issue
|
1504 |
logger.warning(f"Model weights not found at expected location: {model_output_safetensors_path}")
|
@@ -1509,10 +1515,18 @@ class TrainingService:
|
|
1509 |
if checkpoints:
|
1510 |
logger.info(f"Found {len(checkpoints)} checkpoint directories, but main weights file is missing")
|
1511 |
latest_checkpoint = max(checkpoints, key=lambda x: int(x.name.split("_")[-1]))
|
|
|
|
|
|
|
|
|
|
|
|
|
1512 |
checkpoint_weights = latest_checkpoint / "pytorch_lora_weights.safetensors"
|
1513 |
if checkpoint_weights.exists():
|
1514 |
logger.info(f"Found weights in latest checkpoint: {checkpoint_weights}")
|
1515 |
return str(checkpoint_weights)
|
|
|
|
|
1516 |
|
1517 |
return None
|
1518 |
|
|
|
1495 |
# Check in lora_weights directory
|
1496 |
lora_weights_dir = self.app.output_path / "lora_weights"
|
1497 |
if lora_weights_dir.exists():
|
1498 |
+
logger.info(f"Found lora_weights directory: {lora_weights_dir}")
|
1499 |
+
lora_weights_contents = list(lora_weights_dir.glob("*"))
|
1500 |
+
logger.info(f"Contents of lora_weights directory: {lora_weights_contents}")
|
1501 |
+
|
1502 |
lora_safetensors = lora_weights_dir / "pytorch_lora_weights.safetensors"
|
1503 |
if lora_safetensors.exists():
|
1504 |
logger.info(f"Found weights in lora_weights directory: {lora_safetensors}")
|
1505 |
return str(lora_safetensors)
|
1506 |
+
else:
|
1507 |
+
logger.info(f"pytorch_lora_weights.safetensors not found in lora_weights directory")
|
1508 |
|
1509 |
# If not found in root or lora_weights, log the issue
|
1510 |
logger.warning(f"Model weights not found at expected location: {model_output_safetensors_path}")
|
|
|
1515 |
if checkpoints:
|
1516 |
logger.info(f"Found {len(checkpoints)} checkpoint directories, but main weights file is missing")
|
1517 |
latest_checkpoint = max(checkpoints, key=lambda x: int(x.name.split("_")[-1]))
|
1518 |
+
logger.info(f"Latest checkpoint directory: {latest_checkpoint}")
|
1519 |
+
|
1520 |
+
# Log contents of latest checkpoint
|
1521 |
+
checkpoint_contents = list(latest_checkpoint.glob("*"))
|
1522 |
+
logger.info(f"Contents of latest checkpoint {latest_checkpoint.name}: {checkpoint_contents}")
|
1523 |
+
|
1524 |
checkpoint_weights = latest_checkpoint / "pytorch_lora_weights.safetensors"
|
1525 |
if checkpoint_weights.exists():
|
1526 |
logger.info(f"Found weights in latest checkpoint: {checkpoint_weights}")
|
1527 |
return str(checkpoint_weights)
|
1528 |
+
else:
|
1529 |
+
logger.info(f"pytorch_lora_weights.safetensors not found in checkpoint directory")
|
1530 |
|
1531 |
return None
|
1532 |
|