Spaces: Running
Commit: e8518d0 (1 file changed)
Parent(s): 9cdaa70
Commit message: less logs
Browse files
File: vms/ui/project/services/training.py
CHANGED
@@ -1664,25 +1664,25 @@ class TrainingService:
|
|
1664 |
# Check in lora_weights directory
|
1665 |
lora_weights_dir = self.app.output_path / "lora_weights"
|
1666 |
if lora_weights_dir.exists():
|
1667 |
-
logger.info(f"Found lora_weights directory: {lora_weights_dir}")
|
1668 |
|
1669 |
# Look for the latest checkpoint directory in lora_weights
|
1670 |
lora_checkpoints = [d for d in lora_weights_dir.glob("*") if d.is_dir() and d.name.isdigit()]
|
1671 |
if lora_checkpoints:
|
1672 |
latest_lora_checkpoint = max(lora_checkpoints, key=lambda x: int(x.name))
|
1673 |
-
logger.info(f"Found latest LoRA checkpoint: {latest_lora_checkpoint}")
|
1674 |
|
1675 |
# Extract step count from directory name
|
1676 |
result["steps"] = int(latest_lora_checkpoint.name)
|
1677 |
|
1678 |
# List contents of the latest checkpoint directory
|
1679 |
checkpoint_contents = list(latest_lora_checkpoint.glob("*"))
|
1680 |
-
logger.info(f"Contents of LoRA checkpoint {latest_lora_checkpoint.name}: {checkpoint_contents}")
|
1681 |
|
1682 |
# Check for weights in the latest LoRA checkpoint
|
1683 |
lora_safetensors = latest_lora_checkpoint / "pytorch_lora_weights.safetensors"
|
1684 |
if lora_safetensors.exists():
|
1685 |
-
logger.info(f"Found weights in latest LoRA checkpoint: {lora_safetensors}")
|
1686 |
result["path"] = str(lora_safetensors)
|
1687 |
return result
|
1688 |
|
@@ -1697,14 +1697,14 @@ class TrainingService:
|
|
1697 |
for weight_file in possible_weight_files:
|
1698 |
weight_path = latest_lora_checkpoint / weight_file
|
1699 |
if weight_path.exists():
|
1700 |
-
logger.info(f"Found weights file {weight_file} in latest LoRA checkpoint: {weight_path}")
|
1701 |
result["path"] = str(weight_path)
|
1702 |
return result
|
1703 |
|
1704 |
# Check if any .safetensors files exist
|
1705 |
safetensors_files = list(latest_lora_checkpoint.glob("*.safetensors"))
|
1706 |
if safetensors_files:
|
1707 |
-
logger.info(f"Found .safetensors files in LoRA checkpoint: {safetensors_files}")
|
1708 |
# Return the first .safetensors file found
|
1709 |
result["path"] = str(safetensors_files[0])
|
1710 |
return result
|
@@ -1712,11 +1712,12 @@ class TrainingService:
|
|
1712 |
# Fallback: check for direct safetensors file in lora_weights root
|
1713 |
lora_safetensors = lora_weights_dir / "pytorch_lora_weights.safetensors"
|
1714 |
if lora_safetensors.exists():
|
1715 |
-
logger.info(f"Found weights in lora_weights directory: {lora_safetensors}")
|
1716 |
result["path"] = str(lora_safetensors)
|
1717 |
return result
|
1718 |
else:
|
1719 |
logger.info(f"pytorch_lora_weights.safetensors not found in lora_weights directory")
|
|
|
1720 |
|
1721 |
# If not found in root or lora_weights, log the issue and check fallback
|
1722 |
logger.warning(f"Model weights not found at expected location: {model_output_safetensors_path}")
|
|
|
1664 |
# Check in lora_weights directory
|
1665 |
lora_weights_dir = self.app.output_path / "lora_weights"
|
1666 |
if lora_weights_dir.exists():
|
1667 |
+
#logger.info(f"Found lora_weights directory: {lora_weights_dir}")
|
1668 |
|
1669 |
# Look for the latest checkpoint directory in lora_weights
|
1670 |
lora_checkpoints = [d for d in lora_weights_dir.glob("*") if d.is_dir() and d.name.isdigit()]
|
1671 |
if lora_checkpoints:
|
1672 |
latest_lora_checkpoint = max(lora_checkpoints, key=lambda x: int(x.name))
|
1673 |
+
#logger.info(f"Found latest LoRA checkpoint: {latest_lora_checkpoint}")
|
1674 |
|
1675 |
# Extract step count from directory name
|
1676 |
result["steps"] = int(latest_lora_checkpoint.name)
|
1677 |
|
1678 |
# List contents of the latest checkpoint directory
|
1679 |
checkpoint_contents = list(latest_lora_checkpoint.glob("*"))
|
1680 |
+
#logger.info(f"Contents of LoRA checkpoint {latest_lora_checkpoint.name}: {checkpoint_contents}")
|
1681 |
|
1682 |
# Check for weights in the latest LoRA checkpoint
|
1683 |
lora_safetensors = latest_lora_checkpoint / "pytorch_lora_weights.safetensors"
|
1684 |
if lora_safetensors.exists():
|
1685 |
+
#logger.info(f"Found weights in latest LoRA checkpoint: {lora_safetensors}")
|
1686 |
result["path"] = str(lora_safetensors)
|
1687 |
return result
|
1688 |
|
|
|
1697 |
for weight_file in possible_weight_files:
|
1698 |
weight_path = latest_lora_checkpoint / weight_file
|
1699 |
if weight_path.exists():
|
1700 |
+
#logger.info(f"Found weights file {weight_file} in latest LoRA checkpoint: {weight_path}")
|
1701 |
result["path"] = str(weight_path)
|
1702 |
return result
|
1703 |
|
1704 |
# Check if any .safetensors files exist
|
1705 |
safetensors_files = list(latest_lora_checkpoint.glob("*.safetensors"))
|
1706 |
if safetensors_files:
|
1707 |
+
#logger.info(f"Found .safetensors files in LoRA checkpoint: {safetensors_files}")
|
1708 |
# Return the first .safetensors file found
|
1709 |
result["path"] = str(safetensors_files[0])
|
1710 |
return result
|
|
|
1712 |
# Fallback: check for direct safetensors file in lora_weights root
|
1713 |
lora_safetensors = lora_weights_dir / "pytorch_lora_weights.safetensors"
|
1714 |
if lora_safetensors.exists():
|
1715 |
+
#logger.info(f"Found weights in lora_weights directory: {lora_safetensors}")
|
1716 |
result["path"] = str(lora_safetensors)
|
1717 |
return result
|
1718 |
else:
|
1719 |
logger.info(f"pytorch_lora_weights.safetensors not found in lora_weights directory")
|
1720 |
+
pass
|
1721 |
|
1722 |
# If not found in root or lora_weights, log the issue and check fallback
|
1723 |
logger.warning(f"Model weights not found at expected location: {model_output_safetensors_path}")
|