Spaces:
Running
Running
Commit
·
4905a7d
1
Parent(s):
a73397c
fix
Browse files
- app.py +15 -5
- training_log_parser.py +1 -0
- training_service.py +7 -6
app.py
CHANGED
@@ -63,6 +63,8 @@ class VideoTrainerUI:
|
|
63 |
"""Update UI components based on training state"""
|
64 |
updates = {}
|
65 |
|
|
|
|
|
66 |
# Update status box with high-level information
|
67 |
status_text = []
|
68 |
if training_state["status"] != "idle":
|
@@ -258,10 +260,13 @@ class VideoTrainerUI:
|
|
258 |
|
259 |
def update_training_buttons(self, training_state: Dict[str, Any]) -> Dict:
|
260 |
"""Update training control buttons based on state"""
|
|
|
261 |
is_training = training_state["status"] in ["training", "initializing"]
|
|
|
|
|
262 |
is_paused = training_state["status"] == "paused"
|
263 |
is_completed = training_state["status"] in ["completed", "error", "stopped"]
|
264 |
-
|
265 |
return {
|
266 |
"start_btn": gr.Button(
|
267 |
interactive=not is_training and not is_paused,
|
@@ -289,8 +294,10 @@ class VideoTrainerUI:
|
|
289 |
})
|
290 |
|
291 |
def handle_pause_resume(self):
|
|
|
292 |
status = self.trainer.get_status()
|
293 |
-
|
|
|
294 |
result = self.trainer.resume_training()
|
295 |
new_state = {"status": "training"}
|
296 |
else:
|
@@ -623,6 +630,8 @@ class VideoTrainerUI:
|
|
623 |
|
624 |
status_update = status["message"]
|
625 |
|
|
|
|
|
626 |
# Parse new log lines
|
627 |
if logs:
|
628 |
last_state = None
|
@@ -630,6 +639,7 @@ class VideoTrainerUI:
|
|
630 |
state_update = self.log_parser.parse_line(line)
|
631 |
if state_update:
|
632 |
last_state = state_update
|
|
|
633 |
|
634 |
if last_state:
|
635 |
ui_updates = self.update_training_ui(last_state)
|
@@ -648,6 +658,8 @@ class VideoTrainerUI:
|
|
648 |
"message": status
|
649 |
}
|
650 |
|
|
|
|
|
651 |
if is_completed:
|
652 |
button_updates = self.handle_training_complete()
|
653 |
return (
|
@@ -1129,9 +1141,7 @@ class VideoTrainerUI:
|
|
1129 |
],
|
1130 |
outputs=[status_box, log_box]
|
1131 |
).success(
|
1132 |
-
fn=lambda: self.update_training_buttons(
|
1133 |
-
"status": "training"
|
1134 |
-
}),
|
1135 |
outputs=[start_btn, stop_btn, pause_resume_btn]
|
1136 |
)
|
1137 |
|
|
|
63 |
"""Update UI components based on training state"""
|
64 |
updates = {}
|
65 |
|
66 |
+
print("update_training_ui: training_state = ", training_state)
|
67 |
+
|
68 |
# Update status box with high-level information
|
69 |
status_text = []
|
70 |
if training_state["status"] != "idle":
|
|
|
260 |
|
261 |
def update_training_buttons(self, training_state: Dict[str, Any]) -> Dict:
|
262 |
"""Update training control buttons based on state"""
|
263 |
+
#print("update_training_buttons: training_state = ", training_state)
|
264 |
is_training = training_state["status"] in ["training", "initializing"]
|
265 |
+
if training_state["message"] == "No training in progress":
|
266 |
+
is_training = False
|
267 |
is_paused = training_state["status"] == "paused"
|
268 |
is_completed = training_state["status"] in ["completed", "error", "stopped"]
|
269 |
+
#print(f"update_training_buttons: is_training = {is_training}, is_paused = {is_paused}, is_completed = {is_completed}")
|
270 |
return {
|
271 |
"start_btn": gr.Button(
|
272 |
interactive=not is_training and not is_paused,
|
|
|
294 |
})
|
295 |
|
296 |
def handle_pause_resume(self):
|
297 |
+
|
298 |
status = self.trainer.get_status()
|
299 |
+
print("handle_pause_resume: status = ", status)
|
300 |
+
if status["status"] == "paused":
|
301 |
result = self.trainer.resume_training()
|
302 |
new_state = {"status": "training"}
|
303 |
else:
|
|
|
630 |
|
631 |
status_update = status["message"]
|
632 |
|
633 |
+
# print(f"refresh_training_status_and_logs: ", status)
|
634 |
+
|
635 |
# Parse new log lines
|
636 |
if logs:
|
637 |
last_state = None
|
|
|
639 |
state_update = self.log_parser.parse_line(line)
|
640 |
if state_update:
|
641 |
last_state = state_update
|
642 |
+
print("last_state = ", last_state)
|
643 |
|
644 |
if last_state:
|
645 |
ui_updates = self.update_training_ui(last_state)
|
|
|
658 |
"message": status
|
659 |
}
|
660 |
|
661 |
+
#print("refresh_training_status: current_state = ", current_state)
|
662 |
+
|
663 |
if is_completed:
|
664 |
button_updates = self.handle_training_complete()
|
665 |
return (
|
|
|
1141 |
],
|
1142 |
outputs=[status_box, log_box]
|
1143 |
).success(
|
1144 |
+
fn=lambda: self.update_training_buttons(),
|
|
|
|
|
1145 |
outputs=[start_btn, stop_btn, pause_resume_btn]
|
1146 |
)
|
1147 |
|
training_log_parser.py
CHANGED
@@ -73,6 +73,7 @@ class TrainingLogParser:
|
|
73 |
if "Training steps:" in line:
|
74 |
# Set status to training if we see this
|
75 |
self.state.status = "training"
|
|
|
76 |
if not self.state.start_time:
|
77 |
self.state.start_time = datetime.now()
|
78 |
|
|
|
73 |
if "Training steps:" in line:
|
74 |
# Set status to training if we see this
|
75 |
self.state.status = "training"
|
76 |
+
print("setting status to 'training'")
|
77 |
if not self.state.start_time:
|
78 |
self.state.start_time = datetime.now()
|
79 |
|
training_service.py
CHANGED
@@ -29,7 +29,7 @@ logging.basicConfig(
|
|
29 |
format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
|
30 |
handlers=[
|
31 |
logging.StreamHandler(sys.stdout),
|
32 |
-
logging.FileHandler('training_service.log')
|
33 |
]
|
34 |
)
|
35 |
logger = logging.getLogger(__name__)
|
@@ -65,7 +65,7 @@ class TrainingService:
|
|
65 |
|
66 |
def get_status(self) -> Dict:
|
67 |
"""Get current training status"""
|
68 |
-
default_status = {'
|
69 |
|
70 |
if not self.status_file.exists():
|
71 |
return default_status
|
@@ -73,6 +73,7 @@ class TrainingService:
|
|
73 |
try:
|
74 |
with open(self.status_file, 'r') as f:
|
75 |
status = json.load(f)
|
|
|
76 |
|
77 |
# Check if process is actually running
|
78 |
if self.pid_file.exists():
|
@@ -80,12 +81,12 @@ class TrainingService:
|
|
80 |
pid = int(f.read().strip())
|
81 |
if not psutil.pid_exists(pid):
|
82 |
# Process died unexpectedly
|
83 |
-
if status['
|
84 |
-
status['
|
85 |
status['message'] = 'Training process terminated unexpectedly'
|
86 |
self.append_log("Training process terminated unexpectedly")
|
87 |
else:
|
88 |
-
status['
|
89 |
status['message'] = 'Training process not found'
|
90 |
return status
|
91 |
|
@@ -432,7 +433,7 @@ class TrainingService:
|
|
432 |
def save_status(self, state: str, **kwargs) -> None:
|
433 |
"""Save current training status"""
|
434 |
status = {
|
435 |
-
'
|
436 |
'timestamp': datetime.now().isoformat(),
|
437 |
**kwargs
|
438 |
}
|
|
|
29 |
format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
|
30 |
handlers=[
|
31 |
logging.StreamHandler(sys.stdout),
|
32 |
+
logging.FileHandler(str(OUTPUT_PATH / 'training_service.log'))
|
33 |
]
|
34 |
)
|
35 |
logger = logging.getLogger(__name__)
|
|
|
65 |
|
66 |
def get_status(self) -> Dict:
|
67 |
"""Get current training status"""
|
68 |
+
default_status = {'status': 'stopped', 'message': 'No training in progress'}
|
69 |
|
70 |
if not self.status_file.exists():
|
71 |
return default_status
|
|
|
73 |
try:
|
74 |
with open(self.status_file, 'r') as f:
|
75 |
status = json.load(f)
|
76 |
+
print("status found in the json:", status)
|
77 |
|
78 |
# Check if process is actually running
|
79 |
if self.pid_file.exists():
|
|
|
81 |
pid = int(f.read().strip())
|
82 |
if not psutil.pid_exists(pid):
|
83 |
# Process died unexpectedly
|
84 |
+
if status['status'] == 'running':
|
85 |
+
status['status'] = 'error'
|
86 |
status['message'] = 'Training process terminated unexpectedly'
|
87 |
self.append_log("Training process terminated unexpectedly")
|
88 |
else:
|
89 |
+
status['status'] = 'stopped'
|
90 |
status['message'] = 'Training process not found'
|
91 |
return status
|
92 |
|
|
|
433 |
def save_status(self, state: str, **kwargs) -> None:
|
434 |
"""Save current training status"""
|
435 |
status = {
|
436 |
+
'status': state,
|
437 |
'timestamp': datetime.now().isoformat(),
|
438 |
**kwargs
|
439 |
}
|