Skip to content

Commit 695df96

Browse files
author
Daniel King
committed
maybe
1 parent d4f5566 commit 695df96

File tree

1 file changed

+18
-7
lines changed

1 file changed

+18
-7
lines changed

composer/loggers/mlflow_logger.py

Lines changed: 18 additions & 7 deletions
Original file line number | Diff line number | Diff line change
@@ -14,6 +14,7 @@
1414
import signal
1515
import sys
1616
import textwrap
17+
import threading
1718
import time
1819
import warnings
1920
from typing import TYPE_CHECKING, Any, Literal, Optional, Sequence, Union
@@ -45,8 +46,6 @@ def __init__(self, main_pid, mlflow_run_id, mlflow_tracking_uri):
4546
self.main_pid = main_pid
4647
self.mlflow_run_id = mlflow_run_id
4748
self.mlflow_tracking_uri = mlflow_tracking_uri
48-
self.exit_event = multiprocessing.Event()
49-
self.crash_event = multiprocessing.Event()
5049

5150
def handle_sigterm(self, signum, frame):
5251
from mlflow import MlflowClient
@@ -57,11 +56,24 @@ def handle_sigterm(self, signum, frame):
5756
client.set_terminated(self.mlflow_run_id, status='KILLED')
5857

5958
def run(self):
59+
self.exit_event = threading.Event()
60+
self.crash_event = threading.Event()
61+
6062
from mlflow import MlflowClient
6163

6264
os.setsid()
63-
# Register the signal handler in the child process
64-
signal.signal(signal.SIGTERM, self.handle_sigterm)
65+
# Define signal handlers for communication
66+
def handle_exit_signal(signum, frame):
67+
self.exit_event.set()
68+
69+
def handle_crash_signal(signum, frame):
70+
self.crash_event.set()
71+
self.exit_event.set()
72+
73+
# Register the signal handlers
74+
signal.signal(signal.SIGUSR1, handle_exit_signal) # For normal exit
75+
signal.signal(signal.SIGUSR2, handle_crash_signal) # For crash exit
76+
signal.signal(signal.SIGTERM, self.handle_sigterm) # For termination
6577

6678
while not self.exit_event.wait(10):
6779
try:
@@ -79,13 +91,12 @@ def run(self):
7991
def stop(self):
8092
log.debug("Setting exit event")
8193
print("Setting exit event")
82-
self.exit_event.set()
94+
os.kill(self.pid, signal.SIGUSR1)
8395
log.debug("Setting exit event done")
8496
print("Setting exit event done")
8597

8698
def crash(self):
87-
self.crash_event.set()
88-
self.exit_event.set()
99+
os.kill(self.pid, signal.SIGUSR2)
89100

90101

91102
class MLFlowLogger(LoggerDestination):

0 commit comments

Comments
 (0)