Skip to content

Commit 9ce079a

Browse files
author
Daniel King
committed
pc
1 parent 695df96 commit 9ce079a

File tree

1 file changed

+27
-25
lines changed

1 file changed

+27
-25
lines changed

composer/loggers/mlflow_logger.py

Lines changed: 27 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -39,6 +39,7 @@
3939

4040
spawn_context = multiprocessing.get_context('spawn')
4141

42+
4243
class MlflowMonitorProcess(spawn_context.Process):
4344

4445
def __init__(self, main_pid, mlflow_run_id, mlflow_tracking_uri):
@@ -62,18 +63,19 @@ def run(self):
6263
from mlflow import MlflowClient
6364

6465
os.setsid()
66+
6567
# Define signal handlers for communication
6668
def handle_exit_signal(signum, frame):
6769
self.exit_event.set()
68-
70+
6971
def handle_crash_signal(signum, frame):
7072
self.crash_event.set()
7173
self.exit_event.set()
72-
74+
7375
# Register the signal handlers
7476
signal.signal(signal.SIGUSR1, handle_exit_signal) # For normal exit
75-
signal.signal(signal.SIGUSR2, handle_crash_signal) # For crash exit
76-
signal.signal(signal.SIGTERM, self.handle_sigterm) # For termination
77+
signal.signal(signal.SIGUSR2, handle_crash_signal) # For crash exit
78+
signal.signal(signal.SIGTERM, self.handle_sigterm) # For termination
7779

7880
while not self.exit_event.wait(10):
7981
try:
@@ -89,11 +91,11 @@ def handle_crash_signal(signum, frame):
8991
client.set_terminated(self.mlflow_run_id, status='FAILED')
9092

9193
def stop(self):
92-
log.debug("Setting exit event")
93-
print("Setting exit event")
94+
log.debug('Setting exit event')
95+
print('Setting exit event')
9496
os.kill(self.pid, signal.SIGUSR1)
95-
log.debug("Setting exit event done")
96-
print("Setting exit event done")
97+
log.debug('Setting exit event done')
98+
print('Setting exit event done')
9799

98100
def crash(self):
99101
os.kill(self.pid, signal.SIGUSR2)
@@ -608,50 +610,50 @@ def log_images(
608610
def post_close(self):
609611
if self._enabled:
610612
if hasattr(self, 'monitor_process'):
611-
log.debug("Stopping the monitor process")
613+
log.debug('Stopping the monitor process')
612614
# Check if there is an uncaught exception, which means `post_close()` is triggered
613615
# due to program crash.
614616
finish_with_exception = self._global_exception_occurred == 1
615617
if finish_with_exception:
616-
log.debug("Crashing the monitor process")
618+
log.debug('Crashing the monitor process')
617619
self.monitor_process.crash()
618-
log.debug("Returning 1")
620+
log.debug('Returning 1')
619621
return
620622

621-
log.debug("Stopping the monitor process")
623+
log.debug('Stopping the monitor process')
622624
# Stop the monitor process since it's entering the cleanup phase.
623625
self.monitor_process.stop()
624-
log.debug("Stopped the monitor process")
626+
log.debug('Stopped the monitor process')
625627

626628
import mlflow
627629

628630
assert isinstance(self._run_id, str)
629631

630-
log.debug("Flushing")
632+
log.debug('Flushing')
631633
mlflow.flush_async_logging()
632-
log.debug("Flushed")
634+
log.debug('Flushed')
633635
exc_tpe, exc_info, tb = sys.exc_info()
634636
if (exc_tpe, exc_info, tb) == (None, None, None):
635-
log.debug("Get run")
637+
log.debug('Get run')
636638
current_status = self._mlflow_client.get_run(self._run_id).info.status
637-
log.debug("Gotten run")
639+
log.debug('Gotten run')
638640
if current_status == 'RUNNING':
639-
log.debug("Set terminated")
641+
log.debug('Set terminated')
640642
self._mlflow_client.set_terminated(self._run_id, status='FINISHED')
641-
log.debug("Set terminated done")
643+
log.debug('Set terminated done')
642644
else:
643-
log.debug("Set terminated 2")
645+
log.debug('Set terminated 2')
644646
# Record there was an error
645647
self._mlflow_client.set_terminated(self._run_id, status='FAILED')
646-
log.debug("Set terminated done 2")
648+
log.debug('Set terminated done 2')
647649

648-
log.debug("End run")
650+
log.debug('End run')
649651
mlflow.end_run()
650-
log.debug("End run done")
652+
log.debug('End run done')
651653
if hasattr(self, 'monitor_process'):
652-
log.debug("Join the monitor process")
654+
log.debug('Join the monitor process')
653655
self.monitor_process.join()
654-
log.debug("Joined the monitor process")
656+
log.debug('Joined the monitor process')
655657

656658

657659
def _convert_to_mlflow_image(

0 commit comments

Comments
 (0)