File tree Expand file tree Collapse file tree 3 files changed +8
-1
lines changed
Expand file tree Collapse file tree 3 files changed +8
-1
lines changed Original file line number Diff line number Diff line change @@ -299,6 +299,8 @@ def task_pre_step(
299299 self ._save_logs_sidecar .start ()
300300
301301 # Start spot termination monitor sidecar.
302+ # TODO: Find a nicer way to pass the main process ID to a sidecar, so that sidecars can send signals back to the main process.
303+ os .environ ["MF_MAIN_PID" ] = str (os .getpid ())
302304 current ._update_env (
303305 {"spot_termination_notice" : "/tmp/spot_termination_notice" }
304306 )
Original file line number Diff line number Diff line change @@ -559,6 +559,8 @@ def task_pre_step(
559559 self ._save_logs_sidecar .start ()
560560
561561 # Start spot termination monitor sidecar.
562+ # TODO: Find a nicer way to pass the main process ID to a sidecar, so that sidecars can send signals back to the main process.
563+ os .environ ["MF_MAIN_PID" ] = str (os .getpid ())
562564 current ._update_env (
563565 {"spot_termination_notice" : "/tmp/spot_termination_notice" }
564566 )
Original file line number Diff line number Diff line change @@ -21,6 +21,9 @@ def __init__(self):
2121 self ._token = None
2222 self ._token_expiry = 0
2323
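24+ # Due to nesting, os.getppid() is not reliable for fetching the main task PID, so prefer the MF_MAIN_PID environment variable and fall back to os.getppid() only when it is unset.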
25+ self .main_pid = int (os .getenv ("MF_MAIN_PID" , os .getppid ()))
26+
2427 if self ._is_aws_spot_instance ():
2528 self ._process = Process (target = self ._monitor_loop )
2629 self ._process .start ()
@@ -71,7 +74,7 @@ def _monitor_loop(self):
7174 if response .status_code == 200 :
7275 termination_time = response .text
7376 self ._emit_termination_metadata (termination_time )
74- os .kill (os . getppid () , signal .SIGTERM )
77+ os .kill (self . main_pid , signal .SIGTERM )
7578 break
7679 except (requests .exceptions .RequestException , requests .exceptions .Timeout ):
7780 pass
You can’t perform that action at this time.
0 commit comments