Skip to content

Commit 0cfd127

Browse files
committed
add "healthy" sdnotify policy
Add a new "healthy" sdnotify policy that instructs Podman to send the READY message once the container has turned healthy. Fixes: #6160 Signed-off-by: Valentin Rothberg <[email protected]>
1 parent 2a25d1d commit 0cfd127

File tree

9 files changed

+109
-14
lines changed

9 files changed

+109
-14
lines changed

cmd/podman/common/completion.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1545,7 +1545,7 @@ func AutocompleteLogLevel(cmd *cobra.Command, args []string, toComplete string)
15451545
// AutocompleteSDNotify - Autocomplete sdnotify options.
15461546
// -> "conmon", "container", "healthy", "ignore"
15471547
func AutocompleteSDNotify(cmd *cobra.Command, args []string, toComplete string) ([]string, cobra.ShellCompDirective) {
1548-
types := []string{define.SdNotifyModeContainer, define.SdNotifyModeContainer, define.SdNotifyModeIgnore}
1548+
types := []string{define.SdNotifyModeConmon, define.SdNotifyModeContainer, define.SdNotifyModeHealthy, define.SdNotifyModeIgnore}
15491549
return types, cobra.ShellCompDirectiveNoFileComp
15501550
}
15511551

docs/source/markdown/options/sdnotify.md

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,13 +2,15 @@
22
####> podman create, run
33
####> If file is edited, make sure the changes
44
####> are applicable to all of those.
5-
#### **--sdnotify**=**container** | *conmon* | *ignore*
5+
#### **--sdnotify**=**container** | *conmon* | *healthy* | *ignore*
66

77
Determines how to use the NOTIFY_SOCKET, as passed with systemd and Type=notify.
88

99
Default is **container**, which means allow the OCI runtime to proxy the socket into the
1010
container to receive ready notification. Podman sets the MAINPID to conmon's pid.
1111
The **conmon** option sets MAINPID to conmon's pid, and sends READY when the container
1212
has started. The socket is never passed to the runtime or the container.
13+
The **healthy** option sets MAINPID to conmon's pid, and sends READY when the container
14+
has turned healthy; requires a healthcheck to be set. The socket is never passed to the runtime or the container.
1315
The **ignore** option removes NOTIFY_SOCKET from the environment for itself and child processes,
1416
for the case where some other process above Podman uses NOTIFY_SOCKET and Podman does not use it.

libpod/container_api.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -113,7 +113,7 @@ func (c *Container) Start(ctx context.Context, recursive bool) (finalErr error)
113113
}
114114

115115
// Start the container
116-
return c.start()
116+
return c.start(ctx)
117117
}
118118

119119
// Update updates the given container.

libpod/container_internal.go

Lines changed: 32 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -308,7 +308,7 @@ func (c *Container) handleRestartPolicy(ctx context.Context) (_ bool, retErr err
308308
return false, err
309309
}
310310
}
311-
if err := c.start(); err != nil {
311+
if err := c.start(ctx); err != nil {
312312
return false, err
313313
}
314314
return true, nil
@@ -1198,11 +1198,11 @@ func (c *Container) initAndStart(ctx context.Context) (retErr error) {
11981198
}
11991199

12001200
// Now start the container
1201-
return c.start()
1201+
return c.start(ctx)
12021202
}
12031203

12041204
// Internal, non-locking function to start a container
1205-
func (c *Container) start() error {
1205+
func (c *Container) start(ctx context.Context) error {
12061206
if c.config.Spec.Process != nil {
12071207
logrus.Debugf("Starting container %s with command %v", c.ID(), c.config.Spec.Process.Args)
12081208
}
@@ -1214,9 +1214,11 @@ func (c *Container) start() error {
12141214

12151215
c.state.State = define.ContainerStateRunning
12161216

1217+
// Unless being ignored, set the MAINPID to conmon.
12171218
if c.config.SdNotifyMode != define.SdNotifyModeIgnore {
12181219
payload := fmt.Sprintf("MAINPID=%d", c.state.ConmonPID)
12191220
if c.config.SdNotifyMode == define.SdNotifyModeConmon {
1221+
// Also send the READY message for the "conmon" policy.
12201222
payload += "\n"
12211223
payload += daemon.SdNotifyReady
12221224
}
@@ -1241,7 +1243,32 @@ func (c *Container) start() error {
12411243

12421244
defer c.newContainerEvent(events.Start)
12431245

1244-
return c.save()
1246+
if err := c.save(); err != nil {
1247+
return err
1248+
}
1249+
1250+
if c.config.SdNotifyMode != define.SdNotifyModeHealthy {
1251+
return nil
1252+
}
1253+
1254+
// Wait for the container to turn healthy before sending the READY
1255+
// message. This implies that we need to unlock and re-lock the
1256+
// container.
1257+
if !c.batched {
1258+
c.lock.Unlock()
1259+
defer c.lock.Lock()
1260+
}
1261+
1262+
if _, err := c.WaitForConditionWithInterval(ctx, DefaultWaitInterval, define.HealthCheckHealthy); err != nil {
1263+
return err
1264+
}
1265+
1266+
if err := notifyproxy.SendMessage(c.config.SdNotifySocket, daemon.SdNotifyReady); err != nil {
1267+
logrus.Errorf("Sending READY message after turning healthy: %s", err.Error())
1268+
} else {
1269+
logrus.Debugf("Notify sent successfully")
1270+
}
1271+
return nil
12451272
}
12461273

12471274
// Internal, non-locking function to stop container
@@ -1487,7 +1514,7 @@ func (c *Container) restartWithTimeout(ctx context.Context, timeout uint) (retEr
14871514
return err
14881515
}
14891516
}
1490-
return c.start()
1517+
return c.start(ctx)
14911518
}
14921519

14931520
// mountStorage sets up the container's root filesystem

libpod/define/sdnotify.go

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -4,17 +4,18 @@ import "fmt"
44

55
// Strings used for --sdnotify option to podman
66
const (
7-
SdNotifyModeContainer = "container"
87
SdNotifyModeConmon = "conmon"
8+
SdNotifyModeContainer = "container"
9+
SdNotifyModeHealthy = "healthy"
910
SdNotifyModeIgnore = "ignore"
1011
)
1112

1213
// ValidateSdNotifyMode validates the specified mode.
1314
func ValidateSdNotifyMode(mode string) error {
1415
switch mode {
15-
case "", SdNotifyModeContainer, SdNotifyModeConmon, SdNotifyModeIgnore:
16+
case "", SdNotifyModeContainer, SdNotifyModeConmon, SdNotifyModeIgnore, SdNotifyModeHealthy:
1617
return nil
1718
default:
18-
return fmt.Errorf("%w: invalid sdnotify value %q: must be %s, %s or %s", ErrInvalidArg, mode, SdNotifyModeContainer, SdNotifyModeConmon, SdNotifyModeIgnore)
19+
return fmt.Errorf("%w: invalid sdnotify value %q: must be %s, %s, %s or %s", ErrInvalidArg, mode, SdNotifyModeConmon, SdNotifyModeContainer, SdNotifyModeHealthy, SdNotifyModeIgnore)
1920
}
2021
}

libpod/oci_conmon_attach_common.go

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@
44
package libpod
55

66
import (
7+
"context"
78
"errors"
89
"fmt"
910
"io"
@@ -86,7 +87,7 @@ func (r *ConmonOCIRuntime) Attach(c *Container, params *AttachOptions) error {
8687
// If starting was requested, start the container and notify when that's
8788
// done.
8889
if params.Start {
89-
if err := c.start(); err != nil {
90+
if err := c.start(context.TODO()); err != nil {
9091
return err
9192
}
9293
params.Started <- true

pkg/specgen/container_validate.go

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -14,8 +14,6 @@ var (
1414
ErrInvalidSpecConfig = errors.New("invalid configuration")
1515
// SystemDValues describes the only values that SystemD can be
1616
SystemDValues = []string{"true", "false", "always"}
17-
// SdNotifyModeValues describes the only values that SdNotifyMode can be
18-
SdNotifyModeValues = []string{define.SdNotifyModeContainer, define.SdNotifyModeConmon, define.SdNotifyModeIgnore}
1917
// ImageVolumeModeValues describes the only values that ImageVolumeMode can be
2018
ImageVolumeModeValues = []string{"ignore", define.TypeTmpfs, "anonymous"}
2119
)

pkg/specgen/generate/container_create.go

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -601,18 +601,25 @@ func createContainerOptions(rt *libpod.Runtime, s *specgen.SpecGenerator, pod *l
601601
}
602602
options = append(options, libpod.WithRestartRetries(retries), libpod.WithRestartPolicy(restartPolicy))
603603

604+
healthCheckSet := false
604605
if s.ContainerHealthCheckConfig.HealthConfig != nil {
605606
options = append(options, libpod.WithHealthCheck(s.ContainerHealthCheckConfig.HealthConfig))
606607
logrus.Debugf("New container has a health check")
608+
healthCheckSet = true
607609
}
608610
if s.ContainerHealthCheckConfig.StartupHealthConfig != nil {
609611
options = append(options, libpod.WithStartupHealthcheck(s.ContainerHealthCheckConfig.StartupHealthConfig))
612+
healthCheckSet = true
610613
}
611614

612615
if s.ContainerHealthCheckConfig.HealthCheckOnFailureAction != define.HealthCheckOnFailureActionNone {
613616
options = append(options, libpod.WithHealthCheckOnFailureAction(s.ContainerHealthCheckConfig.HealthCheckOnFailureAction))
614617
}
615618

619+
if s.SdNotifyMode == define.SdNotifyModeHealthy && !healthCheckSet {
620+
return nil, fmt.Errorf("%w: sdnotify policy %q requires a healthcheck to be set", define.ErrInvalidArg, s.SdNotifyMode)
621+
}
622+
616623
if len(s.Secrets) != 0 {
617624
manager, err := rt.SecretsManager()
618625
if err != nil {

test/system/260-sdnotify.bats

Lines changed: 59 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -184,6 +184,65 @@ READY=1"
184184
_stop_socat
185185
}
186186

187+
# These tests can fail in a development environment because of SELinux.
188+
# quick fix: chcon -t container_runtime_exec_t ./bin/podman
189+
@test "sdnotify : healthy" {
190+
export NOTIFY_SOCKET=$PODMAN_TMPDIR/container.sock
191+
_start_socat
192+
193+
wait_file="$PODMAN_TMPDIR/$(random_string).wait_for_me"
194+
run_podman 125 create --sdnotify=healthy $IMAGE
195+
is "$output" "Error: invalid argument: sdnotify policy \"healthy\" requires a healthcheck to be set"
196+
197+
# Create a container with a simple `/bin/true` healthcheck that we need to
198+
# run manually.
199+
ctr=$(random_string)
200+
run_podman create --name $ctr \
201+
--health-cmd=/bin/true \
202+
--health-retries=1 \
203+
--health-interval=disable \
204+
--sdnotify=healthy \
205+
$IMAGE sleep infinity
206+
207+
# Start the container in the background which will block until the
208+
# container turned healthy. After that, create the wait_file which
209+
# indicates that start has returned.
210+
(timeout --foreground -v --kill=5 20 $PODMAN start $ctr && touch $wait_file) &
211+
212+
run_podman wait --condition=running $ctr
213+
214+
# Make sure that the MAINPID is set but without the READY message.
215+
run_podman container inspect $ctr --format "{{.State.ConmonPid}}"
216+
mainPID="$output"
217+
# The container is not healthy yet, so READY=1 must not have been sent;
218+
# the socat log is expected to contain only the MAINPID message.
219+
run cat $_SOCAT_LOG
220+
# The 'echo's help us debug failed runs
221+
echo "socat log:"
222+
echo "$output"
223+
224+
is "$output" "MAINPID=$mainPID" "Container is not healthy yet, so we only know the main PID"
225+
226+
# Now run the healthcheck and look for the READY message.
227+
run_podman healthcheck run $ctr
228+
is "$output" "" "output from 'podman healthcheck run'"
229+
230+
# Wait for start to return. At that point the READY message must have been
231+
# sent.
232+
wait_for_file $wait_file
233+
run cat $_SOCAT_LOG
234+
echo "socat log:"
235+
echo "$output"
236+
is "$output" "MAINPID=$mainPID
237+
READY=1"
238+
239+
run_podman container inspect --format "{{.State.Status}}" $ctr
240+
is "$output" "running" "make sure container is still running"
241+
242+
run_podman rm -f -t0 $ctr
243+
_stop_socat
244+
}
245+
187246
@test "sdnotify : play kube - no policies" {
188247
# Create the YAML file
189248
yaml_source="$PODMAN_TMPDIR/test.yaml"

0 commit comments

Comments
 (0)