Skip to content

Commit afb66de

Browse files
authored
Fix shutdown from AWS Creds (#67)
* Use SIGUSR1 to signify clean shutdown * Fix bug, parse TTL as duration, add test * Update README
1 parent 830e8a0 commit afb66de

File tree

5 files changed

+53
-19
lines changed

5 files changed

+53
-19
lines changed

.travis.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
language: go
22

33
go:
4-
- 1.13.x
4+
- 1.16.x
55

66
before_install:
77
- curl -sfL https://install.goreleaser.com/github.com/golangci/golangci-lint.sh | sh -s -- -b $(go env GOPATH)/bin v1.21.0

README.md

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -261,7 +261,9 @@ vars so that they can be picked up by logging systems. They are as follows:
261261
request. By itself this will give you the default TTL for the policy
262262

263263
* `vault.AWSRoleTTL` - This will allow you to extend the requested time, up to
264-
the max allowed by Vault for the policy
264+
the max allowed by Vault for the policy. The value is a string, specified
265+
in [Go Duration format](https://golang.org/pkg/time/#ParseDuration). E.g.
266+
"1m40s" for 1 minute and 40 seconds.
265267

266268

267269
Configuring Docker Connectivity

callbacks_test.go

Lines changed: 18 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -451,7 +451,7 @@ func Test_ExecutorCallbacks(t *testing.T) {
451451

452452
Convey("Ups the TTL on creds from Vault when specified", func() {
453453
dummyContainerLabels["vault.AWSRole"] = "valid-aws-role"
454-
dummyContainerLabels["vault.AWSRoleTTL"] = "100"
454+
dummyContainerLabels["vault.AWSRoleTTL"] = "1m40s"
455455
taskInfo.Container.Docker.Parameters = labelsToDockerParams(dummyContainerLabels)
456456

457457
// We'll use logging output to validate that the goroutine ran
@@ -469,6 +469,23 @@ func Test_ExecutorCallbacks(t *testing.T) {
469469
So(capture.String(), ShouldNotContainSubstring, "Unable to renew")
470470
})
471471

472+
Convey("Fails the deploy when the TTL is not parseable", func() {
473+
dummyContainerLabels["vault.AWSRole"] = "valid-aws-role"
474+
dummyContainerLabels["vault.AWSRoleTTL"] = "100"
475+
taskInfo.Container.Docker.Parameters = labelsToDockerParams(dummyContainerLabels)
476+
477+
// We'll use logging output to validate that the goroutine ran
478+
var capture bytes.Buffer
479+
log.SetLevel(log.DebugLevel)
480+
log.SetOutput(&capture)
481+
482+
exec.LaunchTask(&taskInfo)
483+
484+
log.SetOutput(ioutil.Discard)
485+
486+
So(capture.String(), ShouldContainSubstring, "Invalid TTL passed in Docker label vaul.AWSRoleTTL")
487+
})
488+
472489
Convey("Fails to launch a task when the AWS Rols is wrong", func() {
473490
dummyContainerLabels["vault.AWSRole"] = "invalid-aws-role"
474491
taskInfo.Container.Docker.Parameters = labelsToDockerParams(dummyContainerLabels)

executor.go

Lines changed: 12 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,6 @@ import (
77
"net/http"
88
"os"
99
"regexp"
10-
"strconv"
1110
"strings"
1211
"sync"
1312
"syscall"
@@ -266,7 +265,7 @@ func (exec *sidecarExecutor) monitorTask(cntnrId string, taskInfo *mesos.TaskInf
266265
}
267266

268267
// watcherWg is used to let the Sidecar draining exit early if the
269-
// container exits
268+
// container exits, and when shutting down from the signal handler.
270269
exec.watcherWg.Add(1)
271270

272271
containerName := container.GetContainerName(&taskInfo.TaskID)
@@ -284,15 +283,15 @@ func (exec *sidecarExecutor) monitorTask(cntnrId string, taskInfo *mesos.TaskInf
284283

285284
if err != nil {
286285
log.Errorf("Error! %s", err)
286+
}
287287

288-
if exitCode == StillRunning {
289-
// Something went wrong, we better take this thing out!
290-
err := container.StopContainer(
291-
exec.client, containerName, exec.config.KillTaskTimeout,
292-
)
293-
if err != nil {
294-
log.Errorf("Error stopping container %s! %s", containerName, err)
295-
}
288+
if exitCode == StillRunning {
289+
// Something went wrong, we better take this thing out!
290+
err := container.StopContainer(
291+
exec.client, containerName, exec.config.KillTaskTimeout,
292+
)
293+
if err != nil {
294+
log.Errorf("Error stopping container %s! %s", containerName, err)
296295
}
297296
}
298297

@@ -466,7 +465,7 @@ func (exec *sidecarExecutor) monitorAWSCredsLease() {
466465
}
467466

468467
log.Info("Attempting to shutdown because of AWS credential lease expiration")
469-
ourProcess.Signal(syscall.SIGTERM)
468+
ourProcess.Signal(syscall.SIGUSR1)
470469
}
471470

472471
// AddAndMonitorVaultAWSKeys gets the aws keys for the specified role from Vault, begins monitoring
@@ -491,12 +490,12 @@ func (exec *sidecarExecutor) AddAndMonitorVaultAWSKeys(addEnvVars []string, role
491490
// will allow longer TTLs than the default, limited to no more than the max allowed by Vault.
492491
func (exec *sidecarExecutor) SetVaultAWSTTL(ttlStr string) error {
493492
log.Infof("Renewing AWS Lease ID '%s'", exec.awsCredsLease.LeaseID)
494-
ttl, err := strconv.Atoi(ttlStr)
493+
ttl, err := time.ParseDuration(ttlStr)
495494
if ttl < 1 || err != nil {
496495
return fmt.Errorf("Invalid TTL passed in Docker label vaul.AWSRoleTTL. Could not parse: '%s'", ttlStr)
497496
}
498497

499-
newLease, err := exec.vault.RenewAWSCredsLease(exec.awsCredsLease, ttl)
498+
newLease, err := exec.vault.RenewAWSCredsLease(exec.awsCredsLease, int(ttl.Seconds()))
500499
if err != nil {
501500
return fmt.Errorf("Unable to renew AWS Creds Lease: %s", err)
502501
}

main.go

Lines changed: 19 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -194,18 +194,34 @@ func handleSignals(scExec *sidecarExecutor) {
194194
sigChan := make(chan os.Signal, 1) // Buffered!
195195

196196
// Grab some signals we want to catch where possible
197-
signal.Notify(sigChan, syscall.SIGINT, syscall.SIGTERM)
197+
signal.Notify(sigChan, syscall.SIGINT, syscall.SIGTERM, syscall.SIGUSR1)
198198

199199
sig := <-sigChan
200200
log.Warnf("Received signal '%s', attempting clean shutdown", sig)
201+
202+
exitCode := 130
203+
201204
if scExec.watchLooper != nil {
202-
scExec.watchLooper.Done(errors.New("Got " + sig.String() + " signal!"))
205+
// Signal to monitorTask() to exit
206+
if sig == syscall.SIGUSR1 {
207+
// Intentionally invoked clean shutdown
208+
scExec.watchLooper.Quit()
209+
exitCode = 0
210+
} else {
211+
scExec.watchLooper.Done(errors.New("Got " + sig.String() + " signal!"))
212+
}
213+
214+
// Wait for monitorTask()'s goroutine to exit
215+
scExec.watcherWg.Wait()
203216
}
217+
218+
// Shut down log pump if running
204219
if scExec.logsQuitChan != nil {
205220
close(scExec.logsQuitChan) // Signal loops to exit
206221
}
222+
207223
time.Sleep(3 * time.Second) // Try to let it quit
208-
os.Exit(130) // Ctrl-C received or equivalent
224+
os.Exit(exitCode) // Ctrl-C received or equivalent
209225
}
210226

211227
func initConfig() (Config, error) {

0 commit comments

Comments
 (0)