Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
50 changes: 50 additions & 0 deletions files/bin/mount-bpf-fs
Original file line number Diff line number Diff line change
@@ -0,0 +1,50 @@
#!/usr/bin/env bash

set -o errexit
set -o nounset

# Directory the mount unit file is written to.
SYSTEMD_UNIT_DIR="/etc/systemd/system"
# Name of the mount unit; this is also the name passed to `systemctl`.
# NOTE(review): systemd expects a mount unit's name to be the escaped form of
# its mount path (see systemd.mount(5)), so keep this in sync with MOUNT_POINT.
SYSTEMD_UNIT="sys-fs-bpf.mount"
# Full path of the unit file; its existence is treated as "already configured".
SYSTEMD_UNIT_PATH="$SYSTEMD_UNIT_DIR/$SYSTEMD_UNIT"
# Where the filesystem gets mounted; also used to detect an existing mount.
MOUNT_POINT="/sys/fs/bpf"
# Filesystem type passed to `mount --types` when looking for existing mounts.
FS_TYPE="bpf"

# Debug logging is opt-in: export MOUNT_BPF_FS_DEBUG=true to enable it.
MOUNT_BPF_FS_DEBUG=${MOUNT_BPF_FS_DEBUG:-false}

# Print the arguments to stderr, prefixed with "DEBUG:", when debug logging is
# enabled. Always returns success so it is safe to call under errexit.
debug() {
  [ "$MOUNT_BPF_FS_DEBUG" = "true" ] || return 0
  echo >&2 "DEBUG:" "$@"
}

# Pre-flight checks: the script is a no-op (exit 0) when any of these hold:
#   1. a filesystem of type $FS_TYPE is already mounted somewhere,
#   2. something is already mounted exactly at $MOUNT_POINT,
#   3. the systemd unit file has already been written.
if [ "$(mount --types "$FS_TYPE" | wc -l)" -gt 0 ]; then
  debug "$FS_TYPE filesystem already mounted!"
  exit 0
# Compare the mount point column (third field of `mount` output) for exact
# equality. A regex/substring match would also accept paths such as
# "/sys/fs/bpf-foo", and awk prints nothing to stdout on a match.
elif mount | awk -v mount_point="$MOUNT_POINT" '$3 == mount_point { found = 1 } END { exit !found }'; then
  debug "mount point at $MOUNT_POINT already exists!"
  exit 0
elif [ -f "$SYSTEMD_UNIT_PATH" ]; then
  debug "systemd unit at $SYSTEMD_UNIT_PATH already exists!"
  exit 0
fi

# Write a systemd mount unit for the BPF filesystem, then enable it (so it is
# mounted on every boot) and start it (so it is mounted right now).
mkdir -p "$SYSTEMD_UNIT_DIR"
# The heredoc delimiter is intentionally unquoted so $MOUNT_POINT and $FS_TYPE
# are expanded; the unit's Type= is taken from FS_TYPE so it cannot drift from
# the type used by the "already mounted" check.
cat > "$SYSTEMD_UNIT_PATH" << EOL
[Unit]
Description=BPF mounts
Documentation=https://docs.kernel.org/bpf/index.html
DefaultDependencies=no
Before=local-fs.target umount.target
After=swap.target

[Mount]
What=bpffs
Where=$MOUNT_POINT
Type=$FS_TYPE
Options=rw,nosuid,nodev,noexec,relatime,mode=700

[Install]
WantedBy=multi-user.target
EOL

systemctl enable "$SYSTEMD_UNIT"
systemctl start "$SYSTEMD_UNIT"
35 changes: 26 additions & 9 deletions files/bootstrap.sh
Original file line number Diff line number Diff line change
Expand Up @@ -16,22 +16,24 @@ function print_help {
echo "Bootstraps an instance into an EKS cluster"
echo ""
echo "-h,--help print this help"
echo "--use-max-pods Sets --max-pods for the kubelet when true. (default: true)"
echo "--b64-cluster-ca The base64 encoded cluster CA content. Only valid when used with --apiserver-endpoint. Bypasses calling \"aws eks describe-cluster\""
echo
echo "--apiserver-endpoint The EKS cluster API Server endpoint. Only valid when used with --b64-cluster-ca. Bypasses calling \"aws eks describe-cluster\""
echo "--kubelet-extra-args Extra arguments to add to the kubelet. Useful for adding labels or taints."
echo "--enable-docker-bridge Restores the docker default bridge network. (default: false)"
echo "--aws-api-retry-attempts Number of retry attempts for AWS API call (DescribeCluster) (default: 3)"
echo "--docker-config-json The contents of the /etc/docker/daemon.json file. Useful if you want a custom config differing from the default one in the AMI"
echo "--b64-cluster-ca The base64 encoded cluster CA content. Only valid when used with --apiserver-endpoint. Bypasses calling \"aws eks describe-cluster\""
echo "--cluster-id Specify the id of EKS cluster"
echo "--container-runtime Specify a container runtime (default: dockerd)"
echo "--containerd-config-file File containing the containerd configuration to be used in place of AMI defaults."
echo "--dns-cluster-ip Overrides the IP address to use for DNS queries within the cluster. Defaults to 10.100.0.10 or 172.20.0.10 based on the IP address of the primary interface"
echo "--docker-config-json The contents of the /etc/docker/daemon.json file. Useful if you want a custom config differing from the default one in the AMI"
echo "--enable-docker-bridge Restores the docker default bridge network. (default: false)"
echo "--enable-local-outpost Enable support for worker nodes to communicate with the local control plane when running on a disconnected Outpost. (true or false)"
echo "--ip-family Specify ip family of the cluster"
echo "--kubelet-extra-args Extra arguments to add to the kubelet. Useful for adding labels or taints."
echo "--mount-bpf-fs Mount a bpffs at /sys/fs/bpf (default: true, for Kubernetes 1.27+; false otherwise)"
echo "--pause-container-account The AWS account (number) to pull the pause container from"
echo "--pause-container-version The tag of the pause container"
echo "--container-runtime Specify a container runtime (default: dockerd)"
echo "--ip-family Specify ip family of the cluster"
echo "--service-ipv6-cidr ipv6 cidr range of the cluster"
echo "--enable-local-outpost Enable support for worker nodes to communicate with the local control plane when running on a disconnected Outpost. (true or false)"
echo "--cluster-id Specify the id of EKS cluster"
echo "--use-max-pods Sets --max-pods for the kubelet when true. (default: true)"
}

POSITIONAL=()
Expand Down Expand Up @@ -123,6 +125,11 @@ while [[ $# -gt 0 ]]; do
shift
shift
;;
--mount-bpf-fs)
MOUNT_BPF_FS=$2
shift
shift
;;
*) # unknown option
POSITIONAL+=("$1") # save it in an array for later
shift # past argument
Expand Down Expand Up @@ -178,6 +185,12 @@ SERVICE_IPV6_CIDR="${SERVICE_IPV6_CIDR:-}"
ENABLE_LOCAL_OUTPOST="${ENABLE_LOCAL_OUTPOST:-}"
CLUSTER_ID="${CLUSTER_ID:-}"

# Pick the default for MOUNT_BPF_FS from the kubelet version: "false" when the
# version compares lower than 1.27.0, "true" otherwise. A value already set
# (for example via --mount-bpf-fs) takes precedence over the default.
if vercmp "$KUBELET_VERSION" lt "1.27.0"; then
  DEFAULT_MOUNT_BPF_FS="false"
else
  DEFAULT_MOUNT_BPF_FS="true"
fi
MOUNT_BPF_FS="${MOUNT_BPF_FS:-$DEFAULT_MOUNT_BPF_FS}"

# Helper function which calculates the amount of the given resource (either CPU or memory)
# to reserve in a given resource range, specified by a start and end of the range and a percentage
# of the resource to reserve. Note that we return zero if the start of the resource range is
Expand Down Expand Up @@ -269,6 +282,10 @@ if [[ "$MACHINE" != "x86_64" && "$MACHINE" != "aarch64" ]]; then
exit 1
fi

# Run the mount-bpf-fs helper only when the option is exactly "true".
case "$MOUNT_BPF_FS" in
  true)
    sudo mount-bpf-fs
    ;;
esac

ECR_URI=$(/etc/eks/get-ecr-uri.sh "${AWS_DEFAULT_REGION}" "${AWS_SERVICES_DOMAIN}" "${PAUSE_CONTAINER_ACCOUNT:-}")
PAUSE_CONTAINER_IMAGE=${PAUSE_CONTAINER_IMAGE:-$ECR_URI/eks/pause}
PAUSE_CONTAINER="$PAUSE_CONTAINER_IMAGE:$PAUSE_CONTAINER_VERSION"
Expand Down
96 changes: 96 additions & 0 deletions test/cases/mount-bpf-fs.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,96 @@
#!/usr/bin/env bash

set -o nounset
set -o errexit
set -o pipefail

export MOUNT_BPF_FS_DEBUG=true

echo "--> Should succeed if bpf type fs already exists"
# Stub `mount` so that every invocation reports a single bpf-type filesystem.
mount() {
  echo "none on /foo/bar type bpf (rw,nosuid,nodev,noexec,relatime,mode=700)"
}
export -f mount
# Capture the status without tripping errexit.
if mount-bpf-fs; then
  EXIT_CODE=0
else
  EXIT_CODE=$?
fi
if ((EXIT_CODE != 0)); then
  echo "❌ Test Failed: expected a zero exit code but got: $EXIT_CODE"
  exit 1
fi
# Stop exporting the stub so later cases are unaffected.
export -nf mount

echo "--> Should succeed if mount point already exists"
# Stub `mount`: a type-filtered listing (`mount --types ...`) must come back
# empty, otherwise mount-bpf-fs exits on its "filesystem already mounted" check
# and the mount point check under test is never reached. The unfiltered listing
# reports a non-bpf filesystem mounted at /sys/fs/bpf.
function mount() {
  if [[ "${1:-}" == "--types" ]]; then
    return 0
  fi
  echo "none on /sys/fs/bpf type foo (rw,nosuid,nodev,noexec,relatime,mode=700)"
}
export -f mount
EXIT_CODE=0
# Capture the debug output (enabled via MOUNT_BPF_FS_DEBUG) to verify which
# check short-circuited, then replay it so it still shows up in the test log.
OUTPUT=$(mount-bpf-fs 2>&1) || EXIT_CODE=$?
echo "$OUTPUT" >&2
if [[ ${EXIT_CODE} -ne 0 ]]; then
  echo "❌ Test Failed: expected a zero exit code but got: $EXIT_CODE"
  exit 1
fi
if [[ "$OUTPUT" != *"mount point at /sys/fs/bpf already exists!"* ]]; then
  echo "❌ Test Failed: expected the mount point check to short-circuit but got: $OUTPUT"
  exit 1
fi
export -nf mount

echo "--> Should succeed if systemd unit already exists"
# Stub `mount` to report no mounts at all, so neither the filesystem-type check
# nor the mount point check can short-circuit; only the pre-existing unit file
# can make mount-bpf-fs exit early.
function mount() {
  :
}
export -f mount
SYSTEMD_UNIT=/etc/systemd/system/sys-fs-bpf.mount
mkdir -p "$(dirname "$SYSTEMD_UNIT")"
echo "foo" > "$SYSTEMD_UNIT"
EXIT_CODE=0
# Capture the debug output (enabled via MOUNT_BPF_FS_DEBUG) to verify which
# check short-circuited, then replay it so it still shows up in the test log.
OUTPUT=$(mount-bpf-fs 2>&1) || EXIT_CODE=$?
echo "$OUTPUT" >&2
# Clean up before asserting so a failing case does not leave the fake unit
# behind for whatever runs next.
rm "$SYSTEMD_UNIT"
export -nf mount
if [[ ${EXIT_CODE} -ne 0 ]]; then
  echo "❌ Test Failed: expected a zero exit code but got: $EXIT_CODE"
  exit 1
fi
if [[ "$OUTPUT" != *"systemd unit at $SYSTEMD_UNIT already exists!"* ]]; then
  echo "❌ Test Failed: expected the systemd unit check to short-circuit but got: $OUTPUT"
  exit 1
fi

echo "--> Should default to true on 1.27+"
export KUBELET_VERSION=v1.27.0-eks-ba74326
# bootstrap.sh runs the helper as `sudo mount-bpf-fs`. A program launched that
# way is looked up on PATH, so an exported shell function would never be
# called; the mock therefore has to be a real executable placed first on PATH.
# It records each invocation by appending a line to $MOUNT_BPF_FS_MOCK.
MOUNT_BPF_FS_MOCK_DIR=$(mktemp -d)
MOUNT_BPF_FS_MOCK="$MOUNT_BPF_FS_MOCK_DIR/calls"
: > "$MOUNT_BPF_FS_MOCK"
export MOUNT_BPF_FS_MOCK
cat > "$MOUNT_BPF_FS_MOCK_DIR/mount-bpf-fs" << 'EOF'
#!/usr/bin/env bash
echo "called" >> "$MOUNT_BPF_FS_MOCK"
EOF
chmod +x "$MOUNT_BPF_FS_MOCK_DIR/mount-bpf-fs"
EXIT_CODE=0
PATH="$MOUNT_BPF_FS_MOCK_DIR:$PATH" /etc/eks/bootstrap.sh \
  --b64-cluster-ca dGVzdA== \
  --apiserver-endpoint http://my-api-endpoint \
  test || EXIT_CODE=$?
# Read the call log and clean up before asserting, so a failure leaks nothing.
MOUNT_BPF_FS_CALLS=$(cat "$MOUNT_BPF_FS_MOCK")
rm -rf -- "${MOUNT_BPF_FS_MOCK_DIR:?}"
if [[ ${EXIT_CODE} -ne 0 ]]; then
  echo "❌ Test Failed: expected a zero exit code but got '${EXIT_CODE}'"
  exit 1
fi
if [ "$MOUNT_BPF_FS_CALLS" != "called" ]; then
  echo "❌ Test Failed: expected mount-bpf-fs to be called once but it was not!"
  exit 1
fi

echo "--> Should default to false on 1.26-"
export KUBELET_VERSION=v1.26.0-eks-ba74326
# bootstrap.sh runs the helper as `sudo mount-bpf-fs`. A program launched that
# way is looked up on PATH, so an exported shell function would never be
# called and this check would pass vacuously; the mock therefore has to be a
# real executable placed first on PATH. It records each invocation by
# appending a line to $MOUNT_BPF_FS_MOCK.
MOUNT_BPF_FS_MOCK_DIR=$(mktemp -d)
MOUNT_BPF_FS_MOCK="$MOUNT_BPF_FS_MOCK_DIR/calls"
: > "$MOUNT_BPF_FS_MOCK"
export MOUNT_BPF_FS_MOCK
cat > "$MOUNT_BPF_FS_MOCK_DIR/mount-bpf-fs" << 'EOF'
#!/usr/bin/env bash
echo "called" >> "$MOUNT_BPF_FS_MOCK"
EOF
chmod +x "$MOUNT_BPF_FS_MOCK_DIR/mount-bpf-fs"
EXIT_CODE=0
PATH="$MOUNT_BPF_FS_MOCK_DIR:$PATH" /etc/eks/bootstrap.sh \
  --b64-cluster-ca dGVzdA== \
  --apiserver-endpoint http://my-api-endpoint \
  test || EXIT_CODE=$?
# Read the call log and clean up before asserting, so a failure leaks nothing.
MOUNT_BPF_FS_CALLS=$(cat "$MOUNT_BPF_FS_MOCK")
rm -rf -- "${MOUNT_BPF_FS_MOCK_DIR:?}"
if [[ ${EXIT_CODE} -ne 0 ]]; then
  echo "❌ Test Failed: expected a zero exit code but got '${EXIT_CODE}'"
  exit 1
fi
# Any recorded invocation (one or many) is a failure.
if [ -n "$MOUNT_BPF_FS_CALLS" ]; then
  echo "❌ Test Failed: expected mount-bpf-fs to not be called but it was!"
  exit 1
fi
2 changes: 2 additions & 0 deletions test/mocks/aws
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,8 @@ SCRIPTPATH="$(
pwd -P
)"

echo >&2 "mocking 'aws $@'"

if [[ $1 == "ec2" ]]; then

if [[ $2 == "describe-instance-types" ]]; then
Expand Down
3 changes: 1 addition & 2 deletions test/mocks/iptables-save
Original file line number Diff line number Diff line change
@@ -1,4 +1,3 @@
#!/usr/bin/env bash
set -euo pipefail

echo "mocking iptables-save with params $@"
echo >&2 "mocking 'iptables-save $@'"
5 changes: 2 additions & 3 deletions test/mocks/kubelet
Original file line number Diff line number Diff line change
@@ -1,11 +1,10 @@
#!/usr/bin/env bash
set -euo pipefail

echo >&2 "mocking 'kubelet $@'"

# The only use of kubelet directly is to get the Kubernetes version,
# so we'll set a default here to avoid test failures, and you can
# override by setting the KUBELET_VERSION environment variable.
if [ $# == 1 ] && [ $1 == "--version" ]; then
echo "Kubernetes ${KUBELET_VERSION:-v1.23.9-eks-ba74326}"
else
echo "mocking kubelet with params $@"
fi
23 changes: 23 additions & 0 deletions test/mocks/mount
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
#!/usr/bin/env bash

# Mock of `mount` for tests: logs the invocation to stderr and prints a fixed
# mount table to stdout. The arguments are only logged, never interpreted, so
# filters such as `--types` do not change the output.
echo >&2 "mocking 'mount $*'"

MOCK_MOUNT_TABLE='sysfs on /sys type sysfs (rw,nosuid,nodev,noexec,relatime)
proc on /proc type proc (rw,nosuid,nodev,noexec,relatime)
devtmpfs on /dev type devtmpfs (rw,nosuid,size=4059512k,nr_inodes=1014878,mode=755)
securityfs on /sys/kernel/security type securityfs (rw,nosuid,nodev,noexec,relatime)
tmpfs on /run type tmpfs (rw,nosuid,nodev,mode=755)
tmpfs on /sys/fs/cgroup type tmpfs (ro,nosuid,nodev,noexec,mode=755)
cgroup on /sys/fs/cgroup/systemd type cgroup (rw,nosuid,nodev,noexec,relatime,xattr,release_agent=/usr/lib/systemd/systemd-cgroups-agent,name=systemd)
pstore on /sys/fs/pstore type pstore (rw,nosuid,nodev,noexec,relatime)
cgroup on /sys/fs/cgroup/blkio type cgroup (rw,nosuid,nodev,noexec,relatime,blkio)
cgroup on /sys/fs/cgroup/freezer type cgroup (rw,nosuid,nodev,noexec,relatime,freezer)
cgroup on /sys/fs/cgroup/perf_event type cgroup (rw,nosuid,nodev,noexec,relatime,perf_event)
cgroup on /sys/fs/cgroup/hugetlb type cgroup (rw,nosuid,nodev,noexec,relatime,hugetlb)
cgroup on /sys/fs/cgroup/cpuset type cgroup (rw,nosuid,nodev,noexec,relatime,cpuset)
cgroup on /sys/fs/cgroup/net_cls,net_prio type cgroup (rw,nosuid,nodev,noexec,relatime,net_cls,net_prio)
cgroup on /sys/fs/cgroup/cpu,cpuacct type cgroup (rw,nosuid,nodev,noexec,relatime,cpu,cpuacct)
cgroup on /sys/fs/cgroup/memory type cgroup (rw,nosuid,nodev,noexec,relatime,memory)
cgroup on /sys/fs/cgroup/pids type cgroup (rw,nosuid,nodev,noexec,relatime,pids)
cgroup on /sys/fs/cgroup/devices type cgroup (rw,nosuid,nodev,noexec,relatime,devices)
/dev/xvda1 on / type xfs (rw,noatime,attr2,inode64,logbufs=8,logbsize=32k,noquota)'

printf '%s\n' "$MOCK_MOUNT_TABLE"
2 changes: 1 addition & 1 deletion test/mocks/sudo
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
#!/usr/bin/env bash
set -euo pipefail

echo >&2 "mocking 'sudo $@'"
exec "$@"
3 changes: 1 addition & 2 deletions test/mocks/systemctl
Original file line number Diff line number Diff line change
@@ -1,4 +1,3 @@
#!/usr/bin/env bash
set -euo pipefail

echo "mocking systemctl with $@"
echo >&2 "mocking 'systemctl $@'"