minikube/deploy/kicbase/entrypoint

285 lines
11 KiB
Bash
Executable File

#!/bin/bash
# Copyright 2019 The Kubernetes Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
set -o errexit
set -o nounset
set -o pipefail
set -x
configure_proxy() {
# ensure all processes receive the proxy settings by default
# https://www.freedesktop.org/software/systemd/man/systemd-system.conf.html
mkdir -p /etc/systemd/system.conf.d/
cat <<EOF >/etc/systemd/system.conf.d/proxy-default-environment.conf
[Manager]
DefaultEnvironment="HTTP_PROXY=${HTTP_PROXY:-}" "HTTPS_PROXY=${HTTPS_PROXY:-}" "NO_PROXY=${NO_PROXY:-}"
EOF
}
update-alternatives() {
echo "retryable update-alternatives: $*"
local args=$*
for i in $(seq 0 15); do
/usr/bin/update-alternatives $args && return || echo "update-alternatives $args failed (retry $i)"
echo "update-alternatives diagnostics information below:"
mount
df -h /var
find /var/lib/dpkg
dmesg | tail
sleep 1
done
exit 30
}
fix_mount() {
echo 'INFO: ensuring we can execute mount/umount even with userns-remap'
# necessary only when userns-remap is enabled on the host, but harmless
# The binary /bin/mount should be owned by root and have the setuid bit
chown root:root "$(which mount)" "$(which umount)"
chmod -s "$(which mount)" "$(which umount)"
# This is a workaround to an AUFS bug that might cause `Text file
# busy` on `mount` command below. See more details in
# https://github.com/moby/moby/issues/9547
if [[ "$(stat -f -c %T "$(which mount)")" == 'aufs' ]]; then
echo 'INFO: detected aufs, calling sync' >&2
sync
fi
echo 'INFO: remounting /sys read-only'
# systemd-in-a-container should have read only /sys
# https://systemd.io/CONTAINER_INTERFACE/
# however, we need other things from `docker run --privileged` ...
# and this flag also happens to make /sys rw, amongst other things
mount -o remount,ro /sys
echo 'INFO: making mounts shared' >&2
# for mount propagation
mount --make-rshared /
}
fix_cgroup_mounts() {
echo 'INFO: fix cgroup mounts for all subsystems'
# For each cgroup subsystem, Docker does a bind mount from the current
# cgroup to the root of the cgroup subsystem. For instance:
# /sys/fs/cgroup/memory/docker/<cid> -> /sys/fs/cgroup/memory
#
# This will confuse Kubelet and cadvisor and will dump the following error
# messages in kubelet log:
# `summary_sys_containers.go:47] Failed to get system container stats for ".../kubelet.service"`
#
# This is because `/proc/<pid>/cgroup` is not affected by the bind mount.
# The following is a workaround to recreate the original cgroup
# environment by doing another bind mount for each subsystem.
local cgroup_mounts
# This regexp finds all /sys/fs/cgroup mounts that are cgroupfs and mounted somewhere other than / - extracting fields 4+
# See https://man7.org/linux/man-pages/man5/proc.5.html for field names
# Example inputs:
#
# Docker: /docker/562a56986a84b3cd38d6a32ac43fdfcc8ad4d2473acf2839cbf549273f35c206 /sys/fs/cgroup/devices rw,nosuid,nodev,noexec,relatime shared:143 master:23 - cgroup devices rw,devices
# podman: /libpod_parent/libpod-73a4fb9769188ae5dc51cb7e24b9f2752a4af7b802a8949f06a7b2f2363ab0e9 ...
# Cloud Shell: /kubepods/besteffort/pod3d6beaa3004913efb68ce073d73494b0/accdf94879f0a494f317e9a0517f23cdd18b35ff9439efd0175f17bbc56877c4 /sys/fs/cgroup/memory rw,nosuid,nodev,noexec,relatime master:19 - cgroup cgroup rw,memory
# GitHub actions #9304: /actions_job/0924fbbcf7b18d2a00c171482b4600747afc367a9dfbeac9d6b14b35cda80399 /sys/fs/cgroup/memory rw,nosuid,nodev,noexec,relatime shared:263 master:24 - cgroup cgroup rw,memory
cgroup_mounts=$(grep -E -o '/[[:alnum:]].* /sys/fs/cgroup.*.*cgroup' /proc/self/mountinfo || true)
if [[ -n "${cgroup_mounts}" ]]; then
local mount_root
mount_root=$(echo "${cgroup_mounts}" | head -n 1 | cut -d' ' -f1)
for mount_point in $(echo "${cgroup_mounts}" | cut -d' ' -f 2); do
# bind mount each mount_point to mount_point + mount_root
# mount --bind /sys/fs/cgroup/cpu /sys/fs/cgroup/cpu/docker/fb07bb6daf7730a3cb14fc7ff3e345d1e47423756ce54409e66e01911bab2160
local target="${mount_point}${mount_root}"
if ! findmnt "${target}"; then
mkdir -p "${target}"
mount --bind "${mount_point}" "${target}"
fi
done
fi
}
retryable_fix_cgroup_mounts() {
for i in $(seq 0 10); do
fix_cgroup_mounts && return || echo "fix_cgroup failed with exit code $? (retry $i)"
echo "fix_cgroup diagnostics information below:"
mount
sleep 1
done
exit 31
}
fix_machine_id() {
# Deletes the machine-id embedded in the node image and generates a new one.
# This is necessary because both kubelet and other components like weave net
# use machine-id internally to distinguish nodes.
echo 'INFO: clearing and regenerating /etc/machine-id' >&2
rm -f /etc/machine-id
systemd-machine-id-setup
}
fix_product_name() {
# this is a small fix to hide the underlying hardware and fix issue #426
# https://github.com/kubernetes-sigs/kind/issues/426
if [[ -f /sys/class/dmi/id/product_name ]]; then
echo 'INFO: faking /sys/class/dmi/id/product_name to be "kind"' >&2
echo 'kind' > /kind/product_name
mount -o ro,bind /kind/product_name /sys/class/dmi/id/product_name
fi
}
fix_product_uuid() {
# The system UUID is usually read from DMI via sysfs, the problem is that
# in the kind case this means that all (container) nodes share the same
# system/product uuid, as they share the same DMI.
# Note: The UUID is read from DMI, this tool is overwriting the sysfs files
# which should fix the attached issue, but this workaround does not address
# the issue if a tool is reading directly from DMI.
# https://github.com/kubernetes-sigs/kind/issues/1027
[[ ! -f /kind/product_uuid ]] && cat /proc/sys/kernel/random/uuid > /kind/product_uuid
if [[ -f /sys/class/dmi/id/product_uuid ]]; then
echo 'INFO: faking /sys/class/dmi/id/product_uuid to be random' >&2
mount -o ro,bind /kind/product_uuid /sys/class/dmi/id/product_uuid
fi
if [[ -f /sys/devices/virtual/dmi/id/product_uuid ]]; then
echo 'INFO: faking /sys/devices/virtual/dmi/id/product_uuid as well' >&2
mount -o ro,bind /kind/product_uuid /sys/devices/virtual/dmi/id/product_uuid
fi
}
fix_kmsg() {
# In environments where /dev/kmsg is not available, the kubelet (1.15+) won't
# start because it cannot open /dev/kmsg when starting the kmsgparser in the
# OOM parser.
# To support those environments, we link /dev/kmsg to /dev/console.
# https://github.com/kubernetes-sigs/kind/issues/662
if [[ ! -e /dev/kmsg ]]; then
if [[ -e /dev/console ]]; then
echo 'WARN: /dev/kmsg does not exist, symlinking /dev/console' >&2
ln -s /dev/console /dev/kmsg
else
echo 'WARN: /dev/kmsg does not exist, nor does /dev/console!' >&2
fi
fi
}
select_iptables() {
# based on: https://github.com/kubernetes/kubernetes/blob/ffe93b3979486feb41a0f85191bdd189cbd56ccc/build/debian-iptables/iptables-wrapper
local mode=nft
num_legacy_lines=$( (iptables-legacy-save || true; ip6tables-legacy-save || true) 2>/dev/null | grep '^-' | wc -l || true)
if [ "${num_legacy_lines}" -ge 10 ]; then
mode=legacy
else
num_nft_lines=$( (timeout 5 sh -c "iptables-nft-save; ip6tables-nft-save" || true) 2>/dev/null | grep '^-' | wc -l || true)
if [ "${num_legacy_lines}" -ge "${num_nft_lines}" ]; then
mode=legacy
fi
fi
echo "INFO: setting iptables to detected mode: ${mode}" >&2
update-alternatives --set iptables "/usr/sbin/iptables-${mode}" > /dev/null
update-alternatives --set ip6tables "/usr/sbin/ip6tables-${mode}" > /dev/null
}
enable_network_magic(){
# well-known docker embedded DNS is at 127.0.0.11:53
local docker_embedded_dns_ip='127.0.0.11'
# first we need to detect an IP to use for reaching the docker host
local docker_host_ip
docker_host_ip="$( (getent ahostsv4 'host.docker.internal' | head -n1 | cut -d' ' -f1) || true)"
if [[ -z "${docker_host_ip}" ]]; then
docker_host_ip=$(ip -4 route show default | cut -d' ' -f3)
fi
# patch docker's iptables rules to switch out the DNS IP
iptables-save \
| sed \
`# switch docker DNS DNAT rules to our chosen IP` \
-e "s/-d ${docker_embedded_dns_ip}/-d ${docker_host_ip}/g" \
`# we need to also apply these rules to non-local traffic (from pods)` \
-e 's/-A OUTPUT \(.*\) -j DOCKER_OUTPUT/\0\n-A PREROUTING \1 -j DOCKER_OUTPUT/' \
`# switch docker DNS SNAT rules rules to our chosen IP` \
-e "s/--to-source :53/--to-source ${docker_host_ip}:53/g"\
| iptables-restore
# now we can ensure that DNS is configured to use our IP
cp /etc/resolv.conf /etc/resolv.conf.original
sed -e "s/${docker_embedded_dns_ip}/${docker_host_ip}/g" /etc/resolv.conf.original >/etc/resolv.conf
# fixup IPs in manifests ...
curr_ipv4="$( (getent ahostsv4 "$(hostname)" | head -n1 | cut -d' ' -f1) || true)"
echo "INFO: Detected IPv4 address: ${curr_ipv4}" >&2
if [ -f /kind/old-ipv4 ]; then
old_ipv4=$(cat /kind/old-ipv4)
echo "INFO: Detected old IPv4 address: ${old_ipv4}" >&2
# sanity check that we have a current address
if [[ -z $curr_ipv4 ]]; then
echo "ERROR: Have an old IPv4 address but no current IPv4 address (!)" >&2
exit 1
fi
# kubernetes manifests are only present on control-plane nodes
sed -i "s#${old_ipv4}#${curr_ipv4}#" /etc/kubernetes/manifests/*.yaml || true
# this is no longer required with autodiscovery
sed -i "s#${old_ipv4}#${curr_ipv4}#" /var/lib/kubelet/kubeadm-flags.env || true
fi
if [[ -n $curr_ipv4 ]]; then
echo -n "${curr_ipv4}" >/kind/old-ipv4
fi
# do IPv6
curr_ipv6="$( (getent ahostsv6 "$(hostname)" | head -n1 | cut -d' ' -f1) || true)"
echo "INFO: Detected IPv6 address: ${curr_ipv6}" >&2
if [ -f /kind/old-ipv6 ]; then
old_ipv6=$(cat /kind/old-ipv6)
echo "INFO: Detected old IPv6 address: ${old_ipv6}" >&2
# sanity check that we have a current address
if [[ -z $curr_ipv6 ]]; then
echo "ERROR: Have an old IPv6 address but no current IPv6 address (!)" >&2
fi
# kubernetes manifests are only present on control-plane nodes
sed -i "s#${old_ipv6}#${curr_ipv6}#" /etc/kubernetes/manifests/*.yaml || true
# this is no longer required with autodiscovery
sed -i "s#${old_ipv6}#${curr_ipv6}#" /var/lib/kubelet/kubeadm-flags.env || true
fi
if [[ -n $curr_ipv6 ]]; then
echo -n "${curr_ipv6}" >/kind/old-ipv6
fi
}
# run pre-init fixups
# NOTE: it's important that we do configure* first in this order to avoid races
configure_proxy
fix_kmsg
fix_mount
retryable_fix_cgroup_mounts
fix_machine_id
fix_product_name
fix_product_uuid
select_iptables
enable_network_magic
# we want the command (expected to be systemd) to be PID1, so exec to it
exec "$@"