# minikube/deploy/addons/inspektor-gadget/ig-daemonset.yaml.tmpl
# 199 lines, 7.6 KiB (file-viewer language label: Cheetah)

# DaemonSet "gadget" (namespace "gadget"): runs the Inspektor Gadget container
# on every Linux node. The pod shares the host PID and network namespaces,
# adds several Linux capabilities (each justified inline below) and mounts
# host paths (/, /run, /lib/modules, debugfs, cgroup and bpffs).
# The container image is filled in by Go-template placeholders at render time.
apiVersion: apps/v1
kind: DaemonSet
metadata:
  name: gadget
  namespace: gadget
  labels:
    k8s-app: gadget
spec:
  selector:
    matchLabels:
      k8s-app: gadget
  template:
    metadata:
      labels:
        k8s-app: gadget
      annotations:
        # We need to set gadget container as unconfined so it is able to write
        # /sys/fs/bpf as well as /sys/kernel/debug/tracing.
        # Otherwise, we can get an error like:
        # "failed to create server failed to create folder for pinning bpf maps: mkdir /sys/fs/bpf/gadget: permission denied"
        # (For reference, see: https://github.com/inspektor-gadget/inspektor-gadget/runs/3966318270?check_suite_focus=true#step:20:221)
        container.apparmor.security.beta.kubernetes.io/gadget: "unconfined"
        inspektor-gadget.kinvolk.io/option-hook-mode: "auto"
    spec:
      # `serviceAccountName` is the supported field; `serviceAccount` is its
      # deprecated alias in the PodSpec API.
      serviceAccountName: gadget
      hostPID: true
      hostNetwork: true
      nodeSelector:
        kubernetes.io/os: "linux"
      containers:
        - name: gadget
          terminationMessagePolicy: FallbackToLogsOnError
          image: {{.CustomRegistries.InspektorGadget | default .ImageRepository | default .Registries.InspektorGadget }}{{.Images.InspektorGadget}}
          imagePullPolicy: "Always"
          command: [ "/entrypoint" ]
          lifecycle:
            preStop:
              exec:
                command:
                  - "/cleanup"
          # Readiness and liveness both run the same gadgettracermanager
          # check.
          readinessProbe:
            periodSeconds: 5
            timeoutSeconds: 2
            exec:
              command:
                - /bin/gadgettracermanager
                - -liveness
          livenessProbe:
            periodSeconds: 5
            timeoutSeconds: 2
            exec:
              command:
                - /bin/gadgettracermanager
                - -liveness
          env:
            - name: NODE_NAME
              valueFrom:
                fieldRef:
                  fieldPath: spec.nodeName
            - name: GADGET_POD_UID
              valueFrom:
                fieldRef:
                  fieldPath: metadata.uid
            - name: TRACELOOP_NODE_NAME
              valueFrom:
                fieldRef:
                  fieldPath: spec.nodeName
            - name: TRACELOOP_POD_NAME
              valueFrom:
                fieldRef:
                  fieldPath: metadata.name
            - name: TRACELOOP_POD_NAMESPACE
              valueFrom:
                fieldRef:
                  fieldPath: metadata.namespace
            # NOTE(review): GADGET_IMAGE and INSPEKTOR_GADGET_VERSION are
            # hard-coded while the container image above is templated;
            # presumably they must match the deployed image - confirm.
            - name: GADGET_IMAGE
              value: "ghcr.io/inspektor-gadget/inspektor-gadget"
            - name: INSPEKTOR_GADGET_VERSION
              value: "v0.16.1"
            - name: INSPEKTOR_GADGET_OPTION_HOOK_MODE
              value: "auto"
            - name: INSPEKTOR_GADGET_OPTION_FALLBACK_POD_INFORMER
              value: "true"
            # Make sure to keep these settings in sync with pkg/container-utils/runtime-client/interface.go
            - name: INSPEKTOR_GADGET_CONTAINERD_SOCKETPATH
              value: "/run/containerd/containerd.sock"
            - name: INSPEKTOR_GADGET_CRIO_SOCKETPATH
              value: "/run/crio/crio.sock"
            - name: INSPEKTOR_GADGET_DOCKER_SOCKETPATH
              value: "/run/docker.sock"
            # Same path as the mountPath of the "host" volume below.
            - name: HOST_ROOT
              value: "/host"
          securityContext:
            capabilities:
              add:
                # We need CAP_NET_ADMIN to be able to create BPF link.
                # Indeed, link_create is called with prog->type which equals
                # BPF_PROG_TYPE_CGROUP_SKB.
                # This value is then checked in
                # bpf_prog_attach_check_attach_type() which also checks if we have
                # CAP_NET_ADMIN:
                # https://elixir.bootlin.com/linux/v5.14.14/source/kernel/bpf/syscall.c#L4099
                # https://elixir.bootlin.com/linux/v5.14.14/source/kernel/bpf/syscall.c#L2967
                - NET_ADMIN
                # We need CAP_SYS_ADMIN to use Python-BCC gadgets because bcc
                # internally calls bpf_get_map_fd_by_id() which contains the
                # following snippet:
                #   if (!capable(CAP_SYS_ADMIN))
                #     return -EPERM;
                # (https://elixir.bootlin.com/linux/v5.10.73/source/kernel/bpf/syscall.c#L3254)
                #
                # Details about this are given in:
                # > The important design decision is to allow ID->FD transition for
                #   CAP_SYS_ADMIN only. What it means that user processes can run
                #   with CAP_BPF and CAP_NET_ADMIN and they will not be able to affect each
                #   other unless they pass FDs via scm_rights or via pinning in bpffs.
                #   ID->FD is a mechanism for human override and introspection.
                #   An admin can do 'sudo bpftool prog ...'. It's possible to enforce via LSM that
                #   only bpftool binary does bpf syscall with CAP_SYS_ADMIN and the rest of user
                #   space processes do bpf syscall with CAP_BPF isolating bpf objects (progs, maps,
                #   links) that are owned by such processes from each other.
                # (https://lwn.net/Articles/820560/)
                #
                # Note that even with a kernel providing CAP_BPF, the above
                # statement is still true.
                - SYS_ADMIN
                # We need this capability to get addresses from /proc/kallsyms.
                # Without it, addresses displayed when reading this file will be
                # 0.
                # Thus, bcc_procutils_each_ksym will never call callback, so KSyms
                # syms_ vector will be empty and it will return false.
                # As a consequence, no prefix will be found in
                # get_syscall_prefix(), so a default prefix (_sys) will be
                # returned.
                # Sadly, this default prefix is not used by the running kernel,
                # which instead uses: __x64_sys_
                - SYSLOG
                # traceloop gadget uses strace which in turn uses the ptrace()
                # syscall.
                # Within kernel code, ptrace() calls ptrace_attach() which in
                # turn calls __ptrace_may_access() which calls ptrace_has_cap()
                # where CAP_SYS_PTRACE is finally checked:
                # https://elixir.bootlin.com/linux/v5.14.14/source/kernel/ptrace.c#L284
                - SYS_PTRACE
                # Needed by setrlimit in gadgettracermanager and by the traceloop
                # gadget.
                - SYS_RESOURCE
                # Needed for gadgets that don't bump the memory rlimit.
                # (Currently only applies to BCC python-based gadgets)
                - IPC_LOCK
                # Needed by BCC python-based gadgets to load the kheaders module:
                # https://github.com/iovisor/bcc/blob/v0.24.0/src/cc/frontends/clang/kbuild_helper.cc#L158
                - SYS_MODULE
                # Needed by gadgets that open a raw sock like dns and snisnoop
                - NET_RAW
          volumeMounts:
            - name: host
              mountPath: /host
            - name: run
              mountPath: /run
            - name: modules
              mountPath: /lib/modules
            - name: debugfs
              mountPath: /sys/kernel/debug
            - name: cgroup
              mountPath: /sys/fs/cgroup
            - name: bpffs
              mountPath: /sys/fs/bpf
      # No key is given, so each entry tolerates every taint with that effect.
      tolerations:
        - effect: NoSchedule
          operator: Exists
        - effect: NoExecute
          operator: Exists
      # Every volume is a hostPath; apart from "host" (node "/" at /host),
      # each is mounted in the container at the same path it has on the node.
      volumes:
        - name: host
          hostPath:
            path: /
        - name: run
          hostPath:
            path: /run
        - name: cgroup
          hostPath:
            path: /sys/fs/cgroup
        - name: modules
          hostPath:
            path: /lib/modules
        - name: bpffs
          hostPath:
            path: /sys/fs/bpf
        - name: debugfs
          hostPath:
            path: /sys/kernel/debug