199 lines
7.6 KiB
Cheetah
199 lines
7.6 KiB
Cheetah
---
# DaemonSet running the Inspektor Gadget agent ("gadget") on Linux nodes.
# The pod shares the host PID and network namespaces and mounts several host
# paths so the gadgets can use BPF, tracing and the container runtime sockets.
apiVersion: apps/v1
kind: DaemonSet
metadata:
  name: gadget
  namespace: gadget
  labels:
    k8s-app: gadget
spec:
  selector:
    matchLabels:
      k8s-app: gadget
  template:
    metadata:
      labels:
        k8s-app: gadget
      annotations:
        # We need to set gadget container as unconfined so it is able to write
        # /sys/fs/bpf as well as /sys/kernel/debug/tracing.
        # Otherwise, we can get an error like:
        # "failed to create server failed to create folder for pinning bpf maps: mkdir /sys/fs/bpf/gadget: permission denied"
        # (For reference, see: https://github.com/inspektor-gadget/inspektor-gadget/runs/3966318270?check_suite_focus=true#step:20:221)
        container.apparmor.security.beta.kubernetes.io/gadget: "unconfined"
        inspektor-gadget.kinvolk.io/option-hook-mode: "auto"
    spec:
      # `serviceAccountName` is the supported field; `serviceAccount` is only a
      # deprecated alias for it.
      serviceAccountName: gadget
      hostPID: true
      hostNetwork: true
      nodeSelector:
        kubernetes.io/os: "linux"
      containers:
        - name: gadget
          terminationMessagePolicy: FallbackToLogsOnError
          # Template expression: registry prefix followed by the image name.
          # NOTE(review): the `default` chain presumably falls back from the
          # custom registry to the image repository to the stock registry;
          # confirm against the template helper's argument order.
          image: {{.CustomRegistries.InspektorGadget | default .ImageRepository | default .Registries.InspektorGadget }}{{.Images.InspektorGadget}}
          imagePullPolicy: "Always"
          command: ["/entrypoint"]
          lifecycle:
            preStop:
              exec:
                command:
                  - "/cleanup"
          # Readiness uses the same `-liveness` check as the liveness probe.
          readinessProbe:
            periodSeconds: 5
            timeoutSeconds: 2
            exec:
              command:
                - /bin/gadgettracermanager
                - -liveness
          livenessProbe:
            periodSeconds: 5
            timeoutSeconds: 2
            exec:
              command:
                - /bin/gadgettracermanager
                - -liveness
          env:
            - name: NODE_NAME
              valueFrom:
                fieldRef:
                  fieldPath: spec.nodeName
            - name: GADGET_POD_UID
              valueFrom:
                fieldRef:
                  fieldPath: metadata.uid
            - name: TRACELOOP_NODE_NAME
              valueFrom:
                fieldRef:
                  fieldPath: spec.nodeName
            - name: TRACELOOP_POD_NAME
              valueFrom:
                fieldRef:
                  fieldPath: metadata.name
            - name: TRACELOOP_POD_NAMESPACE
              valueFrom:
                fieldRef:
                  fieldPath: metadata.namespace
            - name: GADGET_IMAGE
              value: "ghcr.io/inspektor-gadget/inspektor-gadget"
            - name: INSPEKTOR_GADGET_VERSION
              value: "v0.16.1"
            - name: INSPEKTOR_GADGET_OPTION_HOOK_MODE
              value: "auto"
            - name: INSPEKTOR_GADGET_OPTION_FALLBACK_POD_INFORMER
              value: "true"
            # Make sure to keep these settings in sync with pkg/container-utils/runtime-client/interface.go
            - name: INSPEKTOR_GADGET_CONTAINERD_SOCKETPATH
              value: "/run/containerd/containerd.sock"
            - name: INSPEKTOR_GADGET_CRIO_SOCKETPATH
              value: "/run/crio/crio.sock"
            - name: INSPEKTOR_GADGET_DOCKER_SOCKETPATH
              value: "/run/docker.sock"
            # Same path as the mountPath of the `host` volume below.
            - name: HOST_ROOT
              value: "/host"
          securityContext:
            capabilities:
              add:
                # We need CAP_NET_ADMIN to be able to create BPF link.
                # Indeed, link_create is called with prog->type which equals
                # BPF_PROG_TYPE_CGROUP_SKB.
                # This value is then checked in
                # bpf_prog_attach_check_attach_type() which also checks if we have
                # CAP_NET_ADMIN:
                # https://elixir.bootlin.com/linux/v5.14.14/source/kernel/bpf/syscall.c#L4099
                # https://elixir.bootlin.com/linux/v5.14.14/source/kernel/bpf/syscall.c#L2967
                - NET_ADMIN

                # We need CAP_SYS_ADMIN to use Python-BCC gadgets because bcc
                # internally calls bpf_get_map_fd_by_id() which contains the
                # following snippet:
                #   if (!capable(CAP_SYS_ADMIN))
                #     return -EPERM;
                # (https://elixir.bootlin.com/linux/v5.10.73/source/kernel/bpf/syscall.c#L3254)
                #
                # Details about this are given in:
                # > The important design decision is to allow ID->FD transition for
                # > CAP_SYS_ADMIN only. What it means that user processes can run
                # > with CAP_BPF and CAP_NET_ADMIN and they will not be able to affect each
                # > other unless they pass FDs via scm_rights or via pinning in bpffs.
                # > ID->FD is a mechanism for human override and introspection.
                # > An admin can do 'sudo bpftool prog ...'. It's possible to enforce via LSM that
                # > only bpftool binary does bpf syscall with CAP_SYS_ADMIN and the rest of user
                # > space processes do bpf syscall with CAP_BPF isolating bpf objects (progs, maps,
                # > links) that are owned by such processes from each other.
                # (https://lwn.net/Articles/820560/)
                #
                # Note that even with a kernel providing CAP_BPF, the above
                # statement is still true.
                - SYS_ADMIN

                # We need this capability to get addresses from /proc/kallsyms.
                # Without it, addresses displayed when reading this file will be
                # 0.
                # Thus, bcc_procutils_each_ksym will never call callback, so KSyms
                # syms_ vector will be empty and it will return false.
                # As a consequence, no prefix will be found in
                # get_syscall_prefix(), so a default prefix (_sys) will be
                # returned.
                # Sadly, this default prefix is not used by the running kernel,
                # which instead uses: __x64_sys_
                - SYSLOG

                # traceloop gadget uses strace which in turn uses the ptrace()
                # syscall.
                # Within kernel code, ptrace() calls ptrace_attach() which in
                # turn calls __ptrace_may_access() which calls ptrace_has_cap()
                # where CAP_SYS_PTRACE is finally checked:
                # https://elixir.bootlin.com/linux/v5.14.14/source/kernel/ptrace.c#L284
                - SYS_PTRACE

                # Needed by setrlimit in gadgettracermanager and by the traceloop
                # gadget.
                - SYS_RESOURCE

                # Needed for gadgets that don't bump the memory rlimit.
                # (Currently only applies to BCC python-based gadgets)
                - IPC_LOCK

                # Needed by BCC python-based gadgets to load the kheaders module:
                # https://github.com/iovisor/bcc/blob/v0.24.0/src/cc/frontends/clang/kbuild_helper.cc#L158
                - SYS_MODULE

                # Needed by gadgets that open a raw sock like dns and snisnoop
                - NET_RAW
          # Each mount refers to a hostPath volume declared under `volumes`.
          volumeMounts:
            - name: host
              mountPath: /host
            - name: run
              mountPath: /run
            - name: modules
              mountPath: /lib/modules
            - name: debugfs
              mountPath: /sys/kernel/debug
            - name: cgroup
              mountPath: /sys/fs/cgroup
            - name: bpffs
              mountPath: /sys/fs/bpf
      # Tolerate every NoSchedule and NoExecute taint (no key restriction).
      tolerations:
        - effect: NoSchedule
          operator: Exists
        - effect: NoExecute
          operator: Exists
      volumes:
        - name: host
          hostPath:
            path: /
        - name: run
          hostPath:
            path: /run
        - name: cgroup
          hostPath:
            path: /sys/fs/cgroup
        - name: modules
          hostPath:
            path: /lib/modules
        - name: bpffs
          hostPath:
            path: /sys/fs/bpf
        - name: debugfs
          hostPath:
            path: /sys/kernel/debug