unified minikube cluster status query (#18998)

* feat: unified minikube cluster status query

* Update pkg/minikube/cluster/status.go

Co-authored-by: Steven Powell <44844360+spowelljr@users.noreply.github.com>

* Update pkg/minikube/cluster/status.go

Co-authored-by: Steven Powell <44844360+spowelljr@users.noreply.github.com>

* Update pkg/minikube/cluster/status.go

Co-authored-by: Steven Powell <44844360+spowelljr@users.noreply.github.com>

---------

Co-authored-by: Steven Powell <44844360+spowelljr@users.noreply.github.com>
pull/19666/head
錦南路之花 2024-09-18 20:40:06 +02:00 committed by GitHub
parent 025b327e5f
commit c38897dc95
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
8 changed files with 564 additions and 549 deletions

View File

@ -23,10 +23,9 @@ import (
"strconv"
"strings"
"k8s.io/minikube/pkg/minikube/bootstrapper/bsutil/kverify"
"k8s.io/minikube/pkg/minikube/cluster"
"k8s.io/minikube/pkg/minikube/config"
"k8s.io/minikube/pkg/minikube/constants"
"k8s.io/minikube/pkg/minikube/driver"
"k8s.io/minikube/pkg/minikube/exit"
"k8s.io/minikube/pkg/minikube/machine"
"k8s.io/minikube/pkg/minikube/notify"
@ -35,7 +34,6 @@ import (
"k8s.io/minikube/pkg/minikube/style"
"github.com/docker/machine/libmachine"
"github.com/docker/machine/libmachine/state"
"github.com/olekukonko/tablewriter"
"github.com/spf13/cobra"
@ -115,77 +113,14 @@ func profileStatus(p *config.Profile, api libmachine.API) string {
if len(cps) == 0 {
exit.Message(reason.GuestCpConfig, "No control-plane nodes found.")
}
status := "Unknown"
healthyCPs := 0
for _, cp := range cps {
machineName := config.MachineName(*p.Config, cp)
ms, err := machine.Status(api, machineName)
statuses, err := cluster.GetStatus(api, p.Config)
if err != nil {
klog.Warningf("error loading profile (will continue): machine status for %s: %v", machineName, err)
continue
}
if ms != state.Running.String() {
klog.Warningf("error loading profile (will continue): machine %s is not running: %q", machineName, ms)
status = ms
continue
klog.Errorf("error getting statuses: %v", err)
return "Unknown"
}
clusterStatus := cluster.GetState(statuses, ClusterFlagValue(), p.Config)
host, err := machine.LoadHost(api, machineName)
if err != nil {
klog.Warningf("error loading profile (will continue): load host for %s: %v", machineName, err)
continue
}
hs, err := host.Driver.GetState()
if err != nil {
klog.Warningf("error loading profile (will continue): host state for %s: %v", machineName, err)
continue
}
if hs != state.Running {
klog.Warningf("error loading profile (will continue): host %s is not running: %q", machineName, hs)
status = hs.String()
continue
}
cr, err := machine.CommandRunner(host)
if err != nil {
klog.Warningf("error loading profile (will continue): command runner for %s: %v", machineName, err)
continue
}
hostname, _, port, err := driver.ControlPlaneEndpoint(p.Config, &cp, host.DriverName)
if err != nil {
klog.Warningf("error loading profile (will continue): control-plane endpoint for %s: %v", machineName, err)
continue
}
as, err := kverify.APIServerStatus(cr, hostname, port)
if err != nil {
klog.Warningf("error loading profile (will continue): apiserver status for %s: %v", machineName, err)
continue
}
status = as.String()
if as != state.Running {
klog.Warningf("error loading profile (will continue): apiserver %s is not running: %q", machineName, hs)
continue
}
healthyCPs++
}
if config.IsHA(*p.Config) {
switch {
case healthyCPs < 2:
return state.Stopped.String()
case healthyCPs == 2:
return "Degraded"
default:
return "HAppy"
}
}
return status
return clusterStatus.StatusName
}
func renderProfilesTable(ps [][]string) {

View File

@ -17,39 +17,27 @@ limitations under the License.
package cmd
import (
"bufio"
"encoding/json"
"fmt"
"io"
"os"
"strconv"
"strings"
"text/template"
"time"
cloudevents "github.com/cloudevents/sdk-go/v2"
"github.com/docker/machine/libmachine"
"github.com/docker/machine/libmachine/state"
"github.com/pkg/errors"
"github.com/spf13/cobra"
"k8s.io/klog/v2"
"k8s.io/minikube/pkg/minikube/bootstrapper/bsutil/kverify"
"k8s.io/minikube/pkg/minikube/cluster"
"k8s.io/minikube/pkg/minikube/config"
"k8s.io/minikube/pkg/minikube/constants"
"k8s.io/minikube/pkg/minikube/driver"
"k8s.io/minikube/pkg/minikube/exit"
"k8s.io/minikube/pkg/minikube/kubeconfig"
"k8s.io/minikube/pkg/minikube/localpath"
"k8s.io/minikube/pkg/minikube/machine"
"k8s.io/minikube/pkg/minikube/mustload"
"k8s.io/minikube/pkg/minikube/node"
"k8s.io/minikube/pkg/minikube/notify"
"k8s.io/minikube/pkg/minikube/out"
"k8s.io/minikube/pkg/minikube/out/register"
"k8s.io/minikube/pkg/minikube/reason"
"k8s.io/minikube/pkg/version"
)
var (
@ -59,124 +47,6 @@ var (
watch time.Duration
)
// Additional legacy states
const (
// Configured means configured
Configured = "Configured" // ~state.Saved
// Misconfigured means misconfigured
Misconfigured = "Misconfigured" // ~state.Error
// Nonexistent means the resource does not exist
Nonexistent = "Nonexistent" // ~state.None
// Irrelevant is used for statuses that aren't meaningful for worker nodes
Irrelevant = "Irrelevant"
)
// New status modes, based roughly on HTTP/SMTP standards
const (
// 1xx signifies a transitional state. If retried, it will soon return a 2xx, 4xx, or 5xx
Starting = 100
Pausing = 101
Unpausing = 102
Stopping = 110
Deleting = 120
// 2xx signifies that the API Server is able to service requests
OK = 200
Warning = 203
// 4xx signifies an error that requires help from the client to resolve
NotFound = 404
Stopped = 405
Paused = 418 // I'm a teapot!
// 5xx signifies a server-side error (that may be retryable)
Error = 500
InsufficientStorage = 507
Unknown = 520
)
var (
exitCodeToHTTPCode = map[int]int{
// exit code 26 corresponds to insufficient storage
26: 507,
}
codeNames = map[int]string{
100: "Starting",
101: "Pausing",
102: "Unpausing",
110: "Stopping",
103: "Deleting",
200: "OK",
203: "Warning",
404: "NotFound",
405: "Stopped",
418: "Paused",
500: "Error",
507: "InsufficientStorage",
520: "Unknown",
}
codeDetails = map[int]string{
507: "/var is almost out of disk space",
}
)
// Status holds string representations of component states
type Status struct {
Name string
Host string
Kubelet string
APIServer string
Kubeconfig string
Worker bool
TimeToStop string `json:",omitempty"`
DockerEnv string `json:",omitempty"`
PodManEnv string `json:",omitempty"`
}
// ClusterState holds a cluster state representation
type ClusterState struct {
BaseState
BinaryVersion string
TimeToStop string `json:",omitempty"`
Components map[string]BaseState
Nodes []NodeState
}
// NodeState holds a node state representation
type NodeState struct {
BaseState
Components map[string]BaseState `json:",omitempty"`
}
// BaseState holds a component state representation, such as "apiserver" or "kubeconfig"
type BaseState struct {
// Name is the name of the object
Name string
// StatusCode is an HTTP-like status code for this object
StatusCode int
// Name is a human-readable name for the status code
StatusName string
// StatusDetail is long human-readable string describing why this particular status code was chosen
StatusDetail string `json:",omitempty"` // Not yet implemented
// Step is which workflow step the object is at.
Step string `json:",omitempty"`
// StepDetail is a long human-readable string describing the step
StepDetail string `json:",omitempty"`
}
const (
minikubeNotRunningStatusFlag = 1 << 0
clusterNotRunningStatusFlag = 1 << 1
@ -236,7 +106,7 @@ var statusCmd = &cobra.Command{
// writeStatusesAtInterval writes statuses in a given output format - at intervals defined by duration
func writeStatusesAtInterval(duration time.Duration, api libmachine.API, cc *config.ClusterConfig) {
for {
var statuses []*Status
var statuses []*cluster.Status
if nodeName != "" || statusFormat != defaultStatusFormat && len(cc.Nodes) > 1 {
n, _, err := node.Retrieve(*cc, nodeName)
@ -244,26 +114,17 @@ func writeStatusesAtInterval(duration time.Duration, api libmachine.API, cc *con
exit.Error(reason.GuestNodeRetrieve, "retrieving node", err)
}
st, err := nodeStatus(api, *cc, *n)
st, err := cluster.NodeStatus(api, *cc, *n)
if err != nil {
klog.Errorf("status error: %v", err)
}
statuses = append(statuses, st)
} else {
for _, n := range cc.Nodes {
machineName := config.MachineName(*cc, n)
klog.Infof("checking status of %s ...", machineName)
st, err := nodeStatus(api, *cc, n)
klog.Infof("%s status: %+v", machineName, st)
var err error
statuses, err = cluster.GetStatus(api, cc)
if err != nil {
klog.Errorf("status error: %v", err)
}
if st.Host == Nonexistent {
klog.Errorf("The %q host does not exist!", machineName)
}
statuses = append(statuses, st)
}
}
switch output {
@ -276,7 +137,7 @@ func writeStatusesAtInterval(duration time.Duration, api libmachine.API, cc *con
case "json":
// Layout is currently only supported for JSON mode
if layout == "cluster" {
if err := clusterStatusJSON(statuses, os.Stdout); err != nil {
if err := clusterStatusJSON(statuses, os.Stdout, cc); err != nil {
exit.Error(reason.InternalStatusJSON, "status json failure", err)
}
} else {
@ -296,141 +157,22 @@ func writeStatusesAtInterval(duration time.Duration, api libmachine.API, cc *con
}
// exitCode calculates the appropriate exit code given a set of status messages
func exitCode(statuses []*Status) int {
func exitCode(statuses []*cluster.Status) int {
c := 0
for _, st := range statuses {
if st.Host != state.Running.String() {
c |= minikubeNotRunningStatusFlag
}
if (st.APIServer != state.Running.String() && st.APIServer != Irrelevant) || st.Kubelet != state.Running.String() {
if (st.APIServer != state.Running.String() && st.APIServer != cluster.Irrelevant) || st.Kubelet != state.Running.String() {
c |= clusterNotRunningStatusFlag
}
if st.Kubeconfig != Configured && st.Kubeconfig != Irrelevant {
if st.Kubeconfig != cluster.Configured && st.Kubeconfig != cluster.Irrelevant {
c |= k8sNotRunningStatusFlag
}
}
return c
}
// nodeStatus looks up the status of a node
func nodeStatus(api libmachine.API, cc config.ClusterConfig, n config.Node) (*Status, error) {
controlPlane := n.ControlPlane
name := config.MachineName(cc, n)
st := &Status{
Name: name,
Host: Nonexistent,
APIServer: Nonexistent,
Kubelet: Nonexistent,
Kubeconfig: Nonexistent,
Worker: !controlPlane,
}
hs, err := machine.Status(api, name)
klog.Infof("%s host status = %q (err=%v)", name, hs, err)
if err != nil {
return st, errors.Wrap(err, "host")
}
// We have no record of this host. Return nonexistent struct
if hs == state.None.String() {
return st, nil
}
st.Host = hs
// If it's not running, quickly bail out rather than delivering conflicting messages
if st.Host != state.Running.String() {
klog.Infof("host is not running, skipping remaining checks")
st.APIServer = st.Host
st.Kubelet = st.Host
st.Kubeconfig = st.Host
return st, nil
}
// We have a fully operational host, now we can check for details
if _, err := cluster.DriverIP(api, name); err != nil {
klog.Errorf("failed to get driver ip: %v", err)
st.Host = state.Error.String()
return st, err
}
st.Kubeconfig = Configured
if !controlPlane {
st.Kubeconfig = Irrelevant
st.APIServer = Irrelevant
}
host, err := machine.LoadHost(api, name)
if err != nil {
return st, err
}
cr, err := machine.CommandRunner(host)
if err != nil {
return st, err
}
// Check storage
p, err := machine.DiskUsed(cr, "/var")
if err != nil {
klog.Errorf("failed to get storage capacity of /var: %v", err)
st.Host = state.Error.String()
return st, err
}
if p >= 99 {
st.Host = codeNames[InsufficientStorage]
}
stk := kverify.ServiceStatus(cr, "kubelet")
st.Kubelet = stk.String()
if cc.ScheduledStop != nil {
initiationTime := time.Unix(cc.ScheduledStop.InitiationTime, 0)
st.TimeToStop = time.Until(initiationTime.Add(cc.ScheduledStop.Duration)).String()
}
if os.Getenv(constants.MinikubeActiveDockerdEnv) != "" {
st.DockerEnv = "in-use"
}
if os.Getenv(constants.MinikubeActivePodmanEnv) != "" {
st.PodManEnv = "in-use"
}
// Early exit for worker nodes
if !controlPlane {
return st, nil
}
var hostname string
var port int
if cc.Addons["auto-pause"] {
hostname, _, port, err = driver.AutoPauseProxyEndpoint(&cc, &n, host.DriverName)
} else {
hostname = cc.KubernetesConfig.APIServerHAVIP
port = cc.APIServerPort
if !config.IsHA(cc) || driver.NeedsPortForward(cc.Driver) {
hostname, _, port, err = driver.ControlPlaneEndpoint(&cc, &n, host.DriverName)
}
}
if err != nil {
klog.Errorf("forwarded endpoint: %v", err)
st.Kubeconfig = Misconfigured
} else if err := kubeconfig.VerifyEndpoint(cc.Name, hostname, port, ""); err != nil && st.Host != state.Starting.String() {
klog.Errorf("kubeconfig endpoint: %v", err)
st.Kubeconfig = Misconfigured
}
sta, err := kverify.APIServerStatus(cr, hostname, port)
klog.Infof("%s apiserver status = %s (err=%v)", name, stk, err)
if err != nil {
klog.Errorln("Error apiserver status:", err)
st.APIServer = state.Error.String()
} else {
st.APIServer = sta.String()
}
return st, nil
}
func init() {
statusCmd.Flags().StringVarP(&statusFormat, "format", "f", defaultStatusFormat,
`Go template format string for the status output. The format for Go templates can be found here: https://pkg.go.dev/text/template
@ -444,7 +186,7 @@ For the list accessible variables for the template, see the struct values here:
statusCmd.Flags().Lookup("watch").NoOptDefVal = "1s"
}
func statusText(st *Status, w io.Writer) error {
func statusText(st *cluster.Status, w io.Writer) error {
tmpl, err := template.New("status").Parse(statusFormat)
if st.Worker && statusFormat == defaultStatusFormat {
tmpl, err = template.New("worker-status").Parse(workerStatusFormat)
@ -455,14 +197,14 @@ func statusText(st *Status, w io.Writer) error {
if err := tmpl.Execute(w, st); err != nil {
return err
}
if st.Kubeconfig == Misconfigured {
if st.Kubeconfig == cluster.Misconfigured {
_, err := w.Write([]byte("\nWARNING: Your kubectl is pointing to stale minikube-vm.\nTo fix the kubectl context, run `minikube update-context`\n"))
return err
}
return nil
}
func statusJSON(st []*Status, w io.Writer) error {
func statusJSON(st []*cluster.Status, w io.Writer) error {
var js []byte
var err error
// Keep backwards compat with single node clusters to not break anyone
@ -478,185 +220,8 @@ func statusJSON(st []*Status, w io.Writer) error {
return err
}
// readEventLog reads cloudevent logs from $MINIKUBE_HOME/profiles/<name>/events.json
func readEventLog(name string) ([]cloudevents.Event, time.Time, error) {
path := localpath.EventLog(name)
st, err := os.Stat(path)
if err != nil {
return nil, time.Time{}, errors.Wrap(err, "stat")
}
f, err := os.Open(path)
if err != nil {
return nil, st.ModTime(), errors.Wrap(err, "open")
}
var events []cloudevents.Event
scanner := bufio.NewScanner(f)
for scanner.Scan() {
ev := cloudevents.NewEvent()
if err = json.Unmarshal(scanner.Bytes(), &ev); err != nil {
return events, st.ModTime(), err
}
events = append(events, ev)
}
return events, st.ModTime(), scanner.Err()
}
// clusterState converts Status structs into a ClusterState struct
func clusterState(sts []*Status) ClusterState {
statusName := sts[0].APIServer
if sts[0].Host == codeNames[InsufficientStorage] {
statusName = sts[0].Host
}
sc := statusCode(statusName)
cs := ClusterState{
BinaryVersion: version.GetVersion(),
BaseState: BaseState{
Name: ClusterFlagValue(),
StatusCode: sc,
StatusName: statusName,
StatusDetail: codeDetails[sc],
},
TimeToStop: sts[0].TimeToStop,
Components: map[string]BaseState{
"kubeconfig": {Name: "kubeconfig", StatusCode: statusCode(sts[0].Kubeconfig), StatusName: codeNames[statusCode(sts[0].Kubeconfig)]},
},
}
for _, st := range sts {
ns := NodeState{
BaseState: BaseState{
Name: st.Name,
StatusCode: statusCode(st.Host),
},
Components: map[string]BaseState{
"kubelet": {Name: "kubelet", StatusCode: statusCode(st.Kubelet)},
},
}
if st.APIServer != Irrelevant {
ns.Components["apiserver"] = BaseState{Name: "apiserver", StatusCode: statusCode(st.APIServer)}
}
// Convert status codes to status names
ns.StatusName = codeNames[ns.StatusCode]
for k, v := range ns.Components {
v.StatusName = codeNames[v.StatusCode]
ns.Components[k] = v
}
cs.Nodes = append(cs.Nodes, ns)
}
evs, mtime, err := readEventLog(sts[0].Name)
if err != nil {
klog.Errorf("unable to read event log: %v", err)
return cs
}
transientCode := 0
var finalStep map[string]string
for _, ev := range evs {
// klog.Infof("read event: %+v", ev)
if ev.Type() == "io.k8s.sigs.minikube.step" {
var data map[string]string
err := ev.DataAs(&data)
if err != nil {
klog.Errorf("unable to parse data: %v\nraw data: %s", err, ev.Data())
continue
}
switch data["name"] {
case string(register.InitialSetup):
transientCode = Starting
case string(register.Done):
transientCode = 0
case string(register.Stopping):
klog.Infof("%q == %q", data["name"], register.Stopping)
transientCode = Stopping
case string(register.Deleting):
transientCode = Deleting
case string(register.Pausing):
transientCode = Pausing
case string(register.Unpausing):
transientCode = Unpausing
}
finalStep = data
klog.Infof("transient code %d (%q) for step: %+v", transientCode, codeNames[transientCode], data)
}
if ev.Type() == "io.k8s.sigs.minikube.error" {
var data map[string]string
err := ev.DataAs(&data)
if err != nil {
klog.Errorf("unable to parse data: %v\nraw data: %s", err, ev.Data())
continue
}
exitCode, err := strconv.Atoi(data["exitcode"])
if err != nil {
klog.Errorf("exit code not found: %v", err)
continue
}
if val, ok := exitCodeToHTTPCode[exitCode]; ok {
exitCode = val
}
transientCode = exitCode
for _, n := range cs.Nodes {
n.StatusCode = transientCode
n.StatusName = codeNames[n.StatusCode]
}
klog.Infof("transient code %d (%q) for step: %+v", transientCode, codeNames[transientCode], data)
}
}
if finalStep != nil {
if mtime.Before(time.Now().Add(-10 * time.Minute)) {
klog.Warningf("event stream is too old (%s) to be considered a transient state", mtime)
} else {
cs.Step = strings.TrimSpace(finalStep["name"])
cs.StepDetail = strings.TrimSpace(finalStep["message"])
if transientCode != 0 {
cs.StatusCode = transientCode
}
}
}
cs.StatusName = codeNames[cs.StatusCode]
cs.StatusDetail = codeDetails[cs.StatusCode]
return cs
}
// statusCode returns a status code number given a name
func statusCode(st string) int {
// legacy names
switch st {
case "Running", "Configured":
return OK
case "Misconfigured":
return Error
}
// new names
for code, name := range codeNames {
if name == st {
return code
}
}
return Unknown
}
func clusterStatusJSON(statuses []*Status, w io.Writer) error {
cs := clusterState(statuses)
func clusterStatusJSON(statuses []*cluster.Status, w io.Writer, cc *config.ClusterConfig) error {
cs := cluster.GetState(statuses, ClusterFlagValue(), cc)
bs, err := json.Marshal(cs)
if err != nil {

View File

@ -20,22 +20,24 @@ import (
"bytes"
"encoding/json"
"testing"
"k8s.io/minikube/pkg/minikube/cluster"
)
func TestExitCode(t *testing.T) {
var tests = []struct {
name string
want int
state *Status
state *cluster.Status
}{
{"ok", 0, &Status{Host: "Running", Kubelet: "Running", APIServer: "Running", Kubeconfig: Configured}},
{"paused", 2, &Status{Host: "Running", Kubelet: "Stopped", APIServer: "Paused", Kubeconfig: Configured}},
{"down", 7, &Status{Host: "Stopped", Kubelet: "Stopped", APIServer: "Stopped", Kubeconfig: Misconfigured}},
{"missing", 7, &Status{Host: "Nonexistent", Kubelet: "Nonexistent", APIServer: "Nonexistent", Kubeconfig: "Nonexistent"}},
{"ok", 0, &cluster.Status{Host: "Running", Kubelet: "Running", APIServer: "Running", Kubeconfig: cluster.Configured}},
{"paused", 2, &cluster.Status{Host: "Running", Kubelet: "Stopped", APIServer: "Paused", Kubeconfig: cluster.Configured}},
{"down", 7, &cluster.Status{Host: "Stopped", Kubelet: "Stopped", APIServer: "Stopped", Kubeconfig: cluster.Misconfigured}},
{"missing", 7, &cluster.Status{Host: "Nonexistent", Kubelet: "Nonexistent", APIServer: "Nonexistent", Kubeconfig: "Nonexistent"}},
}
for _, tc := range tests {
t.Run(tc.name, func(t *testing.T) {
got := exitCode([]*Status{tc.state})
got := exitCode([]*cluster.Status{tc.state})
if got != tc.want {
t.Errorf("exitcode(%+v) = %d, want: %d", tc.state, got, tc.want)
}
@ -46,22 +48,22 @@ func TestExitCode(t *testing.T) {
func TestStatusText(t *testing.T) {
var tests = []struct {
name string
state *Status
state *cluster.Status
want string
}{
{
name: "ok",
state: &Status{Name: "minikube", Host: "Running", Kubelet: "Running", APIServer: "Running", Kubeconfig: Configured, TimeToStop: "10m"},
state: &cluster.Status{Name: "minikube", Host: "Running", Kubelet: "Running", APIServer: "Running", Kubeconfig: cluster.Configured, TimeToStop: "10m"},
want: "minikube\ntype: Control Plane\nhost: Running\nkubelet: Running\napiserver: Running\nkubeconfig: Configured\ntimeToStop: 10m\n\n",
},
{
name: "paused",
state: &Status{Name: "minikube", Host: "Running", Kubelet: "Stopped", APIServer: "Paused", Kubeconfig: Configured},
state: &cluster.Status{Name: "minikube", Host: "Running", Kubelet: "Stopped", APIServer: "Paused", Kubeconfig: cluster.Configured},
want: "minikube\ntype: Control Plane\nhost: Running\nkubelet: Stopped\napiserver: Paused\nkubeconfig: Configured\n\n",
},
{
name: "down",
state: &Status{Name: "minikube", Host: "Stopped", Kubelet: "Stopped", APIServer: "Stopped", Kubeconfig: Misconfigured},
state: &cluster.Status{Name: "minikube", Host: "Stopped", Kubelet: "Stopped", APIServer: "Stopped", Kubeconfig: cluster.Misconfigured},
want: "minikube\ntype: Control Plane\nhost: Stopped\nkubelet: Stopped\napiserver: Stopped\nkubeconfig: Misconfigured\n\n\nWARNING: Your kubectl is pointing to stale minikube-vm.\nTo fix the kubectl context, run `minikube update-context`\n",
},
}
@ -84,21 +86,21 @@ func TestStatusText(t *testing.T) {
func TestStatusJSON(t *testing.T) {
var tests = []struct {
name string
state *Status
state *cluster.Status
}{
{"ok", &Status{Host: "Running", Kubelet: "Running", APIServer: "Running", Kubeconfig: Configured, TimeToStop: "10m"}},
{"paused", &Status{Host: "Running", Kubelet: "Stopped", APIServer: "Paused", Kubeconfig: Configured}},
{"down", &Status{Host: "Stopped", Kubelet: "Stopped", APIServer: "Stopped", Kubeconfig: Misconfigured}},
{"ok", &cluster.Status{Host: "Running", Kubelet: "Running", APIServer: "Running", Kubeconfig: cluster.Configured, TimeToStop: "10m"}},
{"paused", &cluster.Status{Host: "Running", Kubelet: "Stopped", APIServer: "Paused", Kubeconfig: cluster.Configured}},
{"down", &cluster.Status{Host: "Stopped", Kubelet: "Stopped", APIServer: "Stopped", Kubeconfig: cluster.Misconfigured}},
}
for _, tc := range tests {
t.Run(tc.name, func(t *testing.T) {
var b bytes.Buffer
err := statusJSON([]*Status{tc.state}, &b)
err := statusJSON([]*cluster.Status{tc.state}, &b)
if err != nil {
t.Errorf("json(%+v) error: %v", tc.state, err)
}
st := &Status{}
st := &cluster.Status{}
if err := json.Unmarshal(b.Bytes(), st); err != nil {
t.Errorf("json(%+v) unmarshal error: %v", tc.state, err)
}

View File

@ -0,0 +1,513 @@
/*
Copyright 2024 The Kubernetes Authors All rights reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package cluster
import (
"bufio"
"encoding/json"
"fmt"
"os"
"strconv"
"strings"
"time"
cloudevents "github.com/cloudevents/sdk-go/v2"
"github.com/docker/machine/libmachine"
"github.com/docker/machine/libmachine/state"
"github.com/pkg/errors"
"k8s.io/klog/v2"
"k8s.io/minikube/pkg/minikube/bootstrapper/bsutil/kverify"
"k8s.io/minikube/pkg/minikube/config"
"k8s.io/minikube/pkg/minikube/constants"
"k8s.io/minikube/pkg/minikube/driver"
"k8s.io/minikube/pkg/minikube/kubeconfig"
"k8s.io/minikube/pkg/minikube/localpath"
"k8s.io/minikube/pkg/minikube/machine"
"k8s.io/minikube/pkg/minikube/out/register"
"k8s.io/minikube/pkg/version"
)
// Additional legacy states. The trailing note on each value names the
// libmachine state it roughly corresponds to.
const (
	// Configured is the status name used for a component that is configured.
	Configured = "Configured" // ~state.Saved

	// Misconfigured is the status name used for a component that is misconfigured.
	Misconfigured = "Misconfigured" // ~state.Error

	// Nonexistent is the status name used for a resource that does not exist.
	Nonexistent = "Nonexistent" // ~state.None

	// Irrelevant is the status name used for statuses that aren't meaningful
	// for worker nodes.
	Irrelevant = "Irrelevant"
)
// New status modes, based roughly on HTTP/SMTP standards
const (
	// 1xx signifies a transitional state. If retried, it will soon return a 2xx, 4xx, or 5xx
	Starting  = 100
	Pausing   = 101
	Unpausing = 102
	Stopping  = 110
	Deleting  = 120

	// 2xx signifies that the API Server is able to service requests
	OK       = 200
	OKHAppy  = 201
	Warning  = 203
	Degraded = 204

	// 4xx signifies an error that requires help from the client to resolve
	NotFound = 404
	Stopped  = 405
	Paused   = 418 // I'm a teapot!

	// 5xx signifies a server-side error (that may be retryable)
	Error               = 500
	InsufficientStorage = 507
	Unknown             = 520
)

var (
	// exitCodeToHTTPCode translates a minikube exit code into one of the
	// status codes above.
	exitCodeToHTTPCode = map[int]int{
		// exit code 26 corresponds to insufficient storage
		26: InsufficientStorage,
	}

	// codeNames maps every status code to its human-readable name.
	//
	// The keys are the constants themselves rather than numeric literals so
	// the table cannot drift from the constant block: "Deleting" used to be
	// registered under 103 while Deleting is 120, which left
	// codeNames[Deleting] empty.
	codeNames = map[int]string{
		Starting:            "Starting",
		Pausing:             "Pausing",
		Unpausing:           "Unpausing",
		Stopping:            "Stopping",
		Deleting:            "Deleting",
		OK:                  "OK",
		OKHAppy:             "OKHAppy",
		Warning:             "Warning",
		Degraded:            "Degraded",
		NotFound:            "NotFound",
		Stopped:             "Stopped",
		Paused:              "Paused",
		Error:               "Error",
		InsufficientStorage: "InsufficientStorage",
		Unknown:             "Unknown",
	}

	// codeDetails holds the optional long-form explanation for a status code.
	codeDetails = map[int]string{
		InsufficientStorage: "/var is almost out of disk space",
	}
)
// Status is the per-node status record. Each component state is stored as a
// string; the last three fields are left out of the JSON encoding when empty.
type Status struct {
	Name       string
	Host       string
	Kubelet    string
	APIServer  string
	Kubeconfig string
	Worker     bool
	TimeToStop string `json:",omitempty"`
	DockerEnv  string `json:",omitempty"`
	PodManEnv  string `json:",omitempty"`
}
// State is the cluster-level state representation: the cluster's own
// BaseState, the binary version, cluster-wide components and one NodeState
// per node.
//
//nolint:revive
type State struct {
	BaseState

	BinaryVersion string
	TimeToStop    string `json:",omitempty"`
	Components    map[string]BaseState
	Nodes         []NodeState
}
// NodeState is the state representation of a single node: its own BaseState
// plus the states of its components, keyed by component name.
type NodeState struct {
	BaseState

	Components map[string]BaseState `json:",omitempty"`
}
// BaseState is the state representation shared by every reported object,
// for example a component such as "apiserver" or "kubeconfig".
type BaseState struct {
	// Name is the name of the object.
	Name string

	// StatusCode is an HTTP-like status code for this object.
	StatusCode int

	// StatusName is a human-readable name for the status code.
	StatusName string

	// StatusDetail is a long human-readable string describing why this
	// particular status code was chosen.
	StatusDetail string `json:",omitempty"` // Not yet implemented

	// Step is the workflow step the object is at.
	Step string `json:",omitempty"`

	// StepDetail is a long human-readable string describing the step.
	StepDetail string `json:",omitempty"`
}
// GetStatus looks up the status of every node listed in cc, in the order the
// nodes appear there.
//
// The walk stops at the first node whose lookup returns an error or whose
// host is reported as Nonexistent; in both cases the problem is logged and
// (nil, err) is returned, so no partial result is handed back.
func GetStatus(api libmachine.API, cc *config.ClusterConfig) ([]*Status, error) {
	var collected []*Status

	for i := range cc.Nodes {
		node := cc.Nodes[i]
		mName := config.MachineName(*cc, node)

		klog.Infof("checking status of %s ...", mName)
		nodeSt, err := NodeStatus(api, *cc, node)
		klog.Infof("%s status: %+v", mName, nodeSt)

		switch {
		case err != nil:
			klog.Errorf("status error: %v", err)
			return nil, err
		case nodeSt.Host == Nonexistent:
			missing := fmt.Errorf("the %q host does not exist", mName)
			klog.Error(missing)
			return nil, missing
		}

		collected = append(collected, nodeSt)
	}

	return collected, nil
}
// GetState converts Status structs into a State struct.
//
// sts holds one Status per node, profile becomes the Name of the returned
// State, and cc is consulted only to decide whether the cluster is HA.
//
// The cluster-level status, TimeToStop and the "kubeconfig" component are
// taken from the first entry of sts. The event log named after that first
// entry is then replayed to detect a transient state (starting, stopping,
// ...) or a recorded error exit code. For HA clusters that have completed a
// start, the final status code is derived from the number of healthy
// control-plane nodes.
//
// When sts is empty (for example because GetStatus failed and the caller
// carried on with a nil slice) a State with status Unknown and no nodes is
// returned instead of indexing into the empty slice.
//
//nolint:gocyclo
func GetState(sts []*Status, profile string, cc *config.ClusterConfig) State {
	if len(sts) == 0 {
		return State{
			BinaryVersion: version.GetVersion(),
			BaseState: BaseState{
				Name:         profile,
				StatusCode:   Unknown,
				StatusName:   codeNames[Unknown],
				StatusDetail: codeDetails[Unknown],
			},
			Components: map[string]BaseState{},
			Nodes:      []NodeState{},
		}
	}

	first := sts[0]

	// The API server status names the cluster status, unless the host ran
	// out of storage, in which case that takes precedence.
	statusName := first.APIServer
	if first.Host == codeNames[InsufficientStorage] {
		statusName = first.Host
	}
	sc := statusCode(statusName)
	kubeconfigCode := statusCode(first.Kubeconfig)

	cs := State{
		BinaryVersion: version.GetVersion(),
		BaseState: BaseState{
			Name:         profile,
			StatusCode:   sc,
			StatusName:   statusName,
			StatusDetail: codeDetails[sc],
		},
		TimeToStop: first.TimeToStop,
		Components: map[string]BaseState{
			"kubeconfig": {Name: "kubeconfig", StatusCode: kubeconfigCode, StatusName: codeNames[kubeconfigCode]},
		},
	}

	healthyCPs := 0
	for _, st := range sts {
		ns := NodeState{
			BaseState: BaseState{
				Name:       st.Name,
				StatusCode: statusCode(st.Host),
			},
			Components: map[string]BaseState{
				"kubelet": {Name: "kubelet", StatusCode: statusCode(st.Kubelet)},
			},
		}
		if st.APIServer != Irrelevant {
			ns.Components["apiserver"] = BaseState{Name: "apiserver", StatusCode: statusCode(st.APIServer)}
		}

		// Convert status codes to status names
		ns.StatusName = codeNames[ns.StatusCode]
		for k, v := range ns.Components {
			v.StatusName = codeNames[v.StatusCode]
			ns.Components[k] = v
		}

		cs.Nodes = append(cs.Nodes, ns)

		// Count the healthy control-plane nodes; the HA verdict below
		// depends on that number.
		if !st.Worker &&
			st.Host == state.Running.String() &&
			st.Kubeconfig == Configured &&
			st.Kubelet == state.Running.String() &&
			st.APIServer == state.Running.String() {
			healthyCPs++
		}
	}

	evs, mtime, err := readEventLog(first.Name)
	if err != nil {
		// Without an event log there is nothing to refine; the state built
		// from the statuses alone is returned as is.
		klog.Errorf("unable to read event log: %v", err)
		return cs
	}

	transientCode := 0
	started := false
	var finalStep map[string]string

	for _, ev := range evs {
		switch ev.Type() {
		case "io.k8s.sigs.minikube.step":
			var data map[string]string
			if err := ev.DataAs(&data); err != nil {
				klog.Errorf("unable to parse data: %v\nraw data: %s", err, ev.Data())
				continue
			}

			switch data["name"] {
			case string(register.InitialSetup):
				transientCode = Starting
			case string(register.Done):
				transientCode = 0
				started = true
			case string(register.Stopping):
				klog.Infof("%q == %q", data["name"], register.Stopping)
				transientCode = Stopping
			case string(register.Deleting):
				transientCode = Deleting
			case string(register.Pausing):
				transientCode = Pausing
			case string(register.Unpausing):
				transientCode = Unpausing
			}

			finalStep = data
			klog.Infof("transient code %d (%q) for step: %+v", transientCode, codeNames[transientCode], data)

		case "io.k8s.sigs.minikube.error":
			var data map[string]string
			if err := ev.DataAs(&data); err != nil {
				klog.Errorf("unable to parse data: %v\nraw data: %s", err, ev.Data())
				continue
			}

			exitCode, err := strconv.Atoi(data["exitcode"])
			if err != nil {
				klog.Errorf("exit code not found: %v", err)
				continue
			}
			if val, ok := exitCodeToHTTPCode[exitCode]; ok {
				exitCode = val
			}
			transientCode = exitCode

			// NOTE(review): n is a copy of the slice element, so these
			// assignments never reach cs.Nodes and this loop currently has
			// no effect. It is left untouched on purpose: making it
			// effective would stamp every node with the code of any error
			// ever recorded in the log, with no staleness check and no
			// reset on a later successful step. Decide on the intended
			// semantics before switching to index-based assignment.
			for _, n := range cs.Nodes {
				n.StatusCode = transientCode
				n.StatusName = codeNames[n.StatusCode]
			}

			klog.Infof("transient code %d (%q) for step: %+v", transientCode, codeNames[transientCode], data)
		}
	}

	if finalStep != nil {
		if mtime.Before(time.Now().Add(-10 * time.Minute)) {
			klog.Warningf("event stream is too old (%s) to be considered a transient state", mtime)
		} else {
			cs.Step = strings.TrimSpace(finalStep["name"])
			cs.StepDetail = strings.TrimSpace(finalStep["message"])
			if transientCode != 0 {
				cs.StatusCode = transientCode
			}
		}
	}

	// For an HA cluster that has finished starting, the verdict depends on
	// how many control-plane nodes are healthy. A nil cc is treated as
	// "not HA" rather than dereferenced.
	if cc != nil && config.IsHA(*cc) && started {
		switch {
		case healthyCPs < 2:
			cs.StatusCode = Stopped
		case healthyCPs == 2:
			cs.StatusCode = Degraded
		default:
			cs.StatusCode = OKHAppy
		}
	}

	cs.StatusName = codeNames[cs.StatusCode]
	cs.StatusDetail = codeDetails[cs.StatusCode]

	return cs
}
// NodeStatus looks up the status of a node.
//
// It builds a Status for node n of cluster cc, named after the machine name
// derived from cc and n. Every component starts as Nonexistent and is filled
// in as checks succeed:
//
//   - If the machine status lookup fails, the error is returned wrapped with "host".
//   - If there is no record of the host, the Nonexistent struct is returned as is.
//   - If the host is not running, APIServer, Kubelet and Kubeconfig mirror the
//     host state and no further checks are made.
//   - Worker nodes return after the host-level checks, with Kubeconfig and
//     APIServer set to Irrelevant.
//   - For control-plane nodes the kubeconfig endpoint and the apiserver are
//     checked too; failures there are recorded in the returned Status
//     (Misconfigured / Error) rather than returned as an error.
//
// When a non-nil error is returned, the accompanying Status holds whatever was
// determined up to the point of failure.
func NodeStatus(api libmachine.API, cc config.ClusterConfig, n config.Node) (*Status, error) {
	controlPlane := n.ControlPlane
	name := config.MachineName(cc, n)

	st := &Status{
		Name:       name,
		Host:       Nonexistent,
		APIServer:  Nonexistent,
		Kubelet:    Nonexistent,
		Kubeconfig: Nonexistent,
		Worker:     !controlPlane,
	}

	hs, err := machine.Status(api, name)
	klog.Infof("%s host status = %q (err=%v)", name, hs, err)
	if err != nil {
		return st, errors.Wrap(err, "host")
	}

	// We have no record of this host. Return nonexistent struct
	if hs == state.None.String() {
		return st, nil
	}
	st.Host = hs

	// If it's not running, quickly bail out rather than delivering conflicting messages
	if st.Host != state.Running.String() {
		klog.Infof("host is not running, skipping remaining checks")
		st.APIServer = st.Host
		st.Kubelet = st.Host
		st.Kubeconfig = st.Host
		return st, nil
	}

	// We have a fully operational host, now we can check for details
	if _, err := DriverIP(api, name); err != nil {
		klog.Errorf("failed to get driver ip: %v", err)
		st.Host = state.Error.String()
		return st, err
	}

	st.Kubeconfig = Configured
	if !controlPlane {
		// Workers have neither a kubeconfig entry nor an apiserver to report on.
		st.Kubeconfig = Irrelevant
		st.APIServer = Irrelevant
	}

	host, err := machine.LoadHost(api, name)
	if err != nil {
		return st, err
	}

	cr, err := machine.CommandRunner(host)
	if err != nil {
		return st, err
	}

	// Check storage
	p, err := machine.DiskUsed(cr, "/var")
	if err != nil {
		klog.Errorf("failed to get storage capacity of /var: %v", err)
		st.Host = state.Error.String()
		return st, err
	}
	// Flag the host once the /var usage reported by DiskUsed reaches 99
	// (presumably percent full - confirm against machine.DiskUsed).
	if p >= 99 {
		st.Host = codeNames[InsufficientStorage]
	}

	stk := kverify.ServiceStatus(cr, "kubelet")
	st.Kubelet = stk.String()

	// Time remaining until the scheduled stop: initiation time plus its duration.
	if cc.ScheduledStop != nil {
		initiationTime := time.Unix(cc.ScheduledStop.InitiationTime, 0)
		st.TimeToStop = time.Until(initiationTime.Add(cc.ScheduledStop.Duration)).String()
	}

	if os.Getenv(constants.MinikubeActiveDockerdEnv) != "" {
		st.DockerEnv = "in-use"
	}
	if os.Getenv(constants.MinikubeActivePodmanEnv) != "" {
		st.PodManEnv = "in-use"
	}

	// Early exit for worker nodes
	if !controlPlane {
		return st, nil
	}

	// Pick the endpoint to verify: the auto-pause proxy when that addon is
	// enabled; otherwise the HA VIP and configured apiserver port, replaced by
	// the control-plane endpoint when the cluster is not HA or the driver
	// needs port forwarding.
	var hostname string
	var port int
	if cc.Addons["auto-pause"] {
		hostname, _, port, err = driver.AutoPauseProxyEndpoint(&cc, &n, host.DriverName)
	} else {
		hostname = cc.KubernetesConfig.APIServerHAVIP
		port = cc.APIServerPort
		if !config.IsHA(cc) || driver.NeedsPortForward(cc.Driver) {
			hostname, _, port, err = driver.ControlPlaneEndpoint(&cc, &n, host.DriverName)
		}
	}

	if err != nil {
		klog.Errorf("forwarded endpoint: %v", err)
		st.Kubeconfig = Misconfigured
	} else if err := kubeconfig.VerifyEndpoint(cc.Name, hostname, port, ""); err != nil && st.Host != state.Starting.String() {
		klog.Errorf("kubeconfig endpoint: %v", err)
		st.Kubeconfig = Misconfigured
	}

	sta, err := kverify.APIServerStatus(cr, hostname, port)
	// Log the apiserver state that was just fetched (sta), not the kubelet state.
	klog.Infof("%s apiserver status = %s (err=%v)", name, sta, err)
	if err != nil {
		klog.Errorln("Error apiserver status:", err)
		st.APIServer = state.Error.String()
	} else {
		st.APIServer = sta.String()
	}

	return st, nil
}
// readEventLog reads cloudevent logs from $MINIKUBE_HOME/profiles/<name>/events.json
//
// The file is read line by line, each line being unmarshalled as one JSON
// encoded cloudevents.Event. It returns the parsed events together with the
// modification time of the log file. If stat fails, the returned time is the
// zero time. If a line cannot be unmarshalled, the events parsed so far are
// returned along with the unmarshal error.
func readEventLog(name string) ([]cloudevents.Event, time.Time, error) {
	path := localpath.EventLog(name)

	st, err := os.Stat(path)
	if err != nil {
		return nil, time.Time{}, errors.Wrap(err, "stat")
	}

	f, err := os.Open(path)
	if err != nil {
		return nil, st.ModTime(), errors.Wrap(err, "open")
	}
	// Release the file descriptor on every return path; the file is only
	// read, so the error returned by Close carries no useful information.
	defer f.Close()

	var events []cloudevents.Event

	scanner := bufio.NewScanner(f)
	for scanner.Scan() {
		ev := cloudevents.NewEvent()
		if err := json.Unmarshal(scanner.Bytes(), &ev); err != nil {
			return events, st.ModTime(), err
		}
		events = append(events, ev)
	}

	return events, st.ModTime(), scanner.Err()
}
// statusCode returns a status code number given a name
//
// Legacy names are translated first: "Running" and "Configured" map to OK,
// and "Misconfigured" maps to Error. Any other name is looked up among the
// values of codeNames; Unknown is returned when nothing matches.
func statusCode(st string) int {
	// legacy names
	if st == "Running" || st == "Configured" {
		return OK
	}
	if st == "Misconfigured" {
		return Error
	}

	// new names
	for candidate, label := range codeNames {
		if label != st {
			continue
		}
		return candidate
	}

	return Unknown
}

View File

@ -30,7 +30,7 @@ import (
"testing"
"time"
"k8s.io/minikube/cmd/minikube/cmd"
"k8s.io/minikube/pkg/minikube/cluster"
"k8s.io/minikube/pkg/minikube/config"
"k8s.io/minikube/pkg/util/retry"
)
@ -328,7 +328,7 @@ func validateHACopyFile(ctx context.Context, t *testing.T, profile string) {
t.Fatalf("failed to run minikube status. args %q : %v", rr.Command(), err)
}
var statuses []cmd.Status
var statuses []cluster.Status
if err = json.Unmarshal(rr.Stdout.Bytes(), &statuses); err != nil {
t.Errorf("failed to decode json from status: args %q: %v", rr.Command(), err)
}

View File

@ -30,7 +30,7 @@ import (
"testing"
"time"
"k8s.io/minikube/cmd/minikube/cmd"
"k8s.io/minikube/pkg/minikube/cluster"
"k8s.io/minikube/pkg/minikube/config"
"k8s.io/minikube/pkg/util/retry"
)
@ -186,7 +186,7 @@ func validateCopyFileWithMultiNode(ctx context.Context, t *testing.T, profile st
t.Fatalf("failed to run minikube status. args %q : %v", rr.Command(), err)
}
var statuses []cmd.Status
var statuses []cluster.Status
if err = json.Unmarshal(rr.Stdout.Bytes(), &statuses); err != nil {
t.Errorf("failed to decode json from status: args %q: %v", rr.Command(), err)
}

View File

@ -25,7 +25,7 @@ import (
"strings"
"testing"
"k8s.io/minikube/cmd/minikube/cmd"
"k8s.io/minikube/pkg/minikube/cluster"
)
// TestPause tests minikube pause functionality
@ -191,12 +191,12 @@ func validateStatus(ctx context.Context, t *testing.T, profile string) {
defer PostMortemLogs(t, profile)
statusOutput := runStatusCmd(ctx, t, profile, false)
var cs cmd.ClusterState
var cs cluster.State
if err := json.Unmarshal(statusOutput, &cs); err != nil {
t.Fatalf("unmarshalling: %v", err)
}
// verify the status looks as we expect
if cs.StatusCode != cmd.Paused {
if cs.StatusCode != cluster.Paused {
t.Fatalf("incorrect status code: %v", cs.StatusCode)
}
if cs.StatusName != "Paused" {

View File

@ -27,7 +27,7 @@ import (
"path"
"testing"
"k8s.io/minikube/cmd/minikube/cmd"
"k8s.io/minikube/pkg/minikube/cluster"
"k8s.io/minikube/pkg/minikube/constants"
"k8s.io/minikube/pkg/minikube/localpath"
)
@ -82,19 +82,19 @@ func runStatusCmd(ctx context.Context, t *testing.T, profile string, increaseEnv
}
func verifyClusterState(t *testing.T, contents []byte) {
var cs cmd.ClusterState
var cs cluster.State
if err := json.Unmarshal(contents, &cs); err != nil {
t.Fatalf("unmarshalling: %v", err)
}
// verify the status looks as we expect
if cs.StatusCode != cmd.InsufficientStorage {
if cs.StatusCode != cluster.InsufficientStorage {
t.Fatalf("incorrect status code: %v", cs.StatusCode)
}
if cs.StatusName != "InsufficientStorage" {
t.Fatalf("incorrect status name: %v", cs.StatusName)
}
for _, n := range cs.Nodes {
if n.StatusCode != cmd.InsufficientStorage {
if n.StatusCode != cluster.InsufficientStorage {
t.Fatalf("incorrect node status code: %v", cs.StatusCode)
}
}