mirror of https://github.com/k3s-io/k3s.git
Fix managed etcd cold startup deadlock issue #2249
We should ignore --token and --server if the managed database is initialized, just like we ignore --cluster-init. If the user wants to join a new cluster, or rejoin a cluster after --cluster-reset, they need to delete the database. This a cleaner way to prevent deadlocking on quorum loss, and removes the requirement that the target of the --server argument must be online before already joined nodes can start. Signed-off-by: Brad Davidson <brad.davidson@rancher.com>pull/2300/head
parent
6c9f3d528a
commit
a3bbd58f37
|
@ -3,7 +3,7 @@ package cluster
|
|||
import (
|
||||
"bytes"
|
||||
"context"
|
||||
"fmt"
|
||||
"errors"
|
||||
"os"
|
||||
"path/filepath"
|
||||
|
||||
|
@ -18,7 +18,7 @@ func (c *Cluster) Bootstrap(ctx context.Context) error {
|
|||
return err
|
||||
}
|
||||
|
||||
runBootstrap, err := c.shouldBootstrapLoad()
|
||||
runBootstrap, err := c.shouldBootstrapLoad(ctx)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
@ -33,35 +33,58 @@ func (c *Cluster) Bootstrap(ctx context.Context) error {
|
|||
return nil
|
||||
}
|
||||
|
||||
func (c *Cluster) shouldBootstrapLoad() (bool, error) {
|
||||
// shouldBootstrapLoad returns true if we need to load ControlRuntimeBootstrap data again.
|
||||
// This is controlled by a stamp file on disk that records successful bootstrap using a hash of the join token.
|
||||
func (c *Cluster) shouldBootstrapLoad(ctx context.Context) (bool, error) {
|
||||
// Non-nil managedDB indicates that the database is either initialized, initializing, or joining
|
||||
if c.managedDB != nil {
|
||||
c.runtime.HTTPBootstrap = true
|
||||
if c.config.JoinURL == "" {
|
||||
|
||||
isInitialized, err := c.managedDB.IsInitialized(ctx, c.config)
|
||||
if err != nil {
|
||||
return false, err
|
||||
}
|
||||
|
||||
if isInitialized {
|
||||
// If the database is initialized we skip bootstrapping; if the user wants to rejoin a
|
||||
// cluster they need to delete the database.
|
||||
logrus.Infof("Managed %s cluster bootstrap already complete and initialized", c.managedDB.EndpointName())
|
||||
return false, nil
|
||||
}
|
||||
} else if c.config.JoinURL == "" {
|
||||
// Not initialized, not joining - must be initializing (cluster-init)
|
||||
logrus.Infof("Managed %s cluster initializing", c.managedDB.EndpointName())
|
||||
return false, nil
|
||||
} else {
|
||||
// Not initialized, but have a Join URL - fail if there's no token; if there is then validate it.
|
||||
if c.config.Token == "" {
|
||||
return false, errors.New(version.ProgramUpper + "_TOKEN is required to join a cluster")
|
||||
}
|
||||
|
||||
token, err := clientaccess.NormalizeAndValidateTokenForUser(c.config.JoinURL, c.config.Token, "server")
|
||||
if err != nil {
|
||||
return false, err
|
||||
}
|
||||
token, err := clientaccess.NormalizeAndValidateTokenForUser(c.config.JoinURL, c.config.Token, "server")
|
||||
if err != nil {
|
||||
return false, err
|
||||
}
|
||||
|
||||
info, err := clientaccess.ParseAndValidateToken(c.config.JoinURL, token)
|
||||
if err != nil {
|
||||
return false, err
|
||||
info, err := clientaccess.ParseAndValidateToken(c.config.JoinURL, token)
|
||||
if err != nil {
|
||||
return false, err
|
||||
}
|
||||
|
||||
logrus.Infof("Managed %s cluster not yet initialized", c.managedDB.EndpointName())
|
||||
c.clientAccessInfo = info
|
||||
}
|
||||
c.clientAccessInfo = info
|
||||
}
|
||||
|
||||
// Check the stamp file to see if we have successfully bootstrapped using this token.
|
||||
// NOTE: The fact that we use a hash of the token to generate the stamp
|
||||
// means that it is unsafe to use the same token for multiple clusters.
|
||||
stamp := c.bootstrapStamp()
|
||||
if _, err := os.Stat(stamp); err == nil {
|
||||
logrus.Info("Cluster bootstrap already complete")
|
||||
return false, nil
|
||||
}
|
||||
|
||||
if c.managedDB != nil && c.config.Token == "" {
|
||||
return false, fmt.Errorf("K3S_TOKEN is required to join a cluster")
|
||||
}
|
||||
|
||||
// No errors and no bootstrap stamp, need to bootstrap.
|
||||
return true, nil
|
||||
}
|
||||
|
||||
|
@ -98,11 +121,8 @@ func (c *Cluster) bootstrap(ctx context.Context) error {
|
|||
return c.httpBootstrap()
|
||||
}
|
||||
|
||||
if err := c.storageBootstrap(ctx); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
return nil
|
||||
// Bootstrap directly from datastore
|
||||
return c.storageBootstrap(ctx)
|
||||
}
|
||||
|
||||
func (c *Cluster) bootstrapStamp() string {
|
||||
|
|
Loading…
Reference in New Issue