From 07ca63994e4e6f48403509b0f378e6b6c1f21beb Mon Sep 17 00:00:00 2001 From: Shylaja Devadiga Date: Tue, 20 Sep 2022 08:57:18 -0700 Subject: [PATCH 1/2] Add cluster reset test to nightly builds Signed-off-by: Shylaja Devadiga --- tests/e2e/clusterreset/Vagrantfile | 162 ++++++++++++++++++ tests/e2e/clusterreset/clusterreset_test.go | 181 ++++++++++++++++++++ 2 files changed, 343 insertions(+) create mode 100644 tests/e2e/clusterreset/Vagrantfile create mode 100644 tests/e2e/clusterreset/clusterreset_test.go diff --git a/tests/e2e/clusterreset/Vagrantfile b/tests/e2e/clusterreset/Vagrantfile new file mode 100644 index 0000000000..9ba3182621 --- /dev/null +++ b/tests/e2e/clusterreset/Vagrantfile @@ -0,0 +1,162 @@ +ENV['VAGRANT_NO_PARALLEL'] = 'no' +NODE_ROLES = (ENV['E2E_NODE_ROLES'] || + ["server-0", "server-1", "server-2", "agent-0", "agent-1"]) +NODE_BOXES = (ENV['E2E_NODE_BOXES'] || + ['generic/ubuntu2004', 'generic/ubuntu2004', 'generic/ubuntu2004', 'generic/ubuntu2004', 'generic/ubuntu2004']) +GITHUB_BRANCH = (ENV['E2E_GITHUB_BRANCH'] || "master") +RELEASE_VERSION = (ENV['E2E_RELEASE_VERSION'] || "") +EXTERNAL_DB = (ENV['E2E_EXTERNAL_DB'] || "etcd") +HARDENED = (ENV['E2E_HARDENED'] || "") +RANCHER = (ENV['E2E_RANCHER'] || "") +NODE_CPUS = (ENV['E2E_NODE_CPUS'] || 2).to_i +NODE_MEMORY = (ENV['E2E_NODE_MEMORY'] || 1024).to_i +# Virtualbox >= 6.1.28 require `/etc/vbox/network.conf` for expanded private networks +NETWORK_PREFIX = "10.10.10" +install_type = "" +hardened_arg = "" + +def provision(vm, role, role_num, node_num) + vm.box = NODE_BOXES[node_num] + vm.hostname = role + # An expanded netmask is required to allow VM<-->VM communication, virtualbox defaults to /32 + node_ip = "#{NETWORK_PREFIX}.#{100+node_num}" + vm.network "private_network", ip: node_ip, netmask: "255.255.255.0" + + scripts_location = Dir.exists?("./scripts") ? "./scripts" : "../scripts" + vagrant_defaults = File.exists?("./vagrantdefaults.rb") ? "./vagrantdefaults.rb" : "../vagrantdefaults.rb" + load vagrant_defaults + + defaultOSConfigure(vm) + install_type = getInstallType(vm, RELEASE_VERSION, GITHUB_BRANCH) + + vm.provision "shell", inline: "ping -c 2 k3s.io" + + db_type = getDBType(role, role_num, vm) + + if !HARDENED.empty? + vm.provision "Set kernel parameters", type: "shell", path: scripts_location + "/harden.sh" + hardened_arg = "protect-kernel-defaults: true\nkube-apiserver-arg: \"enable-admission-plugins=NodeRestriction,PodSecurityPolicy,ServiceAccount\"" + end + + if role.include?("server") && role_num == 0 + vm.provision 'k3s-primary-server', type: 'k3s', run: 'once' do |k3s| + k3s.args = "server " + k3s.config = <<~YAML + token: vagrant + node-external-ip: #{NETWORK_PREFIX}.100 + flannel-iface: eth1 + tls-san: #{NETWORK_PREFIX}.100.nip.io + #{db_type} + #{hardened_arg} + YAML + k3s.env = %W[K3S_KUBECONFIG_MODE=0644 #{install_type}] + k3s.config_mode = '0644' # side-step https://github.com/k3s-io/k3s/issues/4321 + end + + elsif role.include?("server") && role_num != 0 + vm.provision 'k3s-secondary-server', type: 'k3s', run: 'once' do |k3s| + k3s.args = "server" + k3s.config = <<~YAML + server: "https://#{NETWORK_PREFIX}.100:6443" + token: vagrant + node-external-ip: #{node_ip} + flannel-iface: eth1 + #{db_type} + #{hardened_arg} + YAML + k3s.env = %W[K3S_KUBECONFIG_MODE=0644 K3S_TOKEN=vagrant #{install_type}] + k3s.config_mode = '0644' # side-step https://github.com/k3s-io/k3s/issues/4321 + end + end + + if role.include?("agent") + vm.provision 'k3s-agent', type: 'k3s', run: 'once' do |k3s| + k3s.args = "agent" + k3s.config = <<~YAML + server: "https://#{NETWORK_PREFIX}.100:6443" + token: vagrant + node-external-ip: #{node_ip} + flannel-iface: eth1 + #{db_type} + #{hardened_arg} + YAML + k3s.env = %W[K3S_KUBECONFIG_MODE=0644 #{install_type}] + k3s.config_mode = '0644' # side-step https://github.com/k3s-io/k3s/issues/4321 + end + end + if vm.box.to_s.include?("microos") + vm.provision 'k3s-reload', type: 'reload', run: 'once' + if !EXTERNAL_DB.empty? + vm.provision "shell", inline: "docker start #{EXTERNAL_DB}" + end + end + # This step does not run by default and is designed to be called by higher level tools + if !RANCHER.empty? + vm.provision "Install Rancher", type: "shell", run: "never", path: scripts_location + "/rancher.sh", args: node_ip + end +end + +def getDBType(role, role_num, vm) + + if EXTERNAL_DB == "mysql" + if role.include?("server") && role_num == 0 + dockerInstall(vm) + vm.provision "Start mysql", inline: "docker run -d -p 3306:3306 --name #{EXTERNAL_DB} -e MYSQL_ROOT_PASSWORD=e2e mysql:5.7" + vm.provision "shell", inline: "echo \"Wait for mysql to startup\"; sleep 10" + return "datastore-endpoint: 'mysql://root:e2e@tcp(#{NETWORK_PREFIX}.100:3306)/k3s'" + elsif role.include?("server") && role_num != 0 + return "datastore-endpoint: 'mysql://root:e2e@tcp(#{NETWORK_PREFIX}.100:3306)/k3s'" + end + + elsif EXTERNAL_DB == "postgres" + if role.include?("server") && role_num == 0 + dockerInstall(vm) + vm.provision "Start postgres", type: "shell", inline: "docker run -d -p 5432:5432 --name postgres -e POSTGRES_PASSWORD=e2e postgres:14-alpine" + vm.provision "shell", inline: "echo \"Wait for postgres to startup\"; sleep 10" + return "datastore-endpoint: 'postgres://postgres:e2e@#{NETWORK_PREFIX}.100:5432/k3s?sslmode=disable'" + elsif role.include?("server") && role_num != 0 + return "datastore-endpoint: 'postgres://postgres:e2e@#{NETWORK_PREFIX}.100:5432/k3s?sslmode=disable'" + end + + elsif ( EXTERNAL_DB == "" || EXTERNAL_DB == "etcd" ) + if role.include?("server") && role_num == 0 + return "cluster-init: true" + end + elsif ( EXTERNAL_DB == "none" ) + # Use internal sqlite, only valid for single node clusters + else + puts "Unknown EXTERNAL_DB: " + EXTERNAL_DB + abort + end + return "" +end + +Vagrant.configure("2") do |config| + config.vagrant.plugins = ["vagrant-k3s", "vagrant-reload"] + # Default provider is libvirt, virtualbox is only provided as a backup + config.vm.provider "libvirt" do |v| + v.cpus = NODE_CPUS + v.memory = NODE_MEMORY + end + config.vm.provider "virtualbox" do |v| + v.cpus = NODE_CPUS + v.memory = NODE_MEMORY + end + + if NODE_ROLES.kind_of?(String) + NODE_ROLES = NODE_ROLES.split(" ", -1) + end + if NODE_BOXES.kind_of?(String) + NODE_BOXES = NODE_BOXES.split(" ", -1) + end + + # Must iterate on the index, vagrant does not understand iterating + # over the node roles themselves + NODE_ROLES.length.times do |i| + name = NODE_ROLES[i] + role_num = name.split("-", -1).pop.to_i + config.vm.define name do |node| + provision(node.vm, name, role_num, i) + end + end +end diff --git a/tests/e2e/clusterreset/clusterreset_test.go b/tests/e2e/clusterreset/clusterreset_test.go new file mode 100644 index 0000000000..2c65ded961 --- /dev/null +++ b/tests/e2e/clusterreset/clusterreset_test.go @@ -0,0 +1,181 @@ +package clusterreset + +import ( + "flag" + "fmt" + "os" + "strings" + "testing" + + "github.com/k3s-io/k3s/tests/e2e" + . "github.com/onsi/ginkgo/v2" + . "github.com/onsi/gomega" +) + +// Valid nodeOS: +// generic/ubuntu2004, generic/centos7, generic/rocky8, +// opensuse/Leap-15.3.x86_64, dweomer/microos.amd64 +var nodeOS = flag.String("nodeOS", "generic/ubuntu2004", "VM operating system") +var serverCount = flag.Int("serverCount", 3, "number of server nodes") +var agentCount = flag.Int("agentCount", 2, "number of agent nodes") +var hardened = flag.Bool("hardened", false, "true or false") +var ci = flag.Bool("ci", false, "running on CI") +var local = flag.Bool("local", false, "deploy a locally built K3s binary") + +// Environment Variables Info: +// E2E_EXTERNAL_DB: mysql, postgres, etcd (default: etcd) +// E2E_RELEASE_VERSION=v1.23.1+k3s2 (default: latest commit from master) + +func Test_E2EClusterReset(t *testing.T) { + RegisterFailHandler(Fail) + flag.Parse() + RunSpecs(t, "Create Cluster Test Suite") +} + +var ( + kubeConfigFile string + serverNodeNames []string + agentNodeNames []string +) + +var _ = Describe("Verify Create", Ordered, func() { + Context("Cluster :", func() { + It("Starts up with no issues", func() { + var err error + if *local { + serverNodeNames, agentNodeNames, err = e2e.CreateLocalCluster(*nodeOS, *serverCount, *agentCount) + } else { + serverNodeNames, agentNodeNames, err = e2e.CreateCluster(*nodeOS, *serverCount, *agentCount) + } + Expect(err).NotTo(HaveOccurred(), e2e.GetVagrantLog()) + fmt.Println("CLUSTER CONFIG") + fmt.Println("OS:", *nodeOS) + fmt.Println("Server Nodes:", serverNodeNames) + fmt.Println("Agent Nodes:", agentNodeNames) + kubeConfigFile, err = e2e.GenKubeConfigFile(serverNodeNames[0]) + Expect(err).NotTo(HaveOccurred()) + Expect(e2e.DockerLogin(kubeConfigFile, *ci)).To(Succeed()) + }) + + It("Checks Node and Pod Status", func() { + fmt.Printf("\nFetching node status\n") + Eventually(func(g Gomega) { + nodes, err := e2e.ParseNodes(kubeConfigFile, false) + g.Expect(err).NotTo(HaveOccurred()) + for _, node := range nodes { + g.Expect(node.Status).Should(Equal("Ready")) + } + }, "420s", "5s").Should(Succeed()) + _, _ = e2e.ParseNodes(kubeConfigFile, true) + + fmt.Printf("\nFetching Pods status\n") + Eventually(func(g Gomega) { + pods, err := e2e.ParsePods(kubeConfigFile, false) + g.Expect(err).NotTo(HaveOccurred()) + for _, pod := range pods { + if strings.Contains(pod.Name, "helm-install") { + g.Expect(pod.Status).Should(Equal("Completed"), pod.Name) + } else { + g.Expect(pod.Status).Should(Equal("Running"), pod.Name) + } + } + }, "420s", "5s").Should(Succeed()) + _, _ = e2e.ParsePods(kubeConfigFile, true) + }) + + It("Verifies ClusterReset Functionality", func() { + Eventually(func(g Gomega) { + for _, nodeName := range serverNodeNames { + if nodeName != "server-0" { + cmd := "sudo systemctl stop k3s" + _, err := e2e.RunCmdOnNode(cmd, nodeName) + Expect(err).NotTo(HaveOccurred()) + } + } + + cmd := "sudo systemctl stop k3s" + _, err := e2e.RunCmdOnNode(cmd, "server-0") + Expect(err).NotTo(HaveOccurred()) + + cmd = "sudo k3s server --cluster-reset" + res, err := e2e.RunCmdOnNode(cmd, "server-0") + Expect(err).NotTo(HaveOccurred()) + Expect(res).Should(ContainSubstring("cluster-reset")) + + cmd = "sudo systemctl start k3s" + _, err = e2e.RunCmdOnNode(cmd, "server-0") + Expect(err).NotTo(HaveOccurred()) + + fmt.Printf("\nFetching node status\n") + Eventually(func(g Gomega) { + nodes, err := e2e.ParseNodes(kubeConfigFile, false) + g.Expect(err).NotTo(HaveOccurred()) + for _, node := range nodes { + g.Expect(node.Status).Should(Equal("Ready")) + } + }, "420s", "5s").Should(Succeed()) + _, _ = e2e.ParseNodes(kubeConfigFile, true) + + fmt.Printf("\nFetching Pods status\n") + Eventually(func(g Gomega) { + pods, err := e2e.ParsePods(kubeConfigFile, false) + g.Expect(err).NotTo(HaveOccurred()) + for _, pod := range pods { + if strings.Contains(pod.Name, "helm-install") { + g.Expect(pod.Status).Should(Equal("Completed"), pod.Name) + } else { + g.Expect(pod.Status).Should(Equal("Running"), pod.Name) + } + } + }, "420s", "5s").Should(Succeed()) + _, _ = e2e.ParsePods(kubeConfigFile, true) + for _, nodeName := range serverNodeNames { + if nodeName != "server-0" { + cmd := "sudo rm -rf /var/lib/rancher/k3s/server/db" + _, err := e2e.RunCmdOnNode(cmd, nodeName) + Expect(err).NotTo(HaveOccurred()) + cmd = "sudo systemctl restart k3s" + _, err = e2e.RunCmdOnNode(cmd, nodeName) + Expect(err).NotTo(HaveOccurred()) + } + } + Eventually(func(g Gomega) { + nodes, err := e2e.ParseNodes(kubeConfigFile, false) + g.Expect(err).NotTo(HaveOccurred()) + for _, node := range nodes { + g.Expect(node.Status).Should(Equal("Ready")) + } + }, "420s", "5s").Should(Succeed()) + _, _ = e2e.ParseNodes(kubeConfigFile, true) + + fmt.Printf("\nFetching Pods status\n") + Eventually(func(g Gomega) { + pods, err := e2e.ParsePods(kubeConfigFile, false) + g.Expect(err).NotTo(HaveOccurred()) + for _, pod := range pods { + if strings.Contains(pod.Name, "helm-install") { + g.Expect(pod.Status).Should(Equal("Completed"), pod.Name) + } else { + g.Expect(pod.Status).Should(Equal("Running"), pod.Name) + } + } + }, "420s", "5s").Should(Succeed()) + _, _ = e2e.ParsePods(kubeConfigFile, true) + }, "240s", "5s").Should(Succeed()) + }) + }) +}) + +var failed = false +var _ = AfterEach(func() { + failed = failed || CurrentSpecReport().Failed() +}) + +var _ = AfterSuite(func() { + if failed && !*ci { + fmt.Println("FAILED!") + } else { + Expect(e2e.DestroyCluster()).To(Succeed()) + Expect(os.Remove(kubeConfigFile)).To(Succeed()) + } +}) From e947cd9fe3502083e7d1d3eaf1dbb971446555b9 Mon Sep 17 00:00:00 2001 From: Shylaja Devadiga Date: Tue, 20 Sep 2022 08:58:00 -0700 Subject: [PATCH 2/2] Add cluster reset test to nightly builds Signed-off-by: Shylaja Devadiga --- tests/e2e/clusterreset/Vagrantfile | 28 +-------------------- tests/e2e/clusterreset/clusterreset_test.go | 13 ++++++---- 2 files changed, 9 insertions(+), 32 deletions(-) diff --git a/tests/e2e/clusterreset/Vagrantfile b/tests/e2e/clusterreset/Vagrantfile index 9ba3182621..1bdf74e85d 100644 --- a/tests/e2e/clusterreset/Vagrantfile +++ b/tests/e2e/clusterreset/Vagrantfile @@ -7,7 +7,6 @@ GITHUB_BRANCH = (ENV['E2E_GITHUB_BRANCH'] || "master") RELEASE_VERSION = (ENV['E2E_RELEASE_VERSION'] || "") EXTERNAL_DB = (ENV['E2E_EXTERNAL_DB'] || "etcd") HARDENED = (ENV['E2E_HARDENED'] || "") -RANCHER = (ENV['E2E_RANCHER'] || "") NODE_CPUS = (ENV['E2E_NODE_CPUS'] || 2).to_i NODE_MEMORY = (ENV['E2E_NODE_MEMORY'] || 1024).to_i # Virtualbox >= 6.1.28 require `/etc/vbox/network.conf` for expanded private networks @@ -90,35 +89,10 @@ def provision(vm, role, role_num, node_num) vm.provision "shell", inline: "docker start #{EXTERNAL_DB}" end end - # This step does not run by default and is designed to be called by higher level tools - if !RANCHER.empty? - vm.provision "Install Rancher", type: "shell", run: "never", path: scripts_location + "/rancher.sh", args: node_ip - end end def getDBType(role, role_num, vm) - - if EXTERNAL_DB == "mysql" - if role.include?("server") && role_num == 0 - dockerInstall(vm) - vm.provision "Start mysql", inline: "docker run -d -p 3306:3306 --name #{EXTERNAL_DB} -e MYSQL_ROOT_PASSWORD=e2e mysql:5.7" - vm.provision "shell", inline: "echo \"Wait for mysql to startup\"; sleep 10" - return "datastore-endpoint: 'mysql://root:e2e@tcp(#{NETWORK_PREFIX}.100:3306)/k3s'" - elsif role.include?("server") && role_num != 0 - return "datastore-endpoint: 'mysql://root:e2e@tcp(#{NETWORK_PREFIX}.100:3306)/k3s'" - end - - elsif EXTERNAL_DB == "postgres" - if role.include?("server") && role_num == 0 - dockerInstall(vm) - vm.provision "Start postgres", type: "shell", inline: "docker run -d -p 5432:5432 --name postgres -e POSTGRES_PASSWORD=e2e postgres:14-alpine" - vm.provision "shell", inline: "echo \"Wait for postgres to startup\"; sleep 10" - return "datastore-endpoint: 'postgres://postgres:e2e@#{NETWORK_PREFIX}.100:5432/k3s?sslmode=disable'" - elsif role.include?("server") && role_num != 0 - return "datastore-endpoint: 'postgres://postgres:e2e@#{NETWORK_PREFIX}.100:5432/k3s?sslmode=disable'" - end - - elsif ( EXTERNAL_DB == "" || EXTERNAL_DB == "etcd" ) + if ( EXTERNAL_DB == "" || EXTERNAL_DB == "etcd" ) if role.include?("server") && role_num == 0 return "cluster-init: true" end diff --git a/tests/e2e/clusterreset/clusterreset_test.go b/tests/e2e/clusterreset/clusterreset_test.go index 2c65ded961..a07ebee056 100644 --- a/tests/e2e/clusterreset/clusterreset_test.go +++ b/tests/e2e/clusterreset/clusterreset_test.go @@ -29,7 +29,7 @@ var local = flag.Bool("local", false, "deploy a locally built K3s binary") func Test_E2EClusterReset(t *testing.T) { RegisterFailHandler(Fail) flag.Parse() - RunSpecs(t, "Create Cluster Test Suite") + RunSpecs(t, "Create ClusterReset Test Suite") } var ( @@ -54,7 +54,6 @@ var _ = Describe("Verify Create", Ordered, func() { fmt.Println("Agent Nodes:", agentNodeNames) kubeConfigFile, err = e2e.GenKubeConfigFile(serverNodeNames[0]) Expect(err).NotTo(HaveOccurred()) - Expect(e2e.DockerLogin(kubeConfigFile, *ci)).To(Succeed()) }) It("Checks Node and Pod Status", func() { @@ -100,7 +99,7 @@ var _ = Describe("Verify Create", Ordered, func() { cmd = "sudo k3s server --cluster-reset" res, err := e2e.RunCmdOnNode(cmd, "server-0") Expect(err).NotTo(HaveOccurred()) - Expect(res).Should(ContainSubstring("cluster-reset")) + Expect(res).Should(ContainSubstring("Managed etcd cluster membership has been reset, restart without --cluster-reset flag now")) cmd = "sudo systemctl start k3s" _, err = e2e.RunCmdOnNode(cmd, "server-0") @@ -111,9 +110,13 @@ var _ = Describe("Verify Create", Ordered, func() { nodes, err := e2e.ParseNodes(kubeConfigFile, false) g.Expect(err).NotTo(HaveOccurred()) for _, node := range nodes { - g.Expect(node.Status).Should(Equal("Ready")) + if strings.Contains(node.Name, "server-0") || strings.Contains(node.Name, "agent-") { + g.Expect(node.Status).Should(Equal("Ready")) + } else { + g.Expect(node.Status).Should(Equal("NotReady")) + } } - }, "420s", "5s").Should(Succeed()) + }, "480s", "5s").Should(Succeed()) _, _ = e2e.ParseNodes(kubeConfigFile, true) fmt.Printf("\nFetching Pods status\n")