From e9cf14ffb404d01d3e775e553db395443b201f67 Mon Sep 17 00:00:00 2001 From: Kenneth Owens Date: Wed, 14 Dec 2016 13:56:21 -0800 Subject: [PATCH] Adds zookeeper example (#1894) * Initial commit * Adds section for cleanup Corrects some spelling errors decapitalizes liveness and readiness * Adds test for zookeeper example * Address enisoc review * Remove space between shell and raw annotation * Remove extranous inserted text * Remove fencing statement * Modify sentence for grammer * refocus to zookeeper with some loss of generality * Spelling, Grammar, DNS link * update to address foxish comments --- _data/tutorials.yml | 4 +- docs/tutorials/index.md | 2 + .../stateful-application/zookeeper.md | 1248 +++++++++++++++++ .../stateful-application/zookeeper.yaml | 164 +++ test/examples_test.go | 8 + 5 files changed, 1425 insertions(+), 1 deletion(-) create mode 100644 docs/tutorials/stateful-application/zookeeper.md create mode 100644 docs/tutorials/stateful-application/zookeeper.yaml diff --git a/_data/tutorials.yml b/_data/tutorials.yml index 41664efa31..82396ca65a 100644 --- a/_data/tutorials.yml +++ b/_data/tutorials.yml @@ -58,4 +58,6 @@ toc: - title: Running a Single-Instance Stateful Application path: /docs/tutorials/stateful-application/run-stateful-application/ - title: Running a Replicated Stateful Application - path: /docs/tutorials/stateful-application/run-replicated-stateful-application/ \ No newline at end of file + path: /docs/tutorials/stateful-application/run-replicated-stateful-application/ + - title: Running ZooKeeper, A CP Distributed System + path: /docs/tutorials/stateful-application/zookeeper/ diff --git a/docs/tutorials/index.md b/docs/tutorials/index.md index 507ca6d8e1..1b52a15e1a 100644 --- a/docs/tutorials/index.md +++ b/docs/tutorials/index.md @@ -26,6 +26,8 @@ each of which has a sequence of steps. * [Running a Replicated Stateful Application](/docs/tutorials/stateful-application/run-replicated-stateful-application/) +* [Running ZooKeeper, A CP Distributed System](/docs/tutorials/stateful-application/zookeeper/) + ### What's next If you would like to write a tutorial, see diff --git a/docs/tutorials/stateful-application/zookeeper.md b/docs/tutorials/stateful-application/zookeeper.md new file mode 100644 index 0000000000..38315cd37a --- /dev/null +++ b/docs/tutorials/stateful-application/zookeeper.md @@ -0,0 +1,1248 @@ +--- +assignees: +- bprashanth +- enisoc +- erictune +- foxish +- janetkuo +- kow3ns +- smarterclayton +--- + +{% capture overview %} +This tutorial demonstrates [Apache Zookeeper](https://zookeeper.apache.org) on +Kubernetes using [StatefulSets](/docs/concepts/abstractions/controllers/statefulsets/), +[PodDisruptionBudgets](/docs/admin/disruptions/#specifying-a-poddisruptionbudget), +and [PodAntiAffinity](/docs/user-guide/node-selection/). +{% endcapture %} + +{% capture prerequisites %} + +Before starting this tutorial, you should be familiar with the following +Kubernetes concepts. 
+
+* [Pods](/docs/user-guide/pods/single-container/)
+* [Cluster DNS](/docs/admin/dns/)
+* [Headless Services](/docs/user-guide/services/#headless-services)
+* [PersistentVolumes](/docs/user-guide/volumes/)
+* [PersistentVolume Provisioning](http://releases.k8s.io/{{page.githubbranch}}/examples/experimental/persistent-volume-provisioning/)
+* [ConfigMaps](/docs/user-guide/configmap/)
+* [StatefulSets](/docs/concepts/abstractions/controllers/statefulsets/)
+* [PodDisruptionBudgets](/docs/admin/disruptions/#specifying-a-poddisruptionbudget)
+* [PodAntiAffinity](/docs/user-guide/node-selection/)
+* [kubectl CLI](/docs/user-guide/kubectl)
+
+You will require a cluster with at least four nodes, and each node will require
+at least 2 CPUs and 4 GiB of memory. In this tutorial you will cordon and
+drain the cluster's nodes. **This means that all Pods on the cluster's nodes
+will be terminated and evicted, and the nodes will, temporarily, become
+unschedulable.** You should use a dedicated cluster for this tutorial, or you
+should ensure that the disruption you cause will not interfere with other
+tenants.
+
+This tutorial assumes that your cluster is configured to dynamically provision
+PersistentVolumes. If your cluster is not configured to do so, you
+will have to manually provision three 20 GiB volumes prior to starting this
+tutorial.
+{% endcapture %}
+
+{% capture objectives %}
+After this tutorial, you will know the following.
+
+* How to deploy a ZooKeeper ensemble using StatefulSet.
+* How to consistently configure the ensemble using ConfigMaps.
+* How to spread the deployment of ZooKeeper servers in the ensemble.
+* How to use PodDisruptionBudgets to ensure service availability during planned maintenance.
+{% endcapture %}
+
+{% capture lessoncontent %}
+
+#### ZooKeeper Basics
+
+[Apache ZooKeeper](https://zookeeper.apache.org/doc/current/) is a
+distributed, open-source coordination service for distributed applications.
+ZooKeeper allows you to read, write, and observe updates to data. Data are
+organized in a file-system-like hierarchy and replicated to all ZooKeeper
+servers in the ensemble (a set of ZooKeeper servers). All operations on data
+are atomic and sequentially consistent. ZooKeeper ensures this by using the
+[Zab](https://pdfs.semanticscholar.org/b02c/6b00bd5dbdbd951fddb00b906c82fa80f0b3.pdf)
+consensus protocol to replicate a state machine across all servers in the ensemble.
+
+The ensemble uses the Zab protocol to elect a leader, and
+data cannot be written until a leader is elected. Once a leader is
+elected, the ensemble uses Zab to ensure that all writes are replicated to a
+quorum before they are acknowledged and made visible to clients. Ignoring
+weighted quorums, a quorum is a majority component of the ensemble that contains
+the current leader. For instance, if the ensemble has three servers, a component
+that contains the leader and one other server constitutes a quorum. If the
+ensemble cannot achieve a quorum, data cannot be written.
+
+ZooKeeper servers keep their entire state machine in memory, but every mutation
+is written to a durable WAL (Write Ahead Log) on storage media. When a server
+crashes, it can recover its previous state by replaying the WAL. In order to
+prevent the WAL from growing without bound, ZooKeeper servers will periodically
+snapshot their in-memory state to storage media. These snapshots can be loaded
+directly into memory, and all WAL entries that preceded the snapshot may be
+safely discarded.
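+
+To make the quorum arithmetic above concrete (a quick illustration only, not
+part of the tutorial's manifests), the smallest majority of an ensemble of N
+servers is floor(N/2) + 1, so a three server ensemble can tolerate the failure
+of any one server.
+
+```shell
+# Quorum size, floor(N/2) + 1, for ensembles of one to five servers.
+for n in 1 2 3 4 5; do echo "ensemble=$n quorum=$(( n / 2 + 1 ))"; done
+```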
+
+### Creating a ZooKeeper Ensemble
+
+The manifest below contains a
+[Headless Service](/docs/user-guide/services/#headless-services),
+a [ConfigMap](/docs/user-guide/configmap/),
+a [PodDisruptionBudget](/docs/admin/disruptions/#specifying-a-poddisruptionbudget),
+and a [StatefulSet](/docs/concepts/abstractions/controllers/statefulsets/).
+
+{% include code.html language="yaml" file="zookeeper.yaml" ghlink="/docs/tutorials/stateful-application/zookeeper.yaml" %}
+
+Open a command terminal, and use
+[`kubectl create`](/docs/user-guide/kubectl/kubectl_create/) to create the
+manifest.
+
+```shell
+kubectl create -f http://k8s.io/docs/tutorials/stateful-application/zookeeper.yaml
+```
+
+This creates the `zk-headless` Headless Service, the `zk-config` ConfigMap,
+the `zk-budget` PodDisruptionBudget, and the `zk` StatefulSet.
+
+```shell
+service "zk-headless" created
+configmap "zk-config" created
+poddisruptionbudget "zk-budget" created
+statefulset "zk" created
+```
+
+Use [`kubectl get`](/docs/user-guide/kubectl/kubectl_get/) to watch the
+StatefulSet controller create the StatefulSet's Pods.
+
+```shell
+kubectl get pods -w -l app=zk
+```
+
+Once the `zk-2` Pod is Running and Ready, use `CTRL-C` to terminate kubectl.
+
+```shell
+NAME READY STATUS RESTARTS AGE
+zk-0 0/1 Pending 0 0s
+zk-0 0/1 Pending 0 0s
+zk-0 0/1 ContainerCreating 0 0s
+zk-0 0/1 Running 0 19s
+zk-0 1/1 Running 0 40s
+zk-1 0/1 Pending 0 0s
+zk-1 0/1 Pending 0 0s
+zk-1 0/1 ContainerCreating 0 0s
+zk-1 0/1 Running 0 18s
+zk-1 1/1 Running 0 40s
+zk-2 0/1 Pending 0 0s
+zk-2 0/1 Pending 0 0s
+zk-2 0/1 ContainerCreating 0 0s
+zk-2 0/1 Running 0 19s
+zk-2 1/1 Running 0 40s
+```
+
+The StatefulSet controller creates three Pods, and each Pod has a container with
+a [ZooKeeper 3.4.9](http://www-us.apache.org/dist/zookeeper/zookeeper-3.4.9/) server.
+
+#### Facilitating Leader Election
+
+As there is no terminating algorithm for electing a leader in an anonymous
+network, Zab requires explicit membership configuration in order to perform
+leader election. Each server in the ensemble needs to have a unique
+identifier, all servers need to know the global set of identifiers, and each
+identifier needs to be associated with a network address.
+
+Use [`kubectl exec`](/docs/user-guide/kubectl/kubectl_exec/) to get the hostnames
+of the Pods in the `zk` StatefulSet.
+
+```shell
+for i in 0 1 2; do kubectl exec zk-$i -- hostname; done
+```
+
+The StatefulSet controller provides each Pod with a unique hostname based on its
+ordinal index. The hostnames take the form `<statefulset name>-<ordinal index>`.
+As the `replicas` field of the `zk` StatefulSet is set to `3`, the Set's
+controller creates three Pods with their hostnames set to `zk-0`, `zk-1`, and
+`zk-2`.
+
+```shell
+zk-0
+zk-1
+zk-2
+```
+
+The servers in a ZooKeeper ensemble use natural numbers as unique identifiers, and
+each server's identifier is stored in a file called `myid` in the server’s
+data directory.
+
+Examine the contents of the `myid` file for each server.
+
+```shell
+for i in 0 1 2; do echo "myid zk-$i";kubectl exec zk-$i -- cat /var/lib/zookeeper/data/myid; done
+```
+
+As the identifiers are natural numbers and the ordinal indices are non-negative
+integers, you can generate an identifier by adding one to the ordinal.
+
+```shell
+myid zk-0
+1
+myid zk-1
+2
+myid zk-2
+3
+```
+
+Get the FQDN (Fully Qualified Domain Name) of each Pod in the `zk` StatefulSet.
+ +```shell +for i in 0 1 2; do kubectl exec zk-$i -- hostname -f; done +``` + +The `zk-headless` Service creates a domain for all of the Pods, +`zk-headless.default.svc.cluster.local`. + +```shell +zk-0.zk-headless.default.svc.cluster.local +zk-1.zk-headless.default.svc.cluster.local +zk-2.zk-headless.default.svc.cluster.local +``` + +The A records in [Kubernetes DNS](/docs/admin/dns/) resolve the FQDNs to the Pods' IP addresses. +If the Pods are rescheduled, the A records will be updated with the Pods' new IP +addresses, but the A record's names will not change. + +ZooKeeper stores its application configuration in a file named `zoo.cfg`. Use +`kubectl exec` to view the contents of the `zoo.cfg` file in the `zk-0` Pod. + +``` +kubectl exec zk-0 -- cat /opt/zookeeper/conf/zoo.cfg +``` + +For the `server.1`, `server.2`, and `server.3` properties at the bottom of +the file, the `1`, `2`, and `3` correspond to the identifiers in the +ZooKeeper servers' `myid` files. They are set to the FQDNs for the Pods in +the `zk` StatefulSet. + +```shell +clientPort=2181 +dataDir=/var/lib/zookeeper/data +dataLogDir=/var/lib/zookeeper/log +tickTime=2000 +initLimit=10 +syncLimit=2000 +maxClientCnxns=60 +minSessionTimeout= 4000 +maxSessionTimeout= 40000 +autopurge.snapRetainCount=3 +autopurge.purgeInteval=0 +server.1=zk-0.zk-headless.default.svc.cluster.local:2888:3888 +server.2=zk-1.zk-headless.default.svc.cluster.local:2888:3888 +server.3=zk-2.zk-headless.default.svc.cluster.local:2888:3888 +``` + +#### Achieving Consensus + +Consensus protocols require that the identifiers of each participant be +unique. No two participants in the Zab protocol should claim the same unique +identifier. This is necessary to allow the processes in the system to agree on +which processes have committed which data. If two Pods were launched with the +same ordinal, two ZooKeeper servers would both identify themselves as the same + server. + +When you created the `zk` StatefulSet, the StatefulSet's controller created +each Pod sequentially, in the order defined by the Pods' ordinal indices, and it +waited for each Pod to be Running and Ready before creating the next Pod. + +```shell +kubectl get pods -w -l app=zk +NAME READY STATUS RESTARTS AGE +zk-0 0/1 Pending 0 0s +zk-0 0/1 Pending 0 0s +zk-0 0/1 ContainerCreating 0 0s +zk-0 0/1 Running 0 19s +zk-0 1/1 Running 0 40s +zk-1 0/1 Pending 0 0s +zk-1 0/1 Pending 0 0s +zk-1 0/1 ContainerCreating 0 0s +zk-1 0/1 Running 0 18s +zk-1 1/1 Running 0 40s +zk-2 0/1 Pending 0 0s +zk-2 0/1 Pending 0 0s +zk-2 0/1 ContainerCreating 0 0s +zk-2 0/1 Running 0 19s +zk-2 1/1 Running 0 40s +``` + +The A records for each Pod are only entered when the Pod becomes Ready. Therefore, +the FQDNs of the ZooKeeper servers will only resolve to a single endpoint, and that +endpoint will be the unique ZooKeeper server claiming the identity configured +in its `myid` file. + +```shell +zk-0.zk-headless.default.svc.cluster.local +zk-1.zk-headless.default.svc.cluster.local +zk-2.zk-headless.default.svc.cluster.local +``` + +This ensures that the `servers` properties in the ZooKeepers' `zoo.cfg` files +represents a correctly configured ensemble. 
+
+```shell
+server.1=zk-0.zk-headless.default.svc.cluster.local:2888:3888
+server.2=zk-1.zk-headless.default.svc.cluster.local:2888:3888
+server.3=zk-2.zk-headless.default.svc.cluster.local:2888:3888
+```
+
+When the servers use the Zab protocol to attempt to commit a value, they will
+either achieve consensus and commit the value (if leader election has succeeded
+and at least two of the Pods are Running and Ready), or they will fail to do so
+(if either of the aforementioned conditions is not met). No state will arise
+where one server acknowledges a write on behalf of another.
+
+#### Sanity Testing the Ensemble
+
+The most basic sanity test is to write some data to one ZooKeeper server and
+to read the data from another.
+
+Use the `zkCli.sh` script to write `world` to the path `/hello` on the `zk-0` Pod.
+
+```shell
+kubectl exec zk-0 zkCli.sh create /hello world
+```
+
+This will write `world` to the `/hello` path in the ensemble.
+
+```shell
+WATCHER::
+
+WatchedEvent state:SyncConnected type:None path:null
+Created /hello
+```
+
+Get the data from the `zk-1` Pod.
+
+```shell
+kubectl exec zk-1 zkCli.sh get /hello
+```
+
+The data that you created on `zk-0` is available on all of the servers in the
+ensemble.
+
+```shell
+WATCHER::
+
+WatchedEvent state:SyncConnected type:None path:null
+world
+cZxid = 0x100000002
+ctime = Thu Dec 08 15:13:30 UTC 2016
+mZxid = 0x100000002
+mtime = Thu Dec 08 15:13:30 UTC 2016
+pZxid = 0x100000002
+cversion = 0
+dataVersion = 0
+aclVersion = 0
+ephemeralOwner = 0x0
+dataLength = 5
+numChildren = 0
+```
+
+#### Providing Durable Storage
+
+As mentioned in the [ZooKeeper Basics](#zookeeper-basics) section,
+ZooKeeper commits all entries to a durable WAL, and periodically writes snapshots
+of its in-memory state to storage media. Using WALs to provide durability is a common
+technique for applications that use consensus protocols to achieve a replicated
+state machine and for storage applications in general.
+
+Use [`kubectl delete`](/docs/user-guide/kubectl/kubectl_delete/) to delete the
+`zk` StatefulSet.
+
+```shell
+kubectl delete statefulset zk
+statefulset "zk" deleted
+```
+
+Watch the termination of the Pods in the StatefulSet.
+
+```shell
+kubectl get pods -w -l app=zk
+```
+
+When `zk-0` is fully terminated, use `CTRL-C` to terminate kubectl.
+
+```shell
+zk-2 1/1 Terminating 0 9m
+zk-0 1/1 Terminating 0 11m
+zk-1 1/1 Terminating 0 10m
+zk-2 0/1 Terminating 0 9m
+zk-2 0/1 Terminating 0 9m
+zk-2 0/1 Terminating 0 9m
+zk-1 0/1 Terminating 0 10m
+zk-1 0/1 Terminating 0 10m
+zk-1 0/1 Terminating 0 10m
+zk-0 0/1 Terminating 0 11m
+zk-0 0/1 Terminating 0 11m
+zk-0 0/1 Terminating 0 11m
+```
+
+Reapply the manifest in `zookeeper.yaml`.
+
+```shell
+kubectl apply -f http://k8s.io/docs/tutorials/stateful-application/zookeeper.yaml
+```
+
+The `zk` StatefulSet will be created, but, because they already exist, the other API
+objects in the manifest will not be modified.
+
+```shell
+statefulset "zk" created
+Error from server (AlreadyExists): error when creating "zookeeper.yaml": services "zk-headless" already exists
+Error from server (AlreadyExists): error when creating "zookeeper.yaml": configmaps "zk-config" already exists
+Error from server (AlreadyExists): error when creating "zookeeper.yaml": poddisruptionbudgets.policy "zk-budget" already exists
+```
+
+Watch the StatefulSet controller recreate the StatefulSet's Pods.
+
+```shell
+kubectl get pods -w -l app=zk
+```
+
+Once the `zk-2` Pod is Running and Ready, use `CTRL-C` to terminate kubectl.
+ +```shell +NAME READY STATUS RESTARTS AGE +zk-0 0/1 Pending 0 0s +zk-0 0/1 Pending 0 0s +zk-0 0/1 ContainerCreating 0 0s +zk-0 0/1 Running 0 19s +zk-0 1/1 Running 0 40s +zk-1 0/1 Pending 0 0s +zk-1 0/1 Pending 0 0s +zk-1 0/1 ContainerCreating 0 0s +zk-1 0/1 Running 0 18s +zk-1 1/1 Running 0 40s +zk-2 0/1 Pending 0 0s +zk-2 0/1 Pending 0 0s +zk-2 0/1 ContainerCreating 0 0s +zk-2 0/1 Running 0 19s +zk-2 1/1 Running 0 40s +``` + +Get the value you entered during the [sanity test](#sanity-testing-the-ensemble), +from the `zk-2` Pod. + +```shell +kubectl exec zk-2 zkCli.sh get /hello +``` + +Even though all of the Pods in the `zk` StatefulSet have been terminated and +recreated, the ensemble still serves the original value. + +```shell +WATCHER:: + +WatchedEvent state:SyncConnected type:None path:null +world +cZxid = 0x100000002 +ctime = Thu Dec 08 15:13:30 UTC 2016 +mZxid = 0x100000002 +mtime = Thu Dec 08 15:13:30 UTC 2016 +pZxid = 0x100000002 +cversion = 0 +dataVersion = 0 +aclVersion = 0 +ephemeralOwner = 0x0 +dataLength = 5 +numChildren = 0 +``` + +The `volumeClaimTemplates` field, of the `zk` StatefulSet's `spec`, specifies a +PersistentVolume that will be provisioned for each Pod. + +```yaml +volumeClaimTemplates: + - metadata: + name: datadir + annotations: + volume.alpha.kubernetes.io/storage-class: anything + spec: + accessModes: [ "ReadWriteOnce" ] + resources: + requests: + storage: 20Gi +``` + + +The StatefulSet controller generates a PersistentVolumeClaim for each Pod in +the StatefulSet. + +Get the StatefulSet's PersistentVolumeClaims. + +```shell +kubectl get pvc -l app=zk +``` + +When the StatefulSet recreated its Pods, the Pods' PersistentVolumes were +remounted. + +```shell +NAME STATUS VOLUME CAPACITY ACCESSMODES AGE +datadir-zk-0 Bound pvc-bed742cd-bcb1-11e6-994f-42010a800002 20Gi RWO 1h +datadir-zk-1 Bound pvc-bedd27d2-bcb1-11e6-994f-42010a800002 20Gi RWO 1h +datadir-zk-2 Bound pvc-bee0817e-bcb1-11e6-994f-42010a800002 20Gi RWO 1h +``` + +The `volumeMounts` section of the StatefulSet's container `template` causes the +PersistentVolumes to be mounted to the ZooKeeper servers' data directories. + +```shell +volumeMounts: + - name: datadir + mountPath: /var/lib/zookeeper +``` + +When a Pod in the `zk` StatefulSet is (re)scheduled, it will always have the +same PersistentVolume mounted to the ZooKeeper server's data directory. +Even when the Pods are rescheduled, all of the writes made to the ZooKeeper +servers' WALs, and all of their snapshots, remain durable. + +### Ensuring Consistent Configuration + +As noted in the [Facilitating Leader Election](#facilitating-leader-election) and +[Achieving Consensus](#achieving-consensus) sections, the servers in a +ZooKeeper ensemble require consistent configuration in order to elect a leader +and form a quorum. They also require consistent configuration of the Zab protocol +in order for the protocol to work correctly over a network. You can use +ConfigMaps to achieve this. + +Get the `zk-config` ConfigMap. + +```shell + kubectl get cm zk-config -o yaml +apiVersion: v1 +data: + client.cnxns: "60" + ensemble: zk-0;zk-1;zk-2 + init: "10" + jvm.heap: 2G + purge.interval: "0" + snap.retain: "3" + sync: "5" + tick: "2000" +``` + +The `env` field of the `zk` StatefulSet's Pod `template` reads the ConfigMap +into environment variables. These variables are injected into the containers +environment. 
+ +```yaml +env: + - name : ZK_ENSEMBLE + valueFrom: + configMapKeyRef: + name: zk-config + key: ensemble + - name : ZK_HEAP_SIZE + valueFrom: + configMapKeyRef: + name: zk-config + key: jvm.heap + - name : ZK_TICK_TIME + valueFrom: + configMapKeyRef: + name: zk-config + key: tick + - name : ZK_INIT_LIMIT + valueFrom: + configMapKeyRef: + name: zk-config + key: init + - name : ZK_SYNC_LIMIT + valueFrom: + configMapKeyRef: + name: zk-config + key: tick + - name : ZK_MAX_CLIENT_CNXNS + valueFrom: + configMapKeyRef: + name: zk-config + key: client.cnxns + - name: ZK_SNAP_RETAIN_COUNT + valueFrom: + configMapKeyRef: + name: zk-config + key: snap.retain + - name: ZK_PURGE_INTERVAL + valueFrom: + configMapKeyRef: + name: zk-config + key: purge.interval +``` + +The entry point of the container invokes a bash script, `zkConfig.sh`, prior to +launching the ZooKeeper server process. This bash script generates the +ZooKeeper configuration files from the supplied environment variables. + +```yaml + command: + - sh + - -c + - zkGenConfig.sh && zkServer.sh start-foreground +``` + +Examine the environment of all of the Pods in the `zk` StatefulSet. + +```shell +for i in 0 1 2; do kubectl exec zk-$i env | grep ZK_*;echo""; done +``` + +All of the variables populated from `zk-config` contain identical values. This +allows the `zkGenConfig.sh` script to create consistent configurations for all +of the ZooKeeper servers in the ensemble. + +```shell +ZK_ENSEMBLE=zk-0;zk-1;zk-2 +ZK_HEAP_SIZE=2G +ZK_TICK_TIME=2000 +ZK_INIT_LIMIT=10 +ZK_SYNC_LIMIT=2000 +ZK_MAX_CLIENT_CNXNS=60 +ZK_SNAP_RETAIN_COUNT=3 +ZK_PURGE_INTERVAL=0 +ZK_CLIENT_PORT=2181 +ZK_SERVER_PORT=2888 +ZK_ELECTION_PORT=3888 +ZK_USER=zookeeper +ZK_DATA_DIR=/var/lib/zookeeper/data +ZK_DATA_LOG_DIR=/var/lib/zookeeper/log +ZK_LOG_DIR=/var/log/zookeeper + +ZK_ENSEMBLE=zk-0;zk-1;zk-2 +ZK_HEAP_SIZE=2G +ZK_TICK_TIME=2000 +ZK_INIT_LIMIT=10 +ZK_SYNC_LIMIT=2000 +ZK_MAX_CLIENT_CNXNS=60 +ZK_SNAP_RETAIN_COUNT=3 +ZK_PURGE_INTERVAL=0 +ZK_CLIENT_PORT=2181 +ZK_SERVER_PORT=2888 +ZK_ELECTION_PORT=3888 +ZK_USER=zookeeper +ZK_DATA_DIR=/var/lib/zookeeper/data +ZK_DATA_LOG_DIR=/var/lib/zookeeper/log +ZK_LOG_DIR=/var/log/zookeeper + +ZK_ENSEMBLE=zk-0;zk-1;zk-2 +ZK_HEAP_SIZE=2G +ZK_TICK_TIME=2000 +ZK_INIT_LIMIT=10 +ZK_SYNC_LIMIT=2000 +ZK_MAX_CLIENT_CNXNS=60 +ZK_SNAP_RETAIN_COUNT=3 +ZK_PURGE_INTERVAL=0 +ZK_CLIENT_PORT=2181 +ZK_SERVER_PORT=2888 +ZK_ELECTION_PORT=3888 +ZK_USER=zookeeper +ZK_DATA_DIR=/var/lib/zookeeper/data +ZK_DATA_LOG_DIR=/var/lib/zookeeper/log +ZK_LOG_DIR=/var/log/zookeeper +``` + +#### Configuring Logging + +One of the files generated by the `zkConfigGen.sh` script controls ZooKeeper's logging. +ZooKeeper uses [Log4j](http://logging.apache.org/log4j/2.x/), and, by default, +it uses a time and size based rolling file appender for its logging configuration. +Get the logging configuration from one of Pods in the `zk` StatefulSet. + +```shell +kubectl exec zk-0 cat /usr/etc/zookeeper/log4j.properties +``` + +The logging configuration below will cause the ZooKeeper process to write all +of its logs to the standard output file stream. 
+ +```shell +zookeeper.root.logger=CONSOLE +zookeeper.console.threshold=INFO +log4j.rootLogger=${zookeeper.root.logger} +log4j.appender.CONSOLE=org.apache.log4j.ConsoleAppender +log4j.appender.CONSOLE.Threshold=${zookeeper.console.threshold} +log4j.appender.CONSOLE.layout=org.apache.log4j.PatternLayout +log4j.appender.CONSOLE.layout.ConversionPattern=%d{ISO8601} [myid:%X{myid}] - %-5p [%t:%C{1}@%L] - %m%n +``` + +This is the simplest possible way to safely log inside the container. As the +application's logs are being written to standard out, Kubernetes will handle +log rotation for you. Kubernetes also implements a sane retention policy that +ensures application logs written to standard out and standard error do not +exhaust local storage media. + +Use [`kubectl logs`](/docs/user-guide/kubectl/kubectl_logs/) to retrieve the last +few log lines from one of the Pods. + +```shell +kubectl logs zk-0 --tail 20 +``` + +Application logs that are written to standard out or standard error are viewable +using `kubectl logs` and from the Kubernetes Dashboard. + +```shell +2016-12-06 19:34:16,236 [myid:1] - INFO [NIOServerCxn.Factory:0.0.0.0/0.0.0.0:2181:NIOServerCnxn@827] - Processing ruok command from /127.0.0.1:52740 +2016-12-06 19:34:16,237 [myid:1] - INFO [Thread-1136:NIOServerCnxn@1008] - Closed socket connection for client /127.0.0.1:52740 (no session established for client) +2016-12-06 19:34:26,155 [myid:1] - INFO [NIOServerCxn.Factory:0.0.0.0/0.0.0.0:2181:NIOServerCnxnFactory@192] - Accepted socket connection from /127.0.0.1:52749 +2016-12-06 19:34:26,155 [myid:1] - INFO [NIOServerCxn.Factory:0.0.0.0/0.0.0.0:2181:NIOServerCnxn@827] - Processing ruok command from /127.0.0.1:52749 +2016-12-06 19:34:26,156 [myid:1] - INFO [Thread-1137:NIOServerCnxn@1008] - Closed socket connection for client /127.0.0.1:52749 (no session established for client) +2016-12-06 19:34:26,222 [myid:1] - INFO [NIOServerCxn.Factory:0.0.0.0/0.0.0.0:2181:NIOServerCnxnFactory@192] - Accepted socket connection from /127.0.0.1:52750 +2016-12-06 19:34:26,222 [myid:1] - INFO [NIOServerCxn.Factory:0.0.0.0/0.0.0.0:2181:NIOServerCnxn@827] - Processing ruok command from /127.0.0.1:52750 +2016-12-06 19:34:26,226 [myid:1] - INFO [Thread-1138:NIOServerCnxn@1008] - Closed socket connection for client /127.0.0.1:52750 (no session established for client) +2016-12-06 19:34:36,151 [myid:1] - INFO [NIOServerCxn.Factory:0.0.0.0/0.0.0.0:2181:NIOServerCnxnFactory@192] - Accepted socket connection from /127.0.0.1:52760 +2016-12-06 19:34:36,152 [myid:1] - INFO [NIOServerCxn.Factory:0.0.0.0/0.0.0.0:2181:NIOServerCnxn@827] - Processing ruok command from /127.0.0.1:52760 +2016-12-06 19:34:36,152 [myid:1] - INFO [Thread-1139:NIOServerCnxn@1008] - Closed socket connection for client /127.0.0.1:52760 (no session established for client) +2016-12-06 19:34:36,230 [myid:1] - INFO [NIOServerCxn.Factory:0.0.0.0/0.0.0.0:2181:NIOServerCnxnFactory@192] - Accepted socket connection from /127.0.0.1:52761 +2016-12-06 19:34:36,231 [myid:1] - INFO [NIOServerCxn.Factory:0.0.0.0/0.0.0.0:2181:NIOServerCnxn@827] - Processing ruok command from /127.0.0.1:52761 +2016-12-06 19:34:36,231 [myid:1] - INFO [Thread-1140:NIOServerCnxn@1008] - Closed socket connection for client /127.0.0.1:52761 (no session established for client) +2016-12-06 19:34:46,149 [myid:1] - INFO [NIOServerCxn.Factory:0.0.0.0/0.0.0.0:2181:NIOServerCnxnFactory@192] - Accepted socket connection from /127.0.0.1:52767 +2016-12-06 19:34:46,149 [myid:1] - INFO 
[NIOServerCxn.Factory:0.0.0.0/0.0.0.0:2181:NIOServerCnxn@827] - Processing ruok command from /127.0.0.1:52767 +2016-12-06 19:34:46,149 [myid:1] - INFO [Thread-1141:NIOServerCnxn@1008] - Closed socket connection for client /127.0.0.1:52767 (no session established for client) +2016-12-06 19:34:46,230 [myid:1] - INFO [NIOServerCxn.Factory:0.0.0.0/0.0.0.0:2181:NIOServerCnxnFactory@192] - Accepted socket connection from /127.0.0.1:52768 +2016-12-06 19:34:46,230 [myid:1] - INFO [NIOServerCxn.Factory:0.0.0.0/0.0.0.0:2181:NIOServerCnxn@827] - Processing ruok command from /127.0.0.1:52768 +2016-12-06 19:34:46,230 [myid:1] - INFO [Thread-1142:NIOServerCnxn@1008] - Closed socket connection for client /127.0.0.1:52768 (no session established for client) +``` + +Kubernetes also supports more powerful, but more complex, logging integrations +with [Google Cloud Logging](https://github.com/kubernetes/contrib/blob/master/logging/fluentd-sidecar-gcp/README.md) +and [ELK](https://github.com/kubernetes/contrib/blob/master/logging/fluentd-sidecar-es/README.md). +For cluster level log shipping and aggregation, you should consider deploying a +[sidecar](http://blog.kubernetes.io/2015/06/the-distributed-system-toolkit-patterns.html) +container to rotate and ship your logs. + +#### Configuring a Non-Privileged User + +The best practices with respect to allowing an application to run as a privileged +user inside of a container are a matter of debate. If your organization requires +that applications be run as a non-privileged user you can use a +[SecurityContext](/docs/user-guide/security-context/) to control the user that +the entry point runs as. + +The `zk` StatefulSet's Pod `template` contains a SecurityContext. + +```yaml +securityContext: + runAsUser: 1000 + fsGroup: 1000 +``` + +In the Pods' containers, UID 1000 corresponds to the zookeeper user and GID 1000 +corresponds to the zookeeper group. + +Get the ZooKeeper process information from the `zk-0` Pod. + +```shell +kubectl exec zk-0 -- ps -elf +``` + +As the `runAsUser` field of the `securityContext` object is set to 1000, +instead of running as root, the ZooKeeper process runs as the zookeeper user. + +```shell +F S UID PID PPID C PRI NI ADDR SZ WCHAN STIME TTY TIME CMD +4 S zookeep+ 1 0 0 80 0 - 1127 - 20:46 ? 00:00:00 sh -c zkGenConfig.sh && zkServer.sh start-foreground +0 S zookeep+ 27 1 0 80 0 - 1155556 - 20:46 ? 00:00:19 /usr/lib/jvm/java-8-openjdk-amd64/bin/java -Dzookeeper.log.dir=/var/log/zookeeper -Dzookeeper.root.logger=INFO,CONSOLE -cp /usr/bin/../build/classes:/usr/bin/../build/lib/*.jar:/usr/bin/../share/zookeeper/zookeeper-3.4.9.jar:/usr/bin/../share/zookeeper/slf4j-log4j12-1.6.1.jar:/usr/bin/../share/zookeeper/slf4j-api-1.6.1.jar:/usr/bin/../share/zookeeper/netty-3.10.5.Final.jar:/usr/bin/../share/zookeeper/log4j-1.2.16.jar:/usr/bin/../share/zookeeper/jline-0.9.94.jar:/usr/bin/../src/java/lib/*.jar:/usr/bin/../etc/zookeeper: -Xmx2G -Xms2G -Dcom.sun.management.jmxremote -Dcom.sun.management.jmxremote.local.only=false org.apache.zookeeper.server.quorum.QuorumPeerMain /usr/bin/../etc/zookeeper/zoo.cfg +``` + +By default, when the Pod's PersistentVolume is mounted to the ZooKeeper server's +data directory, it is only accessible by the root user. This configuration +prevents the ZooKeeper process from writing to its WAL and storing its snapshots. + +Get the file permissions of the ZooKeeper data directory on the `zk-0` Pod. 
+
+```shell
+kubectl exec -ti zk-0 -- ls -ld /var/lib/zookeeper/data
+```
+
+As the `fsGroup` field of the `securityContext` object is set to 1000,
+the ownership of the Pods' PersistentVolumes is set to the zookeeper group,
+and the ZooKeeper process is able to successfully read and write its data.
+
+```shell
+drwxr-sr-x 3 zookeeper zookeeper 4096 Dec 5 20:45 /var/lib/zookeeper/data
+```
+
+### Managing the ZooKeeper Process
+
+The [ZooKeeper documentation](https://zookeeper.apache.org/doc/current/zookeeperAdmin.html#sc_supervision)
+indicates that "You will want to have a supervisory process that
+manages each of your ZooKeeper server processes (JVM)." Utilizing a watchdog
+(supervisory process) to restart failed processes in a distributed system is a
+common pattern. When deploying an application in Kubernetes, rather than using
+an external utility as a supervisory process, you should use Kubernetes as the
+watchdog for your application.
+
+#### Handling Process Failure
+
+
+[Restart Policies](/docs/user-guide/pod-states/#restartpolicy) control how
+Kubernetes handles process failures for the entry point of the container in a Pod.
+For Pods in a StatefulSet, the only appropriate RestartPolicy is Always, and this
+is the default value. For stateful applications you should **never** override
+the default policy.
+
+
+Examine the process tree for the ZooKeeper server running in the `zk-0` Pod.
+
+```shell
+kubectl exec zk-0 -- ps -ef
+```
+
+The command used as the container's entry point has PID 1, and
+the ZooKeeper process, a child of the entry point, has PID 27.
+
+
+```
+UID PID PPID C STIME TTY TIME CMD
+zookeep+ 1 0 0 15:03 ? 00:00:00 sh -c zkGenConfig.sh && zkServer.sh start-foreground
+zookeep+ 27 1 0 15:03 ? 00:00:03 /usr/lib/jvm/java-8-openjdk-amd64/bin/java -Dzookeeper.log.dir=/var/log/zookeeper -Dzookeeper.root.logger=INFO,CONSOLE -cp /usr/bin/../build/classes:/usr/bin/../build/lib/*.jar:/usr/bin/../share/zookeeper/zookeeper-3.4.9.jar:/usr/bin/../share/zookeeper/slf4j-log4j12-1.6.1.jar:/usr/bin/../share/zookeeper/slf4j-api-1.6.1.jar:/usr/bin/../share/zookeeper/netty-3.10.5.Final.jar:/usr/bin/../share/zookeeper/log4j-1.2.16.jar:/usr/bin/../share/zookeeper/jline-0.9.94.jar:/usr/bin/../src/java/lib/*.jar:/usr/bin/../etc/zookeeper: -Xmx2G -Xms2G -Dcom.sun.management.jmxremote -Dcom.sun.management.jmxremote.local.only=false org.apache.zookeeper.server.quorum.QuorumPeerMain /usr/bin/../etc/zookeeper/zoo.cfg
+```
+
+
+In one terminal, watch the Pods in the `zk` StatefulSet.
+
+```shell
+kubectl get pod -w -l app=zk
+```
+
+
+In another terminal, kill the ZooKeeper process in Pod `zk-0`.
+
+```shell
+kubectl exec zk-0 -- pkill java
+```
+
+
+The death of the ZooKeeper process caused its parent process to terminate. As
+the RestartPolicy of the container is Always, the parent process was relaunched.
+
+
+```shell
+NAME READY STATUS RESTARTS AGE
+zk-0 1/1 Running 0 21m
+zk-1 1/1 Running 0 20m
+zk-2 1/1 Running 0 19m
+NAME READY STATUS RESTARTS AGE
+zk-0 0/1 Error 0 29m
+zk-0 0/1 Running 1 29m
+zk-0 1/1 Running 1 29m
+```
+
+
+If your application uses a script (such as zkServer.sh) to launch the process
+that implements the application's business logic, the script must terminate with the
+child process. This ensures that Kubernetes will restart the application's
+container when the process implementing the application's business logic fails.
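+
+A minimal sketch of such a wrapper script is shown below. It assumes the
+`zkGenConfig.sh` and `zkServer.sh` scripts from the tutorial's container image;
+the important property is that the shell `exec`s the server, so the container's
+entry point exits exactly when the server process exits.
+
+```bash
+#!/usr/bin/env bash
+# Sketch of a wrapper entry point: generate the configuration, then replace
+# the shell with the foreground server process. If the JVM dies, the entry
+# point dies with it, and the kubelet restarts the container.
+set -e
+zkGenConfig.sh
+exec zkServer.sh start-foreground
+```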
+ + +#### Testing for Liveness + + +Configuring your application to restart failed processes is not sufficient to +keep a distributed system healthy. There are many scenarios where +a system's processes can be both alive and unresponsive, or otherwise +unhealthy. You should use liveness probes in order to notify Kubernetes +that your application's processes are unhealthy and should be restarted. + + +The Pod `template` for the `zk` StatefulSet specifies a liveness probe. + + +```yaml + livenessProbe: + exec: + command: + - "zkOk.sh" + initialDelaySeconds: 15 + timeoutSeconds: 5 +``` + + +The probe calls a simple bash script that uses the ZooKeeper `ruok` four letter +word to test the server's health. + + +```bash +ZK_CLIENT_PORT=${ZK_CLIENT_PORT:-2181} +OK=$(echo ruok | nc 127.0.0.1 $ZK_CLIENT_PORT) +if [ "$OK" == "imok" ]; then + exit 0 +else + exit 1 +fi +``` + + +In one terminal window, watch the Pods in the `zk` StatefulSet. + + +```shell +kubectl get pod -w -l app=zk +``` + + +In another window, delete the `zkOk.sh` script from the file system of Pod `zk-0`. + + +```shell +kubectl exec zk-0 -- rm /opt/zookeeper/bin/zkOk.sh +``` + + +When the liveness probe for the ZooKeeper process fails, Kubernetes will +automatically restart the process for you, ensuring that unhealthy processes in +the ensemble are restarted. + + +```shell +kubectl get pod -w -l app=zk +NAME READY STATUS RESTARTS AGE +zk-0 1/1 Running 0 1h +zk-1 1/1 Running 0 1h +zk-2 1/1 Running 0 1h +NAME READY STATUS RESTARTS AGE +zk-0 0/1 Running 0 1h +zk-0 0/1 Running 1 1h +zk-0 1/1 Running 1 1h +``` + + +#### Testing for Readiness + + +Readiness is not the same as liveness. If a process is alive, it is scheduled +and healthy. If a process is ready, it is able to process input. Liveness is +a necessary, but not sufficient, condition for readiness. There are many cases, +particularly during initialization and termination, when a process can be +alive but not ready. + + +If you specify a readiness probe, Kubernetes will ensure that your application's +processes will not receive network traffic until their readiness checks pass. + + +For a ZooKeeper server, liveness implies readiness. Therefore, the readiness +probe from the `zookeeper.yaml` manifest is identical to the liveness probe. + + +```yaml + readinessProbe: + exec: + command: + - "zkOk.sh" + initialDelaySeconds: 15 + timeoutSeconds: 5 +``` + + +Even though the liveness and readiness probes are identical, it is important +to specify both. This ensures that only healthy servers in the ZooKeeper +ensemble receive network traffic. + + +### Tolerating Node Failure + +ZooKeeper needs a quorum of servers in order to successfully commit mutations +to data. For a three server ensemble, two servers must be healthy in order for +writes to succeed. In quorum based systems, members are deployed across failure +domains to ensure availability. In order to avoid an outage, due to the loss of an +individual machine, best practices preclude co-locating multiple instances of the +application on the same machine. + +By default, Kubernetes may co-locate Pods in a StatefulSet on the same node. +For the three server ensemble you created, if two servers reside on the same +node, and that node fails, the clients of your ZooKeeper service will experience +an outage until at least one of the Pods can be rescheduled. + +You should always provision additional capacity to allow the processes of critical +systems to be rescheduled in the event of node failures. 
If you do so, then the +outage will only last until the Kubernetes scheduler reschedules one of the ZooKeeper +servers. However, if you want your service to tolerate node failures with no downtime, +you should use a `PodAntiAffinity` annotation. + +Get the nodes for Pods in the `zk` Stateful Set. + +```shell{% raw %} +for i in 0 1 2; do kubectl get pod zk-$i --template {{.spec.nodeName}}; echo ""; done +``` {% endraw %} + +All of the Pods in the `zk` StatefulSet are deployed on different nodes. + +```shell +kubernetes-minion-group-cxpk +kubernetes-minion-group-a5aq +kubernetes-minion-group-2g2d +``` + +This is because the Pods in the `zk` StatefulSet contain a +[PodAntiAffinity](/docs/user-guide/node-selection/) annotation. + +```yaml +scheduler.alpha.kubernetes.io/affinity: > + { + "podAntiAffinity": { + "requiredDuringSchedulingRequiredDuringExecution": [{ + "labelSelector": { + "matchExpressions": [{ + "key": "app", + "operator": "In", + "values": ["zk-headless"] + }] + }, + "topologyKey": "kubernetes.io/hostname" + }] + } + } +``` + +The `requiredDuringSchedulingRequiredDuringExecution` field tells the +Kubernetes Scheduler that it should never co-locate two Pods from the `zk-headless` +Service in the domain defined by the `topologyKey`. The `topologyKey` +`kubernetes.io/hostname` indicates that the domain is an individual node. Using +different rules, labels, and selectors, you can extend this technique to spread +your ensemble across physical, network, and power failure domains. + +### Surviving Maintenance + +**In this section you will cordon and drain nodes. If you are using this tutorial +on a shared cluster, be sure that this will not adversely affect other tenants.** + +The previous section showed you how to spread your Pods across nodes to survive +unplanned node failures, but you also need to plan for temporary node failures +that occur due to planned maintenance. + +Get the nodes in your cluster. + +```shell +kubectl get nodes +``` + +Use [`kubectl cordon`](/docs/user-guide/kubectl/kubectl_cordon/) to +cordon all but four of the nodes in your cluster. + +```shell{% raw %} +kubectl cordon < node name > +```{% endraw %} + +Get the `zk-budget` PodDisruptionBudget. + +```shell +kubectl get poddisruptionbudget zk-budget +``` + +The `min-available` field indicates to Kubernetes that at least two Pods from +`zk` StatefulSet must be available at any time. + +```yaml +NAME MIN-AVAILABLE ALLOWED-DISRUPTIONS AGE +zk-budget 2 1 1h + +``` + +In one terminal, watch the Pods in the `zk` StatefulSet. + +```shell +kubectl get pods -w -l app=zk +``` + +In another terminal, get the nodes that the Pods are currently scheduled on. + +```shell{% raw %} +for i in 0 1 2; do kubectl get pod zk-$i --template {{.spec.nodeName}}; echo ""; done +kubernetes-minion-group-pb41 +kubernetes-minion-group-ixsl +kubernetes-minion-group-i4c4 +{% endraw %}``` + +Use [`kubectl drain`](/docs/user-guide/kubectl/kubectl_drain/) to cordon and +drain the node on which the `zk-0` Pod is scheduled. 
+
+```shell{% raw %}
+kubectl drain $(kubectl get pod zk-0 --template {{.spec.nodeName}}) --ignore-daemonsets --force --delete-local-data
+WARNING: Deleting pods not managed by ReplicationController, ReplicaSet, Job, or DaemonSet: fluentd-cloud-logging-kubernetes-minion-group-pb41, kube-proxy-kubernetes-minion-group-pb41; Ignoring DaemonSet-managed pods: node-problem-detector-v0.1-o5elz
+pod "zk-0" deleted
+node "kubernetes-minion-group-pb41" drained
+{% endraw %}```
+
+As there are four nodes in your cluster, `kubectl drain` succeeds and the
+`zk-0` Pod is rescheduled to another node.
+
+```
+NAME READY STATUS RESTARTS AGE
+zk-0 1/1 Running 2 1h
+zk-1 1/1 Running 0 1h
+zk-2 1/1 Running 0 1h
+NAME READY STATUS RESTARTS AGE
+zk-0 1/1 Terminating 2 2h
+zk-0 0/1 Terminating 2 2h
+zk-0 0/1 Terminating 2 2h
+zk-0 0/1 Terminating 2 2h
+zk-0 0/1 Pending 0 0s
+zk-0 0/1 Pending 0 0s
+zk-0 0/1 ContainerCreating 0 0s
+zk-0 0/1 Running 0 51s
+zk-0 1/1 Running 0 1m
+```
+
+Keep watching the StatefulSet's Pods in the first terminal and drain the node on which
+`zk-1` is scheduled.
+
+```shell{% raw %}
+kubectl drain $(kubectl get pod zk-1 --template {{.spec.nodeName}}) --ignore-daemonsets --force --delete-local-data
+node "kubernetes-minion-group-ixsl" cordoned
+WARNING: Deleting pods not managed by ReplicationController, ReplicaSet, Job, or DaemonSet: fluentd-cloud-logging-kubernetes-minion-group-ixsl, kube-proxy-kubernetes-minion-group-ixsl; Ignoring DaemonSet-managed pods: node-problem-detector-v0.1-voc74
+pod "zk-1" deleted
+node "kubernetes-minion-group-ixsl" drained
+{% endraw %}```
+
+The `zk-1` Pod cannot be scheduled. As the `zk` StatefulSet contains a
+`PodAntiAffinity` annotation preventing co-location of the Pods, and as only
+two nodes are schedulable, the Pod will remain in a Pending state.
+
+```shell
+kubectl get pods -w -l app=zk
+NAME READY STATUS RESTARTS AGE
+zk-0 1/1 Running 2 1h
+zk-1 1/1 Running 0 1h
+zk-2 1/1 Running 0 1h
+NAME READY STATUS RESTARTS AGE
+zk-0 1/1 Terminating 2 2h
+zk-0 0/1 Terminating 2 2h
+zk-0 0/1 Terminating 2 2h
+zk-0 0/1 Terminating 2 2h
+zk-0 0/1 Pending 0 0s
+zk-0 0/1 Pending 0 0s
+zk-0 0/1 ContainerCreating 0 0s
+zk-0 0/1 Running 0 51s
+zk-0 1/1 Running 0 1m
+zk-1 1/1 Terminating 0 2h
+zk-1 0/1 Terminating 0 2h
+zk-1 0/1 Terminating 0 2h
+zk-1 0/1 Terminating 0 2h
+zk-1 0/1 Pending 0 0s
+zk-1 0/1 Pending 0 0s
+```
+
+Continue to watch the Pods of the StatefulSet, and drain the node on which
+`zk-2` is scheduled.
+
+```shell{% raw %}
+kubectl drain $(kubectl get pod zk-2 --template {{.spec.nodeName}}) --ignore-daemonsets --force --delete-local-data
+node "kubernetes-minion-group-i4c4" cordoned
+WARNING: Deleting pods not managed by ReplicationController, ReplicaSet, Job, or DaemonSet: fluentd-cloud-logging-kubernetes-minion-group-i4c4, kube-proxy-kubernetes-minion-group-i4c4; Ignoring DaemonSet-managed pods: node-problem-detector-v0.1-dyrog
+WARNING: Ignoring DaemonSet-managed pods: node-problem-detector-v0.1-dyrog; Deleting pods not managed by ReplicationController, ReplicaSet, Job, or DaemonSet: fluentd-cloud-logging-kubernetes-minion-group-i4c4, kube-proxy-kubernetes-minion-group-i4c4
+There are pending pods when an error occurred: Cannot evict pod as it would violate the pod's disruption budget.
+pod/zk-2
+{% endraw %}```
+
+Use `CTRL-C` to terminate kubectl.
+
+You cannot drain the third node because evicting `zk-2` would violate `zk-budget`. However,
+the node will remain cordoned.
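+
+If you want to confirm why the eviction was refused (a quick check; the exact
+column names can vary between kubectl versions), inspect the budget again. With
+`zk-1` still Pending, only two of the three servers are Running and Ready, and,
+because `minAvailable` is 2, no further disruptions are allowed.
+
+```shell
+kubectl get poddisruptionbudget zk-budget
+```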
+ +Use `zkCli.sh` to retrieve the value you entered during the sanity test from `zk-0`. + +```shell +kubectl exec zk-0 zkCli.sh get /hello +``` + +The service is still available because its PodDisruptionBudget is respected. + +``` +WatchedEvent state:SyncConnected type:None path:null +world +cZxid = 0x200000002 +ctime = Wed Dec 07 00:08:59 UTC 2016 +mZxid = 0x200000002 +mtime = Wed Dec 07 00:08:59 UTC 2016 +pZxid = 0x200000002 +cversion = 0 +dataVersion = 0 +aclVersion = 0 +ephemeralOwner = 0x0 +dataLength = 5 +numChildren = 0 +``` + +Use [`kubectl uncordon`](/docs/user-guide/kubectl/kubectl_uncordon/) to uncordon the first node. + +```shell +kubectl uncordon kubernetes-minion-group-pb41 +node "kubernetes-minion-group-pb41" uncordoned +``` + +`zk-1` is rescheduled on this node. Wait until `zk-1` is Running and Ready. + +```shell +kubectl get pods -w -l app=zk +NAME READY STATUS RESTARTS AGE +zk-0 1/1 Running 2 1h +zk-1 1/1 Running 0 1h +zk-2 1/1 Running 0 1h +NAME READY STATUS RESTARTS AGE +zk-0 1/1 Terminating 2 2h +zk-0 0/1 Terminating 2 2h +zk-0 0/1 Terminating 2 2h +zk-0 0/1 Terminating 2 2h +zk-0 0/1 Pending 0 0s +zk-0 0/1 Pending 0 0s +zk-0 0/1 ContainerCreating 0 0s +zk-0 0/1 Running 0 51s +zk-0 1/1 Running 0 1m +zk-1 1/1 Terminating 0 2h +zk-1 0/1 Terminating 0 2h +zk-1 0/1 Terminating 0 2h +zk-1 0/1 Terminating 0 2h +zk-1 0/1 Pending 0 0s +zk-1 0/1 Pending 0 0s +zk-1 0/1 Pending 0 12m +zk-1 0/1 ContainerCreating 0 12m +zk-1 0/1 Running 0 13m +zk-1 1/1 Running 0 13m +``` + +Attempt to drain the node on which `zk-2` is scheduled. + +```shell{% raw %} +kubectl drain $(kubectl get pod zk-2 --template {{.spec.nodeName}}) --ignore-daemonsets --force --delete-local-data +node "kubernetes-minion-group-i4c4" already cordoned +WARNING: Deleting pods not managed by ReplicationController, ReplicaSet, Job, or DaemonSet: fluentd-cloud-logging-kubernetes-minion-group-i4c4, kube-proxy-kubernetes-minion-group-i4c4; Ignoring DaemonSet-managed pods: node-problem-detector-v0.1-dyrog +pod "heapster-v1.2.0-2604621511-wht1r" deleted +pod "zk-2" deleted +node "kubernetes-minion-group-i4c4" drained +{% endraw %}``` + +This time `kubectl drain` succeeds. + +Uncordon the second node to allow `zk-2` to be rescheduled. + +```shell +kubectl uncordon kubernetes-minion-group-ixsl +node "kubernetes-minion-group-ixsl" uncordoned +``` + +You can use `kubectl drain` in conjunction with PodDisruptionBudgets to ensure that your service +remains available during maintenance. If drain is used to cordon nodes and evict pods prior to +taking the node offline for maintenance, services that express a disruption budget will have that +budget respected. You should always allocate additional capacity for critical services so that +their Pods can be immediately rescheduled. + +{% endcapture %} + +{% capture cleanup %} +* Use `kubectl uncordon` to uncordon all the nodes in your cluster. +* You will need to delete the persistent storage media for the PersistentVolumes +used in this tutorial. Follow the necessary steps, based on your environment, +storage configuration, and provisioning method, to ensure that all storage is +reclaimed. 
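+
+For the first cleanup step, a minimal sketch that uncordons every node at once
+(it assumes your kubectl supports `-o jsonpath`; uncordoning a node that is
+already schedulable is harmless):
+
+```shell
+# Uncordon every node in the cluster.
+for n in $(kubectl get nodes -o jsonpath='{.items[*].metadata.name}'); do
+  kubectl uncordon "$n"
+done
+```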
+{% endcapture %} +{% include templates/tutorial.md %} diff --git a/docs/tutorials/stateful-application/zookeeper.yaml b/docs/tutorials/stateful-application/zookeeper.yaml new file mode 100644 index 0000000000..75c4220576 --- /dev/null +++ b/docs/tutorials/stateful-application/zookeeper.yaml @@ -0,0 +1,164 @@ +--- +apiVersion: v1 +kind: Service +metadata: + name: zk-headless + labels: + app: zk-headless +spec: + ports: + - port: 2888 + name: server + - port: 3888 + name: leader-election + clusterIP: None + selector: + app: zk +--- +apiVersion: v1 +kind: ConfigMap +metadata: + name: zk-config +data: + ensemble: "zk-0;zk-1;zk-2" + jvm.heap: "2G" + tick: "2000" + init: "10" + sync: "5" + client.cnxns: "60" + snap.retain: "3" + purge.interval: "1" +--- +apiVersion: policy/v1beta1 +kind: PodDisruptionBudget +metadata: + name: zk-budget +spec: + selector: + matchLabels: + app: zk + minAvailable: 2 +--- +apiVersion: apps/v1beta1 +kind: StatefulSet +metadata: + name: zk +spec: + serviceName: zk-headless + replicas: 3 + template: + metadata: + labels: + app: zk + annotations: + pod.alpha.kubernetes.io/initialized: "true" + scheduler.alpha.kubernetes.io/affinity: > + { + "podAntiAffinity": { + "requiredDuringSchedulingRequiredDuringExecution": [{ + "labelSelector": { + "matchExpressions": [{ + "key": "app", + "operator": "In", + "values": ["zk-headless"] + }] + }, + "topologyKey": "kubernetes.io/hostname" + }] + } + } + spec: + containers: + - name: k8szk + imagePullPolicy: Always + image: gcr.io/google_samples/k8szk:v1 + resources: + requests: + memory: "4Gi" + cpu: "1" + ports: + - containerPort: 2181 + name: client + - containerPort: 2888 + name: server + - containerPort: 3888 + name: leader-election + env: + - name : ZK_ENSEMBLE + valueFrom: + configMapKeyRef: + name: zk-config + key: ensemble + - name : ZK_HEAP_SIZE + valueFrom: + configMapKeyRef: + name: zk-config + key: jvm.heap + - name : ZK_TICK_TIME + valueFrom: + configMapKeyRef: + name: zk-config + key: tick + - name : ZK_INIT_LIMIT + valueFrom: + configMapKeyRef: + name: zk-config + key: init + - name : ZK_SYNC_LIMIT + valueFrom: + configMapKeyRef: + name: zk-config + key: tick + - name : ZK_MAX_CLIENT_CNXNS + valueFrom: + configMapKeyRef: + name: zk-config + key: client.cnxns + - name: ZK_SNAP_RETAIN_COUNT + valueFrom: + configMapKeyRef: + name: zk-config + key: snap.retain + - name: ZK_PURGE_INTERVAL + valueFrom: + configMapKeyRef: + name: zk-config + key: purge.interval + - name: ZK_CLIENT_PORT + value: "2181" + - name: ZK_SERVER_PORT + value: "2888" + - name: ZK_ELECTION_PORT + value: "3888" + command: + - sh + - -c + - zkGenConfig.sh && zkServer.sh start-foreground + readinessProbe: + exec: + command: + - "zkOk.sh" + initialDelaySeconds: 15 + timeoutSeconds: 5 + livenessProbe: + exec: + command: + - "zkOk.sh" + initialDelaySeconds: 15 + timeoutSeconds: 5 + volumeMounts: + - name: datadir + mountPath: /var/lib/zookeeper + securityContext: + runAsUser: 1000 + fsGroup: 1000 + volumeClaimTemplates: + - metadata: + name: datadir + annotations: + volume.alpha.kubernetes.io/storage-class: anything + spec: + accessModes: [ "ReadWriteOnce" ] + resources: + requests: + storage: 20Gi diff --git a/test/examples_test.go b/test/examples_test.go index cb876db9ec..22e71c8bb0 100644 --- a/test/examples_test.go +++ b/test/examples_test.go @@ -38,6 +38,8 @@ import ( "k8s.io/kubernetes/pkg/apis/extensions" expvalidation "k8s.io/kubernetes/pkg/apis/extensions/validation" "k8s.io/kubernetes/pkg/capabilities" + "k8s.io/kubernetes/pkg/apis/policy" + 
policyvalidation "k8s.io/kubernetes/pkg/apis/policy/validation" "k8s.io/kubernetes/pkg/registry/batch/job" "k8s.io/kubernetes/pkg/runtime" "k8s.io/kubernetes/pkg/types" @@ -147,6 +149,11 @@ func validateObject(obj runtime.Object) (errors field.ErrorList) { t.Namespace = api.NamespaceDefault } errors = apps_validation.ValidateStatefulSet(t) + case *policy.PodDisruptionBudget: + if t.Namespace == "" { + t.Namespace = api.NamespaceDefault + } + errors = policyvalidation.ValidatePodDisruptionBudget(t) default: errors = field.ErrorList{} errors = append(errors, field.InternalError(field.NewPath(""), fmt.Errorf("no validation defined for %#v", obj))) @@ -323,6 +330,7 @@ func TestExampleObjectSchemas(t *testing.T) { "mysql-configmap": {&api.ConfigMap{}}, "mysql-statefulset": {&apps.StatefulSet{}}, "web": {&api.Service{}, &apps.StatefulSet{}}, + "zookeeper": {&api.Service{}, &api.ConfigMap{}, &policy.PodDisruptionBudget{}, &apps.StatefulSet{}}, }, }