Update to k8s 1.9.4.

pull/2633/head
dlorenc 2018-03-18 19:40:34 -07:00
parent 6bf2661ca2
commit a000c35e17
102 changed files with 47819 additions and 8321 deletions

Godeps/Godeps.json (generated): 2958 changed lines. File diff suppressed because it is too large.


@ -1,18 +0,0 @@
BRANCH=`git rev-parse --abbrev-ref HEAD`
COMMIT=`git rev-parse --short HEAD`
GOLDFLAGS="-X main.branch $(BRANCH) -X main.commit $(COMMIT)"
default: build
race:
@go test -v -race -test.run="TestSimulate_(100op|1000op)"
# go get github.com/kisielk/errcheck
errcheck:
@errcheck -ignorepkg=bytes -ignore=os:Remove github.com/boltdb/bolt
test:
@go test -v -cover .
@go test -v ./cmd/bolt
.PHONY: fmt test


@@ -2,3 +2,4 @@
 *.test
 *.swp
 /bin/
+cmd/bolt/bolt

vendor/github.com/coreos/bbolt/Makefile (generated, vendored, new file): 30 lines

@@ -0,0 +1,30 @@
+BRANCH=`git rev-parse --abbrev-ref HEAD`
+COMMIT=`git rev-parse --short HEAD`
+GOLDFLAGS="-X main.branch $(BRANCH) -X main.commit $(COMMIT)"
+
+default: build
+
+race:
+	@go test -v -race -test.run="TestSimulate_(100op|1000op)"
+
+fmt:
+	!(gofmt -l -s -d $(shell find . -name \*.go) | grep '[a-z]')
+
+# go get honnef.co/go/tools/simple
+gosimple:
+	gosimple ./...
+
+# go get honnef.co/go/tools/unused
+unused:
+	unused ./...
+
+# go get github.com/kisielk/errcheck
+errcheck:
+	@errcheck -ignorepkg=bytes -ignore=os:Remove github.com/coreos/bbolt
+
+test:
+	go test -timeout 20m -v -coverprofile cover.out -covermode atomic
+	# Note: gets "program not an importable package" in out of path builds
+	go test -v ./cmd/bolt
+
+.PHONY: race fmt errcheck test gosimple unused


@@ -1,6 +1,16 @@
-Bolt [![Coverage Status](https://coveralls.io/repos/boltdb/bolt/badge.svg?branch=master)](https://coveralls.io/r/boltdb/bolt?branch=master) [![GoDoc](https://godoc.org/github.com/boltdb/bolt?status.svg)](https://godoc.org/github.com/boltdb/bolt) ![Version](https://img.shields.io/badge/version-1.2.1-green.svg)
+bbolt
 ====
+
+[![Go Report Card](https://goreportcard.com/badge/github.com/coreos/bbolt?style=flat-square)](https://goreportcard.com/report/github.com/coreos/bbolt)
+[![Coverage](https://codecov.io/gh/coreos/bbolt/branch/master/graph/badge.svg)](https://codecov.io/gh/coreos/bbolt)
+[![Godoc](http://img.shields.io/badge/go-documentation-blue.svg?style=flat-square)](https://godoc.org/github.com/coreos/bbolt)
+
+bbolt is a fork of [Ben Johnson's][gh_ben] [Bolt][bolt] key/value
+store. The purpose of this fork is to provide the Go community with an active
+maintenance and development target for Bolt; the goal is improved reliability
+and stability. bbolt includes bug fixes, performance enhancements, and features
+not found in Bolt while preserving backwards compatibility with the Bolt API.
+
 Bolt is a pure Go key/value store inspired by [Howard Chu's][hyc_symas]
 [LMDB project][lmdb]. The goal of the project is to provide a simple,
 fast, and reliable database for projects that don't require a full database
@@ -10,16 +20,18 @@ Since Bolt is meant to be used as such a low-level piece of functionality,
 simplicity is key. The API will be small and only focus on getting values
 and setting values. That's it.
 
+[gh_ben]: https://github.com/benbjohnson
+[bolt]: https://github.com/boltdb/bolt
 [hyc_symas]: https://twitter.com/hyc_symas
 [lmdb]: http://symas.com/mdb/
 
 ## Project Status
 
-Bolt is stable and the API is fixed. Full unit test coverage and randomized
-black box testing are used to ensure database consistency and thread safety.
-Bolt is currently in high-load production environments serving databases as
-large as 1TB. Many companies such as Shopify and Heroku use Bolt-backed
-services every day.
+Bolt is stable, the API is fixed, and the file format is fixed. Full unit
+test coverage and randomized black box testing are used to ensure database
+consistency and thread safety. Bolt is currently used in high-load production
+environments serving databases as large as 1TB. Many companies such as
+Shopify and Heroku use Bolt-backed services every day.
 
 ## Table of Contents
@@ -59,7 +71,7 @@ services every day.
 To start using Bolt, install Go and run `go get`:
 
 ```sh
-$ go get github.com/boltdb/bolt/...
+$ go get github.com/coreos/bbolt/...
 ```
 
 This will retrieve the library and install the `bolt` command line utility into
@@ -79,7 +91,7 @@ package main
 import (
 	"log"
 
-	"github.com/boltdb/bolt"
+	bolt "github.com/coreos/bbolt"
 )
 
 func main() {
@@ -209,7 +221,7 @@ and then safely close your transaction if an error is returned. This is the
 recommended way to use Bolt transactions.
 
 However, sometimes you may want to manually start and end your transactions.
-You can use the `Tx.Begin()` function directly but **please** be sure to close
+You can use the `DB.Begin()` function directly but **please** be sure to close
 the transaction.
 
 ```go
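
The hunk above corrects the method name: transactions are started with `DB.Begin()`, not a method on `Tx`. A minimal sketch of the manual pattern the README describes, assuming an open `*bolt.DB` (bucket and key names are illustrative):

```go
// Manual transaction management; db is an open *bolt.DB.
tx, err := db.Begin(true) // true = writable
if err != nil {
	return err
}
defer tx.Rollback() // harmless after a successful Commit

b, err := tx.CreateBucketIfNotExists([]byte("widgets")) // illustrative name
if err != nil {
	return err
}
if err := b.Put([]byte("answer"), []byte("42")); err != nil {
	return err
}
return tx.Commit()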
@@ -395,7 +407,7 @@ db.View(func(tx *bolt.Tx) error {
 	c := tx.Bucket([]byte("MyBucket")).Cursor()
 
 	prefix := []byte("1234")
-	for k, v := c.Seek(prefix); bytes.HasPrefix(k, prefix); k, v = c.Next() {
+	for k, v := c.Seek(prefix); k != nil && bytes.HasPrefix(k, prefix); k, v = c.Next() {
 		fmt.Printf("key=%s, value=%s\n", k, v)
 	}
 
@@ -448,6 +460,10 @@ db.View(func(tx *bolt.Tx) error {
 })
 ```
 
+Please note that keys and values in `ForEach()` are only valid while
+the transaction is open. If you need to use a key or value outside of
+the transaction, you must use `copy()` to copy it to another byte
+slice.
+
 ### Nested buckets
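
The caveat added above matters because returned slices point into the memory-mapped file. A short sketch of the `copy()` pattern it prescribes, assuming an open `*bolt.DB` (bucket name is illustrative):

```go
var valCopy []byte
err := db.View(func(tx *bolt.Tx) error {
	return tx.Bucket([]byte("MyBucket")).ForEach(func(k, v []byte) error {
		// v is only valid inside this transaction; copy it out.
		valCopy = make([]byte, len(v))
		copy(valCopy, v)
		return nil
	})
})
if err != nil {
	return err
}
// valCopy remains valid here, after the transaction has closed.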
@@ -460,6 +476,55 @@ func (*Bucket) CreateBucketIfNotExists(key []byte) (*Bucket, error)
 func (*Bucket) DeleteBucket(key []byte) error
 ```
 
+Say you had a multi-tenant application where the root level bucket was the account bucket. Inside of this bucket was a sequence of accounts which themselves are buckets. And inside the sequence bucket you could have many buckets pertaining to the Account itself (Users, Notes, etc) isolating the information into logical groupings.
+
+```go
+// createUser creates a new user in the given account.
+func createUser(accountID int, u *User) error {
+	// Start the transaction.
+	tx, err := db.Begin(true)
+	if err != nil {
+		return err
+	}
+	defer tx.Rollback()
+
+	// Retrieve the root bucket for the account.
+	// Assume this has already been created when the account was set up.
+	root := tx.Bucket([]byte(strconv.FormatUint(accountID, 10)))
+
+	// Setup the users bucket.
+	bkt, err := root.CreateBucketIfNotExists([]byte("USERS"))
+	if err != nil {
+		return err
+	}
+
+	// Generate an ID for the new user.
+	userID, err := bkt.NextSequence()
+	if err != nil {
+		return err
+	}
+	u.ID = userID
+
+	// Marshal and save the encoded user.
+	if buf, err := json.Marshal(u); err != nil {
+		return err
+	} else if err := bkt.Put([]byte(strconv.FormatUint(u.ID, 10)), buf); err != nil {
+		return err
+	}
+
+	// Commit the transaction.
+	if err := tx.Commit(); err != nil {
+		return err
+	}
+
+	return nil
+}
+```
+
 ### Database backups
@@ -469,7 +534,7 @@ this from a read-only transaction, it will perform a hot backup and not block
 your other database reads and writes.
 
 By default, it will use a regular file handle which will utilize the operating
-system's page cache. See the [`Tx`](https://godoc.org/github.com/boltdb/bolt#Tx)
+system's page cache. See the [`Tx`](https://godoc.org/github.com/coreos/bbolt#Tx)
 documentation for information about optimizing for larger-than-RAM datasets.
 
 One common use case is to backup over HTTP so you can use tools like `cURL` to
@@ -715,6 +780,9 @@ Here are a few things to note when evaluating and using Bolt:
   can be reused by a new page or can be unmapped from virtual memory and you'll
   see an `unexpected fault address` panic when accessing it.
 
+* Bolt uses an exclusive write lock on the database file so it cannot be
+  shared by multiple processes.
+
 * Be careful when using `Bucket.FillPercent`. Setting a high fill percent for
   buckets that have random inserts will cause your database to have very poor
   page utilization.
@@ -755,7 +823,7 @@ Here are a few things to note when evaluating and using Bolt:
 ## Reading the Source
 
-Bolt is a relatively small code base (<3KLOC) for an embedded, serializable,
+Bolt is a relatively small code base (<5KLOC) for an embedded, serializable,
 transactional key/value database so it can be a good starting point for people
 interested in how databases work.
@@ -848,5 +916,13 @@ Below is a list of public, open source projects that use Bolt:
 * [Algernon](https://github.com/xyproto/algernon) - A HTTP/2 web server with built-in support for Lua. Uses BoltDB as the default database backend.
 * [MuLiFS](https://github.com/dankomiocevic/mulifs) - Music Library Filesystem creates a filesystem to organise your music files.
 * [GoShort](https://github.com/pankajkhairnar/goShort) - GoShort is a URL shortener written in Golang and BoltDB for persistent key/value storage and for routing it's using high performent HTTPRouter.
+* [torrent](https://github.com/anacrolix/torrent) - Full-featured BitTorrent client package and utilities in Go. BoltDB is a storage backend in development.
+* [gopherpit](https://github.com/gopherpit/gopherpit) - A web service to manage Go remote import paths with custom domains
+* [bolter](https://github.com/hasit/bolter) - Command-line app for viewing BoltDB file in your terminal.
+* [boltcli](https://github.com/spacewander/boltcli) - the redis-cli for boltdb with Lua script support.
+* [btcwallet](https://github.com/btcsuite/btcwallet) - A bitcoin wallet.
+* [dcrwallet](https://github.com/decred/dcrwallet) - A wallet for the Decred cryptocurrency.
+* [Ironsmith](https://github.com/timshannon/ironsmith) - A simple, script-driven continuous integration (build -> test -> release) tool, with no external dependencies
+* [BoltHold](https://github.com/timshannon/bolthold) - An embeddable NoSQL store for Go types built on BoltDB
 
 If you are using Bolt in a project please send a pull request to add it to the list.


@@ -5,3 +5,6 @@ const maxMapSize = 0x7FFFFFFF // 2GB
 
 // maxAllocSize is the size used when creating array pointers.
 const maxAllocSize = 0xFFFFFFF
+
+// Are unaligned load/stores broken on this arch?
+var brokenUnaligned = false


@@ -5,3 +5,6 @@ const maxMapSize = 0xFFFFFFFFFFFF // 256TB
 
 // maxAllocSize is the size used when creating array pointers.
 const maxAllocSize = 0x7FFFFFFF
+
+// Are unaligned load/stores broken on this arch?
+var brokenUnaligned = false

vendor/github.com/coreos/bbolt/bolt_arm.go (generated, vendored, new file): 28 lines

@@ -0,0 +1,28 @@
+package bolt
+
+import "unsafe"
+
+// maxMapSize represents the largest mmap size supported by Bolt.
+const maxMapSize = 0x7FFFFFFF // 2GB
+
+// maxAllocSize is the size used when creating array pointers.
+const maxAllocSize = 0xFFFFFFF
+
+// Are unaligned load/stores broken on this arch?
+var brokenUnaligned bool
+
+func init() {
+	// Simple check to see whether this arch handles unaligned load/stores
+	// correctly.
+	//
+	// ARM9 and older devices require load/stores to be from/to aligned
+	// addresses. If not, the lower 2 bits are cleared and that address is
+	// read in a jumbled up order.
+	//
+	// See http://infocenter.arm.com/help/index.jsp?topic=/com.arm.doc.faqs/ka15414.html
+	raw := [6]byte{0xfe, 0xef, 0x11, 0x22, 0x22, 0x11}
+	val := *(*uint32)(unsafe.Pointer(uintptr(unsafe.Pointer(&raw)) + 2))
+
+	brokenUnaligned = val != 0x11222211
+}


@@ -7,3 +7,6 @@ const maxMapSize = 0xFFFFFFFFFFFF // 256TB
 
 // maxAllocSize is the size used when creating array pointers.
 const maxAllocSize = 0x7FFFFFFF
+
+// Are unaligned load/stores broken on this arch?
+var brokenUnaligned = false

vendor/github.com/coreos/bbolt/bolt_mips64x.go (generated, vendored, new file): 12 lines

@@ -0,0 +1,12 @@
+// +build mips64 mips64le
+
+package bolt
+
+// maxMapSize represents the largest mmap size supported by Bolt.
+const maxMapSize = 0x8000000000 // 512GB
+
+// maxAllocSize is the size used when creating array pointers.
+const maxAllocSize = 0x7FFFFFFF
+
+// Are unaligned load/stores broken on this arch?
+var brokenUnaligned = false


@@ -1,7 +1,12 @@
+// +build mips mipsle
+
 package bolt
 
 // maxMapSize represents the largest mmap size supported by Bolt.
-const maxMapSize = 0x7FFFFFFF // 2GB
+const maxMapSize = 0x40000000 // 1GB
 
 // maxAllocSize is the size used when creating array pointers.
 const maxAllocSize = 0xFFFFFFF
+
+// Are unaligned load/stores broken on this arch?
+var brokenUnaligned = false


@@ -7,3 +7,6 @@ const maxMapSize = 0xFFFFFFFFFFFF // 256TB
 
 // maxAllocSize is the size used when creating array pointers.
 const maxAllocSize = 0x7FFFFFFF
+
+// Are unaligned load/stores broken on this arch?
+var brokenUnaligned = false


@@ -7,3 +7,6 @@ const maxMapSize = 0xFFFFFFFFFFFF // 256TB
 
 // maxAllocSize is the size used when creating array pointers.
 const maxAllocSize = 0x7FFFFFFF
+
+// Are unaligned load/stores broken on this arch?
+var brokenUnaligned = false


@@ -7,3 +7,6 @@ const maxMapSize = 0xFFFFFFFFFFFF // 256TB
 
 // maxAllocSize is the size used when creating array pointers.
 const maxAllocSize = 0x7FFFFFFF
+
+// Are unaligned load/stores broken on this arch?
+var brokenUnaligned = false


@@ -13,29 +13,32 @@ import (
 // flock acquires an advisory lock on a file descriptor.
 func flock(db *DB, mode os.FileMode, exclusive bool, timeout time.Duration) error {
 	var t time.Time
-	for {
-		// If we're beyond our timeout then return an error.
-		// This can only occur after we've attempted a flock once.
-		if t.IsZero() {
-			t = time.Now()
-		} else if timeout > 0 && time.Since(t) > timeout {
-			return ErrTimeout
-		}
-		flag := syscall.LOCK_SH
-		if exclusive {
-			flag = syscall.LOCK_EX
-		}
-
-		// Otherwise attempt to obtain an exclusive lock.
-		err := syscall.Flock(int(db.file.Fd()), flag|syscall.LOCK_NB)
+	if timeout != 0 {
+		t = time.Now()
+	}
+	fd := db.file.Fd()
+	flag := syscall.LOCK_NB
+	if exclusive {
+		flag |= syscall.LOCK_EX
+	} else {
+		flag |= syscall.LOCK_SH
+	}
+	for {
+		// Attempt to obtain an exclusive lock.
+		err := syscall.Flock(int(fd), flag)
 		if err == nil {
 			return nil
 		} else if err != syscall.EWOULDBLOCK {
 			return err
 		}
 
+		// If we timed out then return an error.
+		if timeout != 0 && time.Since(t) > timeout-flockRetryTimeout {
+			return ErrTimeout
+		}
+
 		// Wait for a bit and try again.
-		time.Sleep(50 * time.Millisecond)
+		time.Sleep(flockRetryTimeout)
 	}
 }
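
The rewritten loop records the start time once, retries every `flockRetryTimeout` (50ms), and gives up with `ErrTimeout` once `Options.Timeout` has nearly elapsed. A sketch of how that surfaces to callers (path and duration are illustrative):

```go
// Open returns bolt.ErrTimeout if another process holds a conflicting
// file lock for longer than the configured timeout.
db, err := bolt.Open("my.db", 0600, &bolt.Options{Timeout: 1 * time.Second})
if err == bolt.ErrTimeout {
	log.Fatal("database is locked by another process")
}
```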


@@ -13,34 +13,33 @@ import (
 // flock acquires an advisory lock on a file descriptor.
 func flock(db *DB, mode os.FileMode, exclusive bool, timeout time.Duration) error {
 	var t time.Time
-	for {
-		// If we're beyond our timeout then return an error.
-		// This can only occur after we've attempted a flock once.
-		if t.IsZero() {
-			t = time.Now()
-		} else if timeout > 0 && time.Since(t) > timeout {
-			return ErrTimeout
-		}
-		var lock syscall.Flock_t
-		lock.Start = 0
-		lock.Len = 0
-		lock.Pid = 0
-		lock.Whence = 0
-		lock.Pid = 0
-		if exclusive {
-			lock.Type = syscall.F_WRLCK
-		} else {
-			lock.Type = syscall.F_RDLCK
-		}
-		err := syscall.FcntlFlock(db.file.Fd(), syscall.F_SETLK, &lock)
+	if timeout != 0 {
+		t = time.Now()
+	}
+	fd := db.file.Fd()
+	var lockType int16
+	if exclusive {
+		lockType = syscall.F_WRLCK
+	} else {
+		lockType = syscall.F_RDLCK
+	}
+	for {
+		// Attempt to obtain an exclusive lock.
+		lock := syscall.Flock_t{Type: lockType}
+		err := syscall.FcntlFlock(fd, syscall.F_SETLK, &lock)
 		if err == nil {
 			return nil
 		} else if err != syscall.EAGAIN {
 			return err
 		}
 
+		// If we timed out then return an error.
+		if timeout != 0 && time.Since(t) > timeout-flockRetryTimeout {
+			return ErrTimeout
+		}
+
 		// Wait for a bit and try again.
-		time.Sleep(50 * time.Millisecond)
+		time.Sleep(flockRetryTimeout)
 	}
 }


@@ -59,29 +59,30 @@ func flock(db *DB, mode os.FileMode, exclusive bool, timeout time.Duration) error {
 	db.lockfile = f
 
 	var t time.Time
-	for {
-		// If we're beyond our timeout then return an error.
-		// This can only occur after we've attempted a flock once.
-		if t.IsZero() {
-			t = time.Now()
-		} else if timeout > 0 && time.Since(t) > timeout {
-			return ErrTimeout
-		}
-
-		var flag uint32 = flagLockFailImmediately
-		if exclusive {
-			flag |= flagLockExclusive
-		}
-
-		err := lockFileEx(syscall.Handle(db.lockfile.Fd()), flag, 0, 1, 0, &syscall.Overlapped{})
+	if timeout != 0 {
+		t = time.Now()
+	}
+	fd := f.Fd()
+	var flag uint32 = flagLockFailImmediately
+	if exclusive {
+		flag |= flagLockExclusive
+	}
+	for {
+		// Attempt to obtain an exclusive lock.
+		err := lockFileEx(syscall.Handle(fd), flag, 0, 1, 0, &syscall.Overlapped{})
 		if err == nil {
 			return nil
 		} else if err != errLockViolation {
 			return err
 		}
 
+		// If we timed out then return an error.
+		if timeout != 0 && time.Since(t) > timeout-flockRetryTimeout {
+			return ErrTimeout
+		}
+
 		// Wait for a bit and try again.
-		time.Sleep(50 * time.Millisecond)
+		time.Sleep(flockRetryTimeout)
 	}
 }
@@ -89,7 +90,7 @@ func flock(db *DB, mode os.FileMode, exclusive bool, timeout time.Duration) error {
 func funlock(db *DB) error {
 	err := unlockFileEx(syscall.Handle(db.lockfile.Fd()), 0, 1, 0, &syscall.Overlapped{})
 	db.lockfile.Close()
-	os.Remove(db.path+lockExt)
+	os.Remove(db.path + lockExt)
 	return err
 }


@@ -14,13 +14,6 @@ const (
 	MaxValueSize = (1 << 31) - 2
 )
 
-const (
-	maxUint = ^uint(0)
-	minUint = 0
-	maxInt  = int(^uint(0) >> 1)
-	minInt  = -maxInt - 1
-)
-
 const bucketHeaderSize = int(unsafe.Sizeof(bucket{}))
 
 const (
@@ -130,9 +123,17 @@ func (b *Bucket) Bucket(name []byte) *Bucket {
 func (b *Bucket) openBucket(value []byte) *Bucket {
 	var child = newBucket(b.tx)
 
+	// If unaligned load/stores are broken on this arch and value is
+	// unaligned simply clone to an aligned byte array.
+	unaligned := brokenUnaligned && uintptr(unsafe.Pointer(&value[0]))&3 != 0
+
+	if unaligned {
+		value = cloneBytes(value)
+	}
+
 	// If this is a writable transaction then we need to copy the bucket entry.
 	// Read-only transactions can point directly at the mmap entry.
-	if b.tx.writable {
+	if b.tx.writable && !unaligned {
 		child.bucket = &bucket{}
 		*child.bucket = *(*bucket)(unsafe.Pointer(&value[0]))
 	} else {
@@ -167,9 +168,8 @@ func (b *Bucket) CreateBucket(key []byte) (*Bucket, error) {
 	if bytes.Equal(key, k) {
 		if (flags & bucketLeafFlag) != 0 {
 			return nil, ErrBucketExists
-		} else {
-			return nil, ErrIncompatibleValue
 		}
+		return nil, ErrIncompatibleValue
 	}
 
 	// Create empty, inline bucket.
@@ -316,7 +316,12 @@ func (b *Bucket) Delete(key []byte) error {
 	// Move cursor to correct position.
 	c := b.Cursor()
-	_, _, flags := c.seek(key)
+	k, _, flags := c.seek(key)
+
+	// Return nil if the key doesn't exist.
+	if !bytes.Equal(key, k) {
+		return nil
+	}
 
 	// Return an error if there is already existing bucket value.
 	if (flags & bucketLeafFlag) != 0 {
@@ -329,6 +334,28 @@ func (b *Bucket) Delete(key []byte) error {
 	return nil
 }
 
+// Sequence returns the current integer for the bucket without incrementing it.
+func (b *Bucket) Sequence() uint64 { return b.bucket.sequence }
+
+// SetSequence updates the sequence number for the bucket.
+func (b *Bucket) SetSequence(v uint64) error {
+	if b.tx.db == nil {
+		return ErrTxClosed
+	} else if !b.Writable() {
+		return ErrTxNotWritable
+	}
+
+	// Materialize the root node if it hasn't been already so that the
+	// bucket will be saved during commit.
+	if b.rootNode == nil {
+		_ = b.node(b.root, nil)
+	}
+
+	// Increment and return the sequence.
+	b.bucket.sequence = v
+	return nil
+}
+
 // NextSequence returns an autoincrementing integer for the bucket.
 func (b *Bucket) NextSequence() (uint64, error) {
 	if b.tx.db == nil {
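
`Sequence` and `SetSequence` are bbolt additions next to the pre-existing `NextSequence`. A minimal usage sketch inside a writable transaction (bucket name is illustrative):

```go
err := db.Update(func(tx *bolt.Tx) error {
	b := tx.Bucket([]byte("widgets"))
	current := b.Sequence() // read the counter without incrementing
	if err := b.SetSequence(1000); err != nil {
		return err // fails with ErrTxNotWritable in a read-only tx
	}
	next, err := b.NextSequence() // increments and returns: 1001
	if err != nil {
		return err
	}
	_, _ = current, next
	return nil
})
```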


@@ -7,8 +7,7 @@ import (
 	"log"
 	"os"
 	"runtime"
-	"runtime/debug"
-	"strings"
+	"sort"
 	"sync"
 	"time"
 	"unsafe"
@@ -23,6 +22,8 @@ const version = 2
 // Represents a marker value to indicate that a file is a Bolt DB.
 const magic uint32 = 0xED0CDAED
 
+const pgidNoFreelist pgid = 0xffffffffffffffff
+
 // IgnoreNoSync specifies whether the NoSync field of a DB is ignored when
 // syncing changes to a file. This is required as some operating systems,
 // such as OpenBSD, do not have a unified buffer cache (UBC) and writes
@@ -39,6 +40,9 @@ const (
 // default page size for db is set to the OS page size.
 var defaultPageSize = os.Getpagesize()
 
+// The time elapsed between consecutive file locking attempts.
+const flockRetryTimeout = 50 * time.Millisecond
+
 // DB represents a collection of buckets persisted to a file on disk.
 // All data access is performed through transactions which can be obtained through the DB.
 // All the functions on DB will return a ErrDatabaseNotOpen if accessed before Open() is called.
@@ -61,6 +65,11 @@ type DB struct {
 	// THIS IS UNSAFE. PLEASE USE WITH CAUTION.
 	NoSync bool
 
+	// When true, skips syncing freelist to disk. This improves the database
+	// write performance under normal operation, but requires a full database
+	// re-sync during recovery.
+	NoFreelistSync bool
+
 	// When true, skips the truncate call when growing the database.
 	// Setting this to true is only safe on non-ext3/ext4 systems.
 	// Skipping truncation avoids preallocation of hard drive space and
@@ -107,9 +116,11 @@ type DB struct {
 	opened   bool
 	rwtx     *Tx
 	txs      []*Tx
-	freelist *freelist
 	stats    Stats
 
+	freelist     *freelist
+	freelistLoad sync.Once
+
 	pagePool sync.Pool
 
 	batchMu sync.Mutex
@@ -148,14 +159,17 @@ func (db *DB) String() string {
 // If the file does not exist then it will be created automatically.
 // Passing in nil options will cause Bolt to open the database with the default options.
 func Open(path string, mode os.FileMode, options *Options) (*DB, error) {
-	var db = &DB{opened: true}
+	db := &DB{
+		opened: true,
+	}
 
 	// Set default options if no options are provided.
 	if options == nil {
 		options = DefaultOptions
 	}
+	db.NoSync = options.NoSync
 	db.NoGrowSync = options.NoGrowSync
 	db.MmapFlags = options.MmapFlags
+	db.NoFreelistSync = options.NoFreelistSync
 
 	// Set default values for later DB operations.
 	db.MaxBatchSize = DefaultMaxBatchSize
@@ -184,6 +198,7 @@ func Open(path string, mode os.FileMode, options *Options) (*DB, error) {
 	// The database file is locked using the shared lock (more than one process may
 	// hold a lock at the same time) otherwise (options.ReadOnly is set).
 	if err := flock(db, mode, !db.readOnly, options.Timeout); err != nil {
+		db.lockfile = nil // make 'unused' happy. TODO: rework locks
 		_ = db.close()
 		return nil, err
 	}
@@ -191,6 +206,11 @@ func Open(path string, mode os.FileMode, options *Options) (*DB, error) {
 	// Default values for test hooks
 	db.ops.writeAt = db.file.WriteAt
 
+	if db.pageSize = options.PageSize; db.pageSize == 0 {
+		// Set the default page size to the OS page size.
+		db.pageSize = defaultPageSize
+	}
+
 	// Initialize the database if it doesn't exist.
 	if info, err := db.file.Stat(); err != nil {
 		return nil, err
@@ -202,20 +222,21 @@ func Open(path string, mode os.FileMode, options *Options) (*DB, error) {
 	} else {
 		// Read the first meta page to determine the page size.
 		var buf [0x1000]byte
-		if _, err := db.file.ReadAt(buf[:], 0); err == nil {
-			m := db.pageInBuffer(buf[:], 0).meta()
-			if err := m.validate(); err != nil {
-				// If we can't read the page size, we can assume it's the same
-				// as the OS -- since that's how the page size was chosen in the
-				// first place.
-				//
-				// If the first page is invalid and this OS uses a different
-				// page size than what the database was created with then we
-				// are out of luck and cannot access the database.
-				db.pageSize = os.Getpagesize()
-			} else {
+		// If we can't read the page size, but can read a page, assume
+		// it's the same as the OS or one given -- since that's how the
+		// page size was chosen in the first place.
+		//
+		// If the first page is invalid and this OS uses a different
+		// page size than what the database was created with then we
+		// are out of luck and cannot access the database.
+		//
+		// TODO: scan for next page
+		if bw, err := db.file.ReadAt(buf[:], 0); err == nil && bw == len(buf) {
+			if m := db.pageInBuffer(buf[:], 0).meta(); m.validate() == nil {
 				db.pageSize = int(m.pageSize)
 			}
+		} else {
+			return nil, ErrInvalid
 		}
 	}
@@ -232,14 +253,50 @@ func Open(path string, mode os.FileMode, options *Options) (*DB, error) {
 		return nil, err
 	}
 
-	// Read in the freelist.
-	db.freelist = newFreelist()
-	db.freelist.read(db.page(db.meta().freelist))
+	if db.readOnly {
+		return db, nil
+	}
+
+	db.loadFreelist()
+
+	// Flush freelist when transitioning from no sync to sync so
+	// NoFreelistSync unaware boltdb can open the db later.
+	if !db.NoFreelistSync && !db.hasSyncedFreelist() {
+		tx, err := db.Begin(true)
+		if tx != nil {
+			err = tx.Commit()
+		}
+		if err != nil {
+			_ = db.close()
+			return nil, err
+		}
+	}
 
 	// Mark the database as opened and return.
 	return db, nil
 }
 
+// loadFreelist reads the freelist if it is synced, or reconstructs it
+// by scanning the DB if it is not synced. It assumes there are no
+// concurrent accesses being made to the freelist.
+func (db *DB) loadFreelist() {
+	db.freelistLoad.Do(func() {
+		db.freelist = newFreelist()
+		if !db.hasSyncedFreelist() {
+			// Reconstruct free list by scanning the DB.
+			db.freelist.readIDs(db.freepages())
+		} else {
+			// Read free list from freelist page.
+			db.freelist.read(db.page(db.meta().freelist))
+		}
+		db.stats.FreePageN = len(db.freelist.ids)
+	})
+}
+
+func (db *DB) hasSyncedFreelist() bool {
+	return db.meta().freelist != pgidNoFreelist
+}
+
 // mmap opens the underlying memory-mapped file and initializes the meta references.
 // minsz is the minimum size that the new mmap can be.
 func (db *DB) mmap(minsz int) error {
@@ -341,9 +398,6 @@ func (db *DB) mmapSize(size int) (int, error) {
 
 // init creates a new database file and initializes its meta pages.
 func (db *DB) init() error {
-	// Set the page size to the OS page size.
-	db.pageSize = os.Getpagesize()
-
 	// Create two meta pages on a buffer.
 	buf := make([]byte, db.pageSize*4)
 	for i := 0; i < 2; i++ {
@@ -526,21 +580,36 @@ func (db *DB) beginRWTx() (*Tx, error) {
 	t := &Tx{writable: true}
 	t.init(db)
 	db.rwtx = t
+	db.freePages()
+	return t, nil
+}
 
-	// Free any pages associated with closed read-only transactions.
-	var minid txid = 0xFFFFFFFFFFFFFFFF
-	for _, t := range db.txs {
-		if t.meta.txid < minid {
-			minid = t.meta.txid
-		}
-	}
+// freePages releases any pages associated with closed read-only transactions.
+func (db *DB) freePages() {
+	// Free all pending pages prior to earliest open transaction.
+	sort.Sort(txsById(db.txs))
+	minid := txid(0xFFFFFFFFFFFFFFFF)
+	if len(db.txs) > 0 {
+		minid = db.txs[0].meta.txid
+	}
 	if minid > 0 {
 		db.freelist.release(minid - 1)
 	}
-
-	return t, nil
+	// Release unused txid extents.
+	for _, t := range db.txs {
+		db.freelist.releaseRange(minid, t.meta.txid-1)
+		minid = t.meta.txid + 1
+	}
+	db.freelist.releaseRange(minid, txid(0xFFFFFFFFFFFFFFFF))
+	// Any page both allocated and freed in an extent is safe to release.
 }
 
+type txsById []*Tx
+
+func (t txsById) Len() int           { return len(t) }
+func (t txsById) Swap(i, j int)      { t[i], t[j] = t[j], t[i] }
+func (t txsById) Less(i, j int) bool { return t[i].meta.txid < t[j].meta.txid }
+
 // removeTx removes a transaction from the database.
 func (db *DB) removeTx(tx *Tx) {
 	// Release the read lock on the mmap.
@@ -552,7 +621,10 @@ func (db *DB) removeTx(tx *Tx) {
 	// Remove the transaction.
 	for i, t := range db.txs {
 		if t == tx {
-			db.txs = append(db.txs[:i], db.txs[i+1:]...)
+			last := len(db.txs) - 1
+			db.txs[i] = db.txs[last]
+			db.txs[last] = nil
+			db.txs = db.txs[:last]
 			break
 		}
 	}
@@ -630,11 +702,7 @@ func (db *DB) View(fn func(*Tx) error) error {
 		return err
 	}
 
-	if err := t.Rollback(); err != nil {
-		return err
-	}
-
-	return nil
+	return t.Rollback()
 }
 
 // Batch calls fn as part of a batch. It behaves similar to Update,
@@ -734,10 +802,8 @@ retry:
 			// pass success, or bolt internal errors, to all callers
 			for _, c := range b.calls {
-				if c.err != nil {
-					c.err <- err
-				}
+				c.err <- err
 			}
 			break retry
 		}
 	}
@@ -823,7 +889,7 @@ func (db *DB) meta() *meta {
 }
 
 // allocate returns a contiguous block of memory starting at a given page.
-func (db *DB) allocate(count int) (*page, error) {
+func (db *DB) allocate(txid txid, count int) (*page, error) {
 	// Allocate a temporary buffer for the page.
 	var buf []byte
 	if count == 1 {
@@ -835,7 +901,7 @@ func (db *DB) allocate(txid txid, count int) (*page, error) {
 	p.overflow = uint32(count - 1)
 
 	// Use pages from the freelist if they are available.
-	if p.id = db.freelist.allocate(count); p.id != 0 {
+	if p.id = db.freelist.allocate(txid, count); p.id != 0 {
 		return p, nil
 	}
@@ -890,6 +956,38 @@ func (db *DB) IsReadOnly() bool {
 	return db.readOnly
 }
 
+func (db *DB) freepages() []pgid {
+	tx, err := db.beginTx()
+	defer func() {
+		err = tx.Rollback()
+		if err != nil {
+			panic("freepages: failed to rollback tx")
+		}
+	}()
+	if err != nil {
+		panic("freepages: failed to open read only tx")
+	}
+
+	reachable := make(map[pgid]*page)
+	nofreed := make(map[pgid]bool)
+	ech := make(chan error)
+	go func() {
+		for e := range ech {
+			panic(fmt.Sprintf("freepages: failed to get all reachable pages (%v)", e))
+		}
+	}()
+	tx.checkBucket(&tx.root, reachable, nofreed, ech)
+	close(ech)
+
+	var fids []pgid
+	for i := pgid(2); i < db.meta().pgid; i++ {
+		if _, ok := reachable[i]; !ok {
+			fids = append(fids, i)
+		}
+	}
+	return fids
+}
+
 // Options represents the options that can be set when opening a database.
 type Options struct {
 	// Timeout is the amount of time to wait to obtain a file lock.
@@ -900,6 +998,10 @@ type Options struct {
 	// Sets the DB.NoGrowSync flag before memory mapping the file.
 	NoGrowSync bool
 
+	// Do not sync freelist to disk. This improves the database write performance
+	// under normal operation, but requires a full database re-sync during recovery.
+	NoFreelistSync bool
+
 	// Open database in read-only mode. Uses flock(..., LOCK_SH |LOCK_NB) to
 	// grab a shared lock (UNIX).
 	ReadOnly bool
@@ -916,6 +1018,14 @@ type Options struct {
 	// If initialMmapSize is smaller than the previous database size,
 	// it takes no effect.
 	InitialMmapSize int
+
+	// PageSize overrides the default OS page size.
+	PageSize int
+
+	// NoSync sets the initial value of DB.NoSync. Normally this can just be
+	// set directly on the DB itself when returned from Open(), but this option
+	// is useful in APIs which expose Options but not the underlying DB.
+	NoSync bool
 }
 
 // DefaultOptions represent the options used if nil options are passed into Open().
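
Taken together, the new fields let callers trade durability for write throughput at open time rather than by mutating the returned `DB`. A sketch with illustrative values:

```go
db, err := bolt.Open("my.db", 0600, &bolt.Options{
	NoFreelistSync: true, // don't persist the freelist; rebuild it by scan on open
	NoSync:         true, // skip fsync on commit; data is lost on power failure
	PageSize:       4096, // override the OS page size for the new file
})
```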
@@ -952,15 +1062,11 @@ func (s *Stats) Sub(other *Stats) Stats {
 	diff.PendingPageN = s.PendingPageN
 	diff.FreeAlloc = s.FreeAlloc
 	diff.FreelistInuse = s.FreelistInuse
-	diff.TxN = other.TxN - s.TxN
+	diff.TxN = s.TxN - other.TxN
 	diff.TxStats = s.TxStats.Sub(&other.TxStats)
 	return diff
 }
 
-func (s *Stats) add(other *Stats) {
-	s.TxStats.add(&other.TxStats)
-}
-
 type Info struct {
 	Data     uintptr
 	PageSize int
@@ -999,7 +1105,8 @@ func (m *meta) copy(dest *meta) {
 func (m *meta) write(p *page) {
 	if m.root.root >= m.pgid {
 		panic(fmt.Sprintf("root bucket pgid (%d) above high water mark (%d)", m.root.root, m.pgid))
-	} else if m.freelist >= m.pgid {
+	} else if m.freelist >= m.pgid && m.freelist != pgidNoFreelist {
+		// TODO: reject pgidNoFreeList if !NoFreelistSync
 		panic(fmt.Sprintf("freelist pgid (%d) above high water mark (%d)", m.freelist, m.pgid))
 	}
@@ -1026,11 +1133,3 @@ func _assert(condition bool, msg string, v ...interface{}) {
 		panic(fmt.Sprintf("assertion failed: "+msg, v...))
 	}
 }
-
-func warn(v ...interface{})              { fmt.Fprintln(os.Stderr, v...) }
-func warnf(msg string, v ...interface{}) { fmt.Fprintf(os.Stderr, msg+"\n", v...) }
-
-func printstack() {
-	stack := strings.Join(strings.Split(string(debug.Stack()), "\n")[2:], "\n")
-	fmt.Fprintln(os.Stderr, stack)
-}


@@ -6,25 +6,40 @@ import (
 	"unsafe"
 )
 
+// txPending holds a list of pgids and corresponding allocation txns
+// that are pending to be freed.
+type txPending struct {
+	ids              []pgid
+	alloctx          []txid // txids allocating the ids
+	lastReleaseBegin txid   // beginning txid of last matching releaseRange
+}
+
 // freelist represents a list of all pages that are available for allocation.
 // It also tracks pages that have been freed but are still in use by open transactions.
 type freelist struct {
-	ids     []pgid          // all free and available free page ids.
-	pending map[txid][]pgid // mapping of soon-to-be free page ids by tx.
-	cache   map[pgid]bool   // fast lookup of all free and pending page ids.
+	ids     []pgid              // all free and available free page ids.
+	allocs  map[pgid]txid       // mapping of txid that allocated a pgid.
+	pending map[txid]*txPending // mapping of soon-to-be free page ids by tx.
+	cache   map[pgid]bool       // fast lookup of all free and pending page ids.
 }
 
 // newFreelist returns an empty, initialized freelist.
 func newFreelist() *freelist {
 	return &freelist{
-		pending: make(map[txid][]pgid),
+		allocs:  make(map[pgid]txid),
+		pending: make(map[txid]*txPending),
 		cache:   make(map[pgid]bool),
 	}
 }
 
 // size returns the size of the page after serialization.
 func (f *freelist) size() int {
-	return pageHeaderSize + (int(unsafe.Sizeof(pgid(0))) * f.count())
+	n := f.count()
+	if n >= 0xFFFF {
+		// The first element will be used to store the count. See freelist.write.
+		n++
+	}
+	return pageHeaderSize + (int(unsafe.Sizeof(pgid(0))) * n)
 }
 
 // count returns count of pages on the freelist
@@ -40,27 +55,26 @@ func (f *freelist) free_count() int {
 // pending_count returns count of pending pages
 func (f *freelist) pending_count() int {
 	var count int
-	for _, list := range f.pending {
-		count += len(list)
+	for _, txp := range f.pending {
+		count += len(txp.ids)
 	}
 	return count
 }
 
-// all returns a list of all free ids and all pending ids in one sorted list.
-func (f *freelist) all() []pgid {
-	m := make(pgids, 0)
-
-	for _, list := range f.pending {
-		m = append(m, list...)
+// copyall copies into dst a list of all free ids and all pending ids in one sorted list.
+// f.count returns the minimum length required for dst.
+func (f *freelist) copyall(dst []pgid) {
+	m := make(pgids, 0, f.pending_count())
+	for _, txp := range f.pending {
+		m = append(m, txp.ids...)
 	}
-
 	sort.Sort(m)
-	return pgids(f.ids).merge(m)
+	mergepgids(dst, f.ids, m)
 }
 
 // allocate returns the starting page id of a contiguous list of pages of a given size.
 // If a contiguous block cannot be found then 0 is returned.
-func (f *freelist) allocate(n int) pgid {
+func (f *freelist) allocate(txid txid, n int) pgid {
 	if len(f.ids) == 0 {
 		return 0
 	}
@@ -93,7 +107,7 @@ func (f *freelist) allocate(txid txid, n int) pgid {
 		for i := pgid(0); i < pgid(n); i++ {
 			delete(f.cache, initial+i)
 		}
-
+		f.allocs[initial] = txid
 		return initial
 	}
@@ -110,28 +124,73 @@ func (f *freelist) free(txid txid, p *page) {
 	}
 
 	// Free page and all its overflow pages.
-	var ids = f.pending[txid]
+	txp := f.pending[txid]
+	if txp == nil {
+		txp = &txPending{}
+		f.pending[txid] = txp
+	}
+	allocTxid, ok := f.allocs[p.id]
+	if ok {
+		delete(f.allocs, p.id)
+	} else if (p.flags & freelistPageFlag) != 0 {
+		// Freelist is always allocated by prior tx.
+		allocTxid = txid - 1
+	}
+
 	for id := p.id; id <= p.id+pgid(p.overflow); id++ {
 		// Verify that page is not already free.
 		if f.cache[id] {
 			panic(fmt.Sprintf("page %d already freed", id))
 		}
-
 		// Add to the freelist and cache.
-		ids = append(ids, id)
+		txp.ids = append(txp.ids, id)
+		txp.alloctx = append(txp.alloctx, allocTxid)
 		f.cache[id] = true
 	}
-	f.pending[txid] = ids
 }
 
 // release moves all page ids for a transaction id (or older) to the freelist.
 func (f *freelist) release(txid txid) {
 	m := make(pgids, 0)
-	for tid, ids := range f.pending {
+	for tid, txp := range f.pending {
 		if tid <= txid {
 			// Move transaction's pending pages to the available freelist.
 			// Don't remove from the cache since the page is still free.
-			m = append(m, ids...)
+			m = append(m, txp.ids...)
 			delete(f.pending, tid)
 		}
 	}
 	sort.Sort(m)
 	f.ids = pgids(f.ids).merge(m)
 }
+
+// releaseRange moves pending pages allocated within an extent [begin,end] to the free list.
+func (f *freelist) releaseRange(begin, end txid) {
+	if begin > end {
+		return
+	}
+	var m pgids
+	for tid, txp := range f.pending {
+		if tid < begin || tid > end {
+			continue
+		}
+		// Don't recompute freed pages if ranges haven't updated.
+		if txp.lastReleaseBegin == begin {
+			continue
+		}
+		for i := 0; i < len(txp.ids); i++ {
+			if atx := txp.alloctx[i]; atx < begin || atx > end {
+				continue
+			}
+			m = append(m, txp.ids[i])
+			txp.ids[i] = txp.ids[len(txp.ids)-1]
+			txp.ids = txp.ids[:len(txp.ids)-1]
+			txp.alloctx[i] = txp.alloctx[len(txp.alloctx)-1]
+			txp.alloctx = txp.alloctx[:len(txp.alloctx)-1]
+			i--
+		}
+		txp.lastReleaseBegin = begin
+		if len(txp.ids) == 0 {
+			delete(f.pending, tid)
+		}
+	}
+	sort.Sort(m)
+	f.ids = pgids(f.ids).merge(m)
+}
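
The inner loop of `releaseRange` removes matching ids with the swap-with-last idiom instead of re-slicing around each element, so each removal is O(1) and order is not preserved. The same pattern in isolation (not bbolt code):

```go
// removeIf deletes, in place and without preserving order, every element
// for which keep returns false; it runs in O(n) with no allocations.
func removeIf(ids []uint64, keep func(uint64) bool) []uint64 {
	for i := 0; i < len(ids); i++ {
		if keep(ids[i]) {
			continue
		}
		ids[i] = ids[len(ids)-1] // move the last element into this slot
		ids = ids[:len(ids)-1]   // shrink by one
		i--                      // re-check the element just swapped in
	}
	return ids
}
```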
@@ -142,12 +201,29 @@ func (f *freelist) release(txid txid) {
 // rollback removes the pages from a given pending tx.
 func (f *freelist) rollback(txid txid) {
 	// Remove page ids from cache.
-	for _, id := range f.pending[txid] {
-		delete(f.cache, id)
+	txp := f.pending[txid]
+	if txp == nil {
+		return
 	}
-
-	// Remove pages from pending list.
+	var m pgids
+	for i, pgid := range txp.ids {
+		delete(f.cache, pgid)
+		tx := txp.alloctx[i]
+		if tx == 0 {
+			continue
+		}
+		if tx != txid {
+			// Pending free aborted; restore page back to alloc list.
+			f.allocs[pgid] = tx
+		} else {
+			// Freed page was allocated by this txn; OK to throw away.
+			m = append(m, pgid)
+		}
+	}
+	// Remove pages from pending list and mark as free if allocated by txid.
 	delete(f.pending, txid)
+	sort.Sort(m)
+	f.ids = pgids(f.ids).merge(m)
 }
 
 // freed returns whether a given page is in the free list.
@@ -157,6 +233,9 @@ func (f *freelist) freed(pgid pgid) bool {
 
 // read initializes the freelist from a freelist page.
 func (f *freelist) read(p *page) {
+	if (p.flags & freelistPageFlag) == 0 {
+		panic(fmt.Sprintf("invalid freelist page: %d, page type is %s", p.id, p.typ()))
+	}
 	// If the page.count is at the max uint16 value (64k) then it's considered
 	// an overflow and the size of the freelist is stored as the first element.
 	idx, count := 0, int(p.count)
@@ -169,7 +248,7 @@ func (f *freelist) read(p *page) {
 	if count == 0 {
 		f.ids = nil
 	} else {
-		ids := ((*[maxAllocSize]pgid)(unsafe.Pointer(&p.ptr)))[idx:count]
+		ids := ((*[maxAllocSize]pgid)(unsafe.Pointer(&p.ptr)))[idx : idx+count]
 		f.ids = make([]pgid, len(ids))
 		copy(f.ids, ids)
@@ -181,27 +260,33 @@ func (f *freelist) read(p *page) {
 	f.reindex()
 }
 
+// read initializes the freelist from a given list of ids.
+func (f *freelist) readIDs(ids []pgid) {
+	f.ids = ids
+	f.reindex()
+}
+
 // write writes the page ids onto a freelist page. All free and pending ids are
 // saved to disk since in the event of a program crash, all pending ids will
 // become free.
 func (f *freelist) write(p *page) error {
 	// Combine the old free pgids and pgids waiting on an open transaction.
-	ids := f.all()
 
 	// Update the header flag.
 	p.flags |= freelistPageFlag
 
 	// The page.count can only hold up to 64k elements so if we overflow that
 	// number then we handle it by putting the size in the first element.
-	if len(ids) == 0 {
-		p.count = uint16(len(ids))
-	} else if len(ids) < 0xFFFF {
-		p.count = uint16(len(ids))
-		copy(((*[maxAllocSize]pgid)(unsafe.Pointer(&p.ptr)))[:], ids)
+	lenids := f.count()
+	if lenids == 0 {
+		p.count = uint16(lenids)
+	} else if lenids < 0xFFFF {
+		p.count = uint16(lenids)
+		f.copyall(((*[maxAllocSize]pgid)(unsafe.Pointer(&p.ptr)))[:])
 	} else {
 		p.count = 0xFFFF
-		((*[maxAllocSize]pgid)(unsafe.Pointer(&p.ptr)))[0] = pgid(len(ids))
-		copy(((*[maxAllocSize]pgid)(unsafe.Pointer(&p.ptr)))[1:], ids)
+		((*[maxAllocSize]pgid)(unsafe.Pointer(&p.ptr)))[0] = pgid(lenids)
+		f.copyall(((*[maxAllocSize]pgid)(unsafe.Pointer(&p.ptr)))[1:])
 	}
 
 	return nil
@@ -213,8 +298,8 @@ func (f *freelist) reload(p *page) {
 
 	// Build a cache of only pending pages.
 	pcache := make(map[pgid]bool)
-	for _, pendingIDs := range f.pending {
-		for _, pendingID := range pendingIDs {
+	for _, txp := range f.pending {
+		for _, pendingID := range txp.ids {
 			pcache[pendingID] = true
 		}
 	}
@@ -236,12 +321,12 @@ func (f *freelist) reload(p *page) {
 
 // reindex rebuilds the free cache based on available and pending free lists.
 func (f *freelist) reindex() {
-	f.cache = make(map[pgid]bool)
+	f.cache = make(map[pgid]bool, len(f.ids))
 	for _, id := range f.ids {
 		f.cache[id] = true
 	}
-	for _, pendingIDs := range f.pending {
-		for _, pendingID := range pendingIDs {
+	for _, txp := range f.pending {
+		for _, pendingID := range txp.ids {
 			f.cache[pendingID] = true
 		}
 	}


@@ -365,7 +365,7 @@ func (n *node) spill() error {
 	}
 
 	// Allocate contiguous space for the node.
-	p, err := tx.allocate((node.size() / tx.db.pageSize) + 1)
+	p, err := tx.allocate((node.size() + tx.db.pageSize - 1) / tx.db.pageSize)
 	if err != nil {
 		return err
 	}
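
The replacement is the standard ceiling-division fix: `(size / pageSize) + 1` allocates an extra page whenever `size` is an exact multiple of the page size, while `(size + pageSize - 1) / pageSize` rounds up only when there is a remainder. A worked check with illustrative values:

```go
package main

import "fmt"

func main() {
	size, pageSize := 4096, 4096
	overshoot := (size / pageSize) + 1        // 2 pages: one too many
	exact := (size + pageSize - 1) / pageSize // 1 page: rounds up only on remainder
	fmt.Println(overshoot, exact)             // 2 1
}
```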


@@ -145,12 +145,33 @@ func (a pgids) merge(b pgids) pgids {
 	// Return the opposite slice if one is nil.
 	if len(a) == 0 {
 		return b
-	} else if len(b) == 0 {
+	}
+	if len(b) == 0 {
 		return a
 	}
+	merged := make(pgids, len(a)+len(b))
+	mergepgids(merged, a, b)
+	return merged
+}
 
-	// Create a list to hold all elements from both lists.
-	merged := make(pgids, 0, len(a)+len(b))
+// mergepgids copies the sorted union of a and b into dst.
+// If dst is too small, it panics.
+func mergepgids(dst, a, b pgids) {
+	if len(dst) < len(a)+len(b) {
+		panic(fmt.Errorf("mergepgids bad len %d < %d + %d", len(dst), len(a), len(b)))
+	}
+	// Copy in the opposite slice if one is nil.
+	if len(a) == 0 {
+		copy(dst, b)
+		return
+	}
+	if len(b) == 0 {
+		copy(dst, a)
+		return
+	}
+
+	// Merged will hold all elements from both lists.
+	merged := dst[:0]
 
 	// Assign lead to the slice with a lower starting value, follow to the higher value.
 	lead, follow := a, b
@@ -172,7 +193,5 @@ func (a pgids) merge(b pgids) pgids {
 	}
 
 	// Append what's left in follow.
-	merged = append(merged, follow...)
-
-	return merged
+	_ = append(merged, follow...)
 }
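
The closing `_ = append(merged, follow...)` is deliberate: `merged` is `dst[:0]`, and `mergepgids` has already checked that `dst` can hold `len(a)+len(b)` elements, so the append writes through to `dst`'s backing array and the returned slice header can be dropped. The idiom in isolation (not bbolt code):

```go
package main

import "fmt"

func main() {
	dst := make([]int, 5)     // pre-sized destination
	head := dst[:0]           // length 0, capacity 5, same backing array
	head = append(head, 1, 2) // fills dst[0] and dst[1]
	_ = append(head, 3, 4, 5) // capacity suffices, so dst[2..4] are written in place
	fmt.Println(dst)          // [1 2 3 4 5]
}
```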


@@ -126,10 +126,7 @@ func (tx *Tx) DeleteBucket(name []byte) error {
 // the error is returned to the caller.
 func (tx *Tx) ForEach(fn func(name []byte, b *Bucket) error) error {
 	return tx.root.ForEach(func(k, v []byte) error {
-		if err := fn(k, tx.root.Bucket(k)); err != nil {
-			return err
-		}
-		return nil
+		return fn(k, tx.root.Bucket(k))
 	})
 }
@@ -169,28 +166,18 @@ func (tx *Tx) Commit() error {
 	// Free the old root bucket.
 	tx.meta.root.root = tx.root.root
 
-	opgid := tx.meta.pgid
-
-	// Free the freelist and allocate new pages for it. This will overestimate
-	// the size of the freelist but not underestimate the size (which would be bad).
-	tx.db.freelist.free(tx.meta.txid, tx.db.page(tx.meta.freelist))
-	p, err := tx.allocate((tx.db.freelist.size() / tx.db.pageSize) + 1)
-	if err != nil {
-		tx.rollback()
-		return err
-	}
-	if err := tx.db.freelist.write(p); err != nil {
-		tx.rollback()
-		return err
-	}
-	tx.meta.freelist = p.id
-
-	// If the high water mark has moved up then attempt to grow the database.
-	if tx.meta.pgid > opgid {
-		if err := tx.db.grow(int(tx.meta.pgid+1) * tx.db.pageSize); err != nil {
-			tx.rollback()
-			return err
-		}
+	// Free the old freelist because commit writes out a fresh freelist.
+	if tx.meta.freelist != pgidNoFreelist {
+		tx.db.freelist.free(tx.meta.txid, tx.db.page(tx.meta.freelist))
+	}
+
+	if !tx.db.NoFreelistSync {
+		err := tx.commitFreelist()
+		if err != nil {
+			return err
+		}
+	} else {
+		tx.meta.freelist = pgidNoFreelist
 	}
 
 	// Write dirty pages to disk.
@@ -235,6 +222,31 @@ func (tx *Tx) Commit() error {
 	return nil
 }
 
+func (tx *Tx) commitFreelist() error {
+	// Allocate new pages for the new free list. This will overestimate
+	// the size of the freelist but not underestimate the size (which would be bad).
+	opgid := tx.meta.pgid
+	p, err := tx.allocate((tx.db.freelist.size() / tx.db.pageSize) + 1)
+	if err != nil {
+		tx.rollback()
+		return err
+	}
+	if err := tx.db.freelist.write(p); err != nil {
+		tx.rollback()
+		return err
+	}
+	tx.meta.freelist = p.id
+
+	// If the high water mark has moved up then attempt to grow the database.
+	if tx.meta.pgid > opgid {
+		if err := tx.db.grow(int(tx.meta.pgid+1) * tx.db.pageSize); err != nil {
+			tx.rollback()
+			return err
+		}
+	}
+
+	return nil
+}
+
 // Rollback closes the transaction and ignores all previous updates. Read-only
 // transactions must be rolled back and not committed.
 func (tx *Tx) Rollback() error {
@@ -305,7 +317,11 @@ func (tx *Tx) WriteTo(w io.Writer) (n int64, err error) {
 	if err != nil {
 		return 0, err
 	}
-	defer func() { _ = f.Close() }()
+	defer func() {
+		if cerr := f.Close(); err == nil {
+			err = cerr
+		}
+	}()
 
 	// Generate a meta page. We use the same page data for both meta pages.
 	buf := make([]byte, tx.db.pageSize)
@@ -333,7 +349,7 @@ func (tx *Tx) WriteTo(w io.Writer) (n int64, err error) {
 	}
 
 	// Move past the meta pages in the file.
-	if _, err := f.Seek(int64(tx.db.pageSize*2), os.SEEK_SET); err != nil {
+	if _, err := f.Seek(int64(tx.db.pageSize*2), io.SeekStart); err != nil {
 		return n, fmt.Errorf("seek: %s", err)
 	}
@@ -344,7 +360,7 @@ func (tx *Tx) WriteTo(w io.Writer) (n int64, err error) {
 		return n, err
 	}
 
-	return n, f.Close()
+	return n, nil
 }
 
 // CopyFile copies the entire database to file at the given path.
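
With the deferred close now feeding its error back into the named return `err`, the hot-backup entry points behave as documented. A typical call site, assuming an open `*bolt.DB` (path is illustrative):

```go
// Copy the database from a read-only transaction; concurrent readers
// and writers are not blocked. CopyFile is a thin wrapper over WriteTo.
err := db.View(func(tx *bolt.Tx) error {
	return tx.CopyFile("/tmp/my.db.bak", 0600)
})
```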
@@ -379,9 +395,14 @@ func (tx *Tx) Check() <-chan error {
 }
 
 func (tx *Tx) check(ch chan error) {
+	// Force loading free list if opened in ReadOnly mode.
+	tx.db.loadFreelist()
+
 	// Check if any pages are double freed.
 	freed := make(map[pgid]bool)
-	for _, id := range tx.db.freelist.all() {
+	all := make([]pgid, tx.db.freelist.count())
+	tx.db.freelist.copyall(all)
+	for _, id := range all {
 		if freed[id] {
 			ch <- fmt.Errorf("page %d: already freed", id)
 		}
@@ -392,9 +413,11 @@ func (tx *Tx) check(ch chan error) {
 	reachable := make(map[pgid]*page)
 	reachable[0] = tx.page(0) // meta0
 	reachable[1] = tx.page(1) // meta1
-	for i := uint32(0); i <= tx.page(tx.meta.freelist).overflow; i++ {
-		reachable[tx.meta.freelist+pgid(i)] = tx.page(tx.meta.freelist)
+	if tx.meta.freelist != pgidNoFreelist {
+		for i := uint32(0); i <= tx.page(tx.meta.freelist).overflow; i++ {
+			reachable[tx.meta.freelist+pgid(i)] = tx.page(tx.meta.freelist)
+		}
 	}
 
 	// Recursively check buckets.
 	tx.checkBucket(&tx.root, reachable, freed, ch)
@@ -451,7 +474,7 @@ func (tx *Tx) checkBucket(b *Bucket, reachable map[pgid]*page, freed map[pgid]bool, ch chan error) {
 // allocate returns a contiguous block of memory starting at a given page.
 func (tx *Tx) allocate(count int) (*page, error) {
-	p, err := tx.db.allocate(count)
+	p, err := tx.db.allocate(tx.meta.txid, count)
 	if err != nil {
 		return nil, err
 	}
@@ -460,7 +483,7 @@ func (tx *Tx) allocate(count int) (*page, error) {
 	tx.pages[p.id] = p
 
 	// Update statistics.
-	tx.stats.PageCount++
+	tx.stats.PageCount += count
 	tx.stats.PageAlloc += count * tx.db.pageSize
 
 	return p, nil
View File

@@ -608,7 +608,7 @@ type raftReadyHandler struct {
}

func (s *EtcdServer) run() {
-	snap, err := s.r.raftStorage.Snapshot()
+	snapshot, err := s.r.raftStorage.Snapshot()
	if err != nil {
		plog.Panicf("get snapshot from raft storage error: %v", err)
	}

@@ -666,10 +666,10 @@ func (s *EtcdServer) run() {
	// asynchronously accept apply packets, dispatch progress in-order
	sched := schedule.NewFIFOScheduler()
	ep := etcdProgress{
-		confState: snap.Metadata.ConfState,
-		snapi:     snap.Metadata.Index,
-		appliedt:  snap.Metadata.Term,
-		appliedi:  snap.Metadata.Index,
+		confState: snapshot.Metadata.ConfState,
+		snapi:     snapshot.Metadata.Index,
+		appliedt:  snapshot.Metadata.Term,
+		appliedi:  snapshot.Metadata.Index,
	}
	defer func() {
View File

@@ -25,7 +25,7 @@ import (
	"sync/atomic"
	"time"

-	"github.com/boltdb/bolt"
+	bolt "github.com/coreos/bbolt"
	"github.com/coreos/pkg/capnslog"
)
View File

@@ -20,7 +20,7 @@ import (
	"sync/atomic"
	"time"

-	"github.com/boltdb/bolt"
+	bolt "github.com/coreos/bbolt"
)

type BatchTx interface {
View File

@@ -16,6 +16,6 @@
package backend

-import "github.com/boltdb/bolt"
+import bolt "github.com/coreos/bbolt"

var boltOpenOptions *bolt.Options = nil
View File

@@ -17,7 +17,7 @@ package backend
import (
	"syscall"

-	"github.com/boltdb/bolt"
+	bolt "github.com/coreos/bbolt"
)

// syscall.MAP_POPULATE on linux 2.6.23+ does sequential read-ahead
View File

@@ -258,6 +258,21 @@ func (s *watchableStore) cancelWatcher(wa *watcher) {
	s.mu.Unlock()
}

+func (s *watchableStore) Restore(b backend.Backend) error {
+	s.mu.Lock()
+	defer s.mu.Unlock()
+
+	err := s.store.Restore(b)
+	if err != nil {
+		return err
+	}
+
+	for wa := range s.synced.watchers {
+		s.unsynced.watchers.add(wa)
+	}
+	s.synced = newWatcherGroup()
+	return nil
+}
+
// syncWatchersLoop syncs the watcher in the unsynced map every 100ms.
func (s *watchableStore) syncWatchersLoop() {
	defer s.wg.Done()
View File

@@ -26,7 +26,7 @@ import (
var (
	// MinClusterVersion is the min cluster version this etcd binary is compatible with.
	MinClusterVersion = "3.0.0"
-	Version           = "3.1.10"
+	Version           = "3.1.11"
	APIVersion        = "unknown"

	// Git SHA Value will be set during build
View File

@@ -44,6 +44,7 @@ Ivan Krasin <krasin@golang.org>
Jason Hall <jasonhall@google.com>
Johan Euphrosine <proppy@google.com>
Kostik Shtoyk <kostik@google.com>
+Matthew Whisenhunt <matt.whisenhunt@gmail.com>
Michael McGreevy <mcgreevy@golang.org>
Nick Craig-Wood <nickcw@gmail.com>
Ross Light <light@google.com>
View File

@@ -21,7 +21,7 @@
  "basePath": "/cloudmonitoring/v2beta2/projects/",
  "rootUrl": "https://www.googleapis.com/",
  "servicePath": "cloudmonitoring/v2beta2/projects/",
- "batchPath": "batch",
+ "batchPath": "batch/cloudmonitoring/v2beta2",
  "parameters": {
    "alt": {
      "type": "string",
File diff suppressed because it is too large
File diff suppressed because it is too large
File diff suppressed because it is too large
File diff suppressed because it is too large
File diff suppressed because it is too large
File diff suppressed because it is too large
File diff suppressed because it is too large
File diff suppressed because it is too large

View File
@@ -5,6 +5,7 @@
package gensupport

import (
+	"encoding/json"
	"errors"
	"net/http"

@@ -59,3 +60,12 @@ func SendRequest(ctx context.Context, client *http.Client, req *http.Request) (*
	}
	return resp, err
}
+
+// DecodeResponse decodes the body of res into target. If there is no body,
+// target is unchanged.
+func DecodeResponse(target interface{}, res *http.Response) error {
+	if res.StatusCode == http.StatusNoContent {
+		return nil
+	}
+	return json.NewDecoder(res.Body).Decode(target)
+}
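The new DecodeResponse helper exists because a 204 No Content reply has an empty body, and feeding that to json.Decode would fail. A hedged, self-contained sketch of the same helper in use (the endpoint is illustrative, not part of the library):

package main

import (
	"encoding/json"
	"fmt"
	"net/http"
)

// decodeResponse mirrors the helper added above: a 204 No Content response
// leaves target untouched instead of failing on an empty body.
func decodeResponse(target interface{}, res *http.Response) error {
	if res.StatusCode == http.StatusNoContent {
		return nil
	}
	return json.NewDecoder(res.Body).Decode(target)
}

func main() {
	res, err := http.Get("https://httpbin.org/json") // illustrative endpoint
	if err != nil {
		fmt.Println(err)
		return
	}
	defer res.Body.Close()

	var payload map[string]interface{}
	if err := decodeResponse(&payload, res); err != nil {
		fmt.Println("decode:", err)
		return
	}
	fmt.Println(payload)
}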
View File

@@ -315,7 +315,9 @@ func (s *DefaultStorageFactory) Backends() []Backend {
	for server := range servers {
		backends = append(backends, Backend{
			Server: server,
-			TLSConfig: tlsConfig,
+			// We can't share TLSConfig across different backends to avoid races.
+			// For more details see: http://pr.k8s.io/59338
+			TLSConfig: tlsConfig.Clone(),
		})
	}
	return backends
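The Clone call gives each backend its own tls.Config so concurrent mutation of one copy cannot race with another consumer of the shared original. A minimal standard-library sketch of the same design choice (the host name is illustrative):

package main

import (
	"crypto/tls"
	"fmt"
)

func main() {
	base := &tls.Config{MinVersion: tls.VersionTLS12}

	// Clone returns an independent copy; mutating the clone (for example
	// its ServerName) leaves the shared base untouched and race-free.
	perBackend := base.Clone()
	perBackend.ServerName = "etcd-1.example.com" // illustrative host

	fmt.Printf("base=%q clone=%q\n", base.ServerName, perBackend.ServerName)
}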
View File

@@ -2175,8 +2175,12 @@ func ValidateVolumeMounts(mounts []core.VolumeMount, voldevices map[string]strin
	}

	if len(mnt.SubPath) > 0 {
+		if !utilfeature.DefaultFeatureGate.Enabled(features.VolumeSubpath) {
+			allErrs = append(allErrs, field.Forbidden(fldPath.Child("subPath"), "subPath is disabled by feature-gate"))
+		} else {
			allErrs = append(allErrs, validateLocalDescendingPath(mnt.SubPath, fldPath.Child("subPath"))...)
		}
+	}

	if mnt.MountPropagation != nil {
		allErrs = append(allErrs, validateMountPropagation(mnt.MountPropagation, container, fldPath.Child("mountPropagation"))...)
View File

@@ -52,6 +52,7 @@ go_library(
        "//vendor/k8s.io/apimachinery/pkg/types:go_default_library",
        "//vendor/k8s.io/apimachinery/pkg/util/errors:go_default_library",
        "//vendor/k8s.io/apimachinery/pkg/util/sets:go_default_library",
+       "//vendor/k8s.io/apimachinery/pkg/util/uuid:go_default_library",
        "//vendor/k8s.io/apimachinery/pkg/util/wait:go_default_library",
        "//vendor/k8s.io/client-go/tools/cache:go_default_library",
        "//vendor/k8s.io/client-go/util/flowcontrol:go_default_library",
View File

@@ -91,7 +91,7 @@ func (c *BlobDiskController) CreateVolume(name, storageAccount, storageAccountTy
		accounts = append(accounts, accountWithLocation{Name: storageAccount})
	} else {
		// find a storage account
-		accounts, err = c.common.cloud.getStorageAccounts()
+		accounts, err = c.common.cloud.getStorageAccounts(storageAccountType, location)
		if err != nil {
			// TODO: create a storage account and container
			return "", "", 0, err
View File

@@ -111,10 +111,8 @@ func (az *Cloud) InstanceID(name types.NodeName) (string, error) {
			return "", err
		}
		if isLocalInstance {
-			externalInstanceID, err := az.metadata.Text("instance/compute/vmId")
-			if err == nil {
-				return externalInstanceID, nil
-			}
+			nodeName := mapNodeNameToVMName(name)
+			return az.getMachineID(nodeName), nil
		}
	}
View File

@@ -375,10 +375,12 @@ func (az *Cloud) findServiceIPAddress(clusterName string, service *v1.Service, i
		return "", err
	}
	if !existsLb {
-		return "", fmt.Errorf("Expected to find an IP address for service %s but did not", service.Name)
+		glog.V(2).Infof("Expected to find an IP address for service %s but did not. Assuming it has been removed", service.Name)
+		return "", nil
	}
	if len(lbStatus.Ingress) < 1 {
-		return "", fmt.Errorf("Expected to find an IP address for service %s but it had no ingresses", service.Name)
+		glog.V(2).Infof("Expected to find an IP address for service %s but it had no ingresses. Assuming it has been removed", service.Name)
+		return "", nil
	}

	return lbStatus.Ingress[0].IP, nil
View File

@@ -19,56 +19,75 @@ package azure
import (
	"fmt"

+	"github.com/Azure/azure-sdk-for-go/arm/storage"
+	"github.com/Azure/go-autorest/autorest/to"
	"github.com/golang/glog"
)

+const (
+	defaultStorageAccountType  = string(storage.StandardLRS)
+	fileShareAccountNamePrefix = "f"
+)
+
// CreateFileShare creates a file share, using a matching storage account
-func (az *Cloud) CreateFileShare(name, storageAccount, storageType, location string, requestGB int) (string, string, error) {
-	var err error
-	accounts := []accountWithLocation{}
-	if len(storageAccount) > 0 {
-		accounts = append(accounts, accountWithLocation{Name: storageAccount})
-	} else {
-		// find a storage account
-		accounts, err = az.getStorageAccounts()
-		if err != nil {
-			// TODO: create a storage account and container
-			return "", "", err
-		}
-	}
-	for _, account := range accounts {
-		glog.V(4).Infof("account %s type %s location %s", account.Name, account.StorageType, account.Location)
-		if ((storageType == "" || account.StorageType == storageType) && (location == "" || account.Location == location)) || len(storageAccount) > 0 {
-			// find the access key with this account
-			key, err := az.getStorageAccesskey(account.Name)
-			if err != nil {
-				err = fmt.Errorf("could not get storage key for storage account %s: %v", account.Name, err)
-				continue
-			}
-			err = az.createFileShare(account.Name, key, name, requestGB)
-			if err != nil {
-				err = fmt.Errorf("failed to create share %s in account %s: %v", name, account.Name, err)
-				continue
-			}
-			glog.V(4).Infof("created share %s in account %s", name, account.Name)
-			return account.Name, key, err
-		}
-	}
-	if err == nil {
-		err = fmt.Errorf("failed to find a matching storage account")
-	}
-	return "", "", err
+func (az *Cloud) CreateFileShare(shareName, accountName, accountType, location string, requestGiB int) (string, string, error) {
+	if len(accountName) == 0 {
+		// find a storage account that matches accountType
+		accounts, err := az.getStorageAccounts(accountType, location)
+		if err != nil {
+			return "", "", fmt.Errorf("could not list storage accounts for account type %s: %v", accountType, err)
+		}
+		if len(accounts) > 0 {
+			accountName = accounts[0].Name
+			glog.V(4).Infof("found a matching account %s type %s location %s", accounts[0].Name, accounts[0].StorageType, accounts[0].Location)
+		}
+
+		if len(accountName) == 0 {
+			// not found a matching account, now create a new account in current resource group
+			accountName = generateStorageAccountName(fileShareAccountNamePrefix)
+			if location == "" {
+				location = az.Location
+			}
+			if accountType == "" {
+				accountType = defaultStorageAccountType
+			}
+
+			glog.V(2).Infof("azureFile - no matching account found, begin to create a new account %s in resource group %s, location: %s, accountType: %s",
+				accountName, az.ResourceGroup, location, accountType)
+			cp := storage.AccountCreateParameters{
+				Sku:      &storage.Sku{Name: storage.SkuName(accountType)},
+				Tags:     &map[string]*string{"created-by": to.StringPtr("azure-file")},
+				Location: &location}
+			cancel := make(chan struct{})
+
+			_, errchan := az.StorageAccountClient.Create(az.ResourceGroup, accountName, cp, cancel)
+			err := <-errchan
+			if err != nil {
+				return "", "", fmt.Errorf(fmt.Sprintf("Failed to create storage account %s, error: %s", accountName, err))
+			}
+		}
+	}
+
+	// find the access key with this account
+	accountKey, err := az.getStorageAccesskey(accountName)
+	if err != nil {
+		return "", "", fmt.Errorf("could not get storage key for storage account %s: %v", accountName, err)
+	}
+
+	if err := az.createFileShare(accountName, accountKey, shareName, requestGiB); err != nil {
+		return "", "", fmt.Errorf("failed to create share %s in account %s: %v", shareName, accountName, err)
+	}
+	glog.V(4).Infof("created share %s in account %s", shareName, accountName)
+	return accountName, accountKey, nil
}

// DeleteFileShare deletes a file share using storage account name and key
-func (az *Cloud) DeleteFileShare(accountName, key, name string) error {
-	err := az.deleteFileShare(accountName, key, name)
-	if err != nil {
+func (az *Cloud) DeleteFileShare(accountName, accountKey, shareName string) error {
+	if err := az.deleteFileShare(accountName, accountKey, shareName); err != nil {
		return err
	}
-	glog.V(4).Infof("share %s deleted", name)
+	glog.V(4).Infof("share %s deleted", shareName)
	return nil
}
View File

@@ -27,8 +27,8 @@ type accountWithLocation struct {
	Name, StorageType, Location string
}

-// getStorageAccounts gets the storage accounts' name, type, location in a resource group
-func (az *Cloud) getStorageAccounts() ([]accountWithLocation, error) {
+// getStorageAccounts gets name, type, location of all storage accounts in a resource group which matches matchingAccountType
+func (az *Cloud) getStorageAccounts(matchingAccountType, matchingLocation string) ([]accountWithLocation, error) {
	az.operationPollRateLimiter.Accept()
	glog.V(10).Infof("StorageAccountClient.ListByResourceGroup(%v): start", az.ResourceGroup)
	result, err := az.StorageAccountClient.ListByResourceGroup(az.ResourceGroup)

@@ -37,22 +37,22 @@ func (az *Cloud) getStorageAccounts() ([]accountWithLocation, error) {
		return nil, err
	}
	if result.Value == nil {
-		return nil, fmt.Errorf("no storage accounts from resource group %s", az.ResourceGroup)
+		return nil, fmt.Errorf("unexpected error when listing storage accounts from resource group %s", az.ResourceGroup)
	}

	accounts := []accountWithLocation{}
	for _, acct := range *result.Value {
-		if acct.Name != nil {
-			name := *acct.Name
-			loc := ""
-			if acct.Location != nil {
-				loc = *acct.Location
-			}
-			storageType := ""
-			if acct.Sku != nil {
-				storageType = string((*acct.Sku).Name)
-			}
-			accounts = append(accounts, accountWithLocation{Name: name, StorageType: storageType, Location: loc})
+		if acct.Name != nil && acct.Location != nil && acct.Sku != nil {
+			storageType := string((*acct.Sku).Name)
+			if matchingAccountType != "" && !strings.EqualFold(matchingAccountType, storageType) {
+				continue
+			}
+
+			location := *acct.Location
+			if matchingLocation != "" && !strings.EqualFold(matchingLocation, location) {
+				continue
+			}
+			accounts = append(accounts, accountWithLocation{Name: *acct.Name, StorageType: storageType, Location: location})
		}
	}
View File

@@ -33,6 +33,7 @@ import (
	"github.com/golang/glog"
	"k8s.io/apimachinery/pkg/types"
	"k8s.io/apimachinery/pkg/util/sets"
+	"k8s.io/apimachinery/pkg/util/uuid"
)

const (

@@ -52,6 +53,8 @@ const (
	// nodeLabelRole specifies the role of a node
	nodeLabelRole = "kubernetes.io/role"
+
+	storageAccountNameMaxLength = 24
)

var providerIDRE = regexp.MustCompile(`^` + CloudProviderName + `://(?:.*)/Microsoft.Compute/virtualMachines/(.+)$`)

@@ -228,7 +231,7 @@ func (az *Cloud) getAgentPoolAvailabiliySets(nodes []*v1.Node) (agentPoolAvailab
func (az *Cloud) mapLoadBalancerNameToAvailabilitySet(lbName string, clusterName string) (availabilitySetName string) {
	availabilitySetName = strings.TrimSuffix(lbName, InternalLoadBalancerNameSuffix)
-	if strings.EqualFold(clusterName, lbName) {
+	if strings.EqualFold(clusterName, availabilitySetName) {
		availabilitySetName = az.Config.PrimaryAvailabilitySetName
	}

@@ -516,3 +519,13 @@ func ExtractDiskData(diskData interface{}) (provisioningState string, diskState
	}
	return provisioningState, diskState, nil
}
+
+// get a storage account by UUID
+func generateStorageAccountName(accountNamePrefix string) string {
+	uniqueID := strings.Replace(string(uuid.NewUUID()), "-", "", -1)
+	accountName := strings.ToLower(accountNamePrefix + uniqueID)
+	if len(accountName) > storageAccountNameMaxLength {
+		return accountName[:storageAccountNameMaxLength-1]
+	}
+	return accountName
+}
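generateStorageAccountName builds a unique, lowercase account name and truncates it to Azure's 24-character limit (the source actually cuts to 23 characters, one below the limit). A self-contained sketch of the same scheme using crypto/rand instead of the apimachinery UUID helper; everything here is illustrative, not the vendored implementation:

package main

import (
	"crypto/rand"
	"fmt"
	"strings"
)

const storageAccountNameMaxLength = 24

// generateName: prefix + 32 hex chars from a random 128-bit ID, lowercased
// and truncated, mirroring the account-name scheme above.
func generateName(prefix string) string {
	b := make([]byte, 16)
	if _, err := rand.Read(b); err != nil {
		panic(err) // illustrative only
	}
	name := strings.ToLower(prefix + fmt.Sprintf("%x", b))
	if len(name) > storageAccountNameMaxLength {
		return name[:storageAccountNameMaxLength-1]
	}
	return name
}

func main() {
	fmt.Println(generateName("f"))
}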
View File

@@ -26,7 +26,6 @@ import (
	"cloud.google.com/go/compute/metadata"
	"github.com/golang/glog"

-	computealpha "google.golang.org/api/compute/v0.alpha"
	computebeta "google.golang.org/api/compute/v0.beta"
	compute "google.golang.org/api/compute/v1"

@@ -387,7 +386,7 @@ func (gce *GCECloud) AddAliasToInstance(nodeName types.NodeName, alias *net.IPNe
	if err != nil {
		return err
	}
-	instance, err := gce.serviceAlpha.Instances.Get(gce.projectID, v1instance.Zone, v1instance.Name).Do()
+	instance, err := gce.serviceBeta.Instances.Get(gce.projectID, v1instance.Zone, v1instance.Name).Do()
	if err != nil {
		return err
	}

@@ -401,14 +400,16 @@ func (gce *GCECloud) AddAliasToInstance(nodeName types.NodeName, alias *net.IPNe
			nodeName, instance.NetworkInterfaces)
	}

-	iface := instance.NetworkInterfaces[0]
-	iface.AliasIpRanges = append(iface.AliasIpRanges, &computealpha.AliasIpRange{
+	iface := &computebeta.NetworkInterface{}
+	iface.Name = instance.NetworkInterfaces[0].Name
+	iface.Fingerprint = instance.NetworkInterfaces[0].Fingerprint
+	iface.AliasIpRanges = append(iface.AliasIpRanges, &computebeta.AliasIpRange{
		IpCidrRange:         alias.String(),
		SubnetworkRangeName: gce.secondaryRangeName,
	})

	mc := newInstancesMetricContext("addalias", v1instance.Zone)
-	op, err := gce.serviceAlpha.Instances.UpdateNetworkInterface(
+	op, err := gce.serviceBeta.Instances.UpdateNetworkInterface(
		gce.projectID, lastComponent(instance.Zone), instance.Name, iface.Name, iface).Do()
	if err != nil {
		return mc.Observe(err)
View File

@@ -22,6 +22,7 @@ import (
	"fmt"
	"io"
	"io/ioutil"
+	"net"
	"net/http"
	"regexp"
	"strings"

@@ -440,7 +441,7 @@ func getAddressesByName(client *gophercloud.ServiceClient, name types.NodeName)
	return nodeAddresses(srv)
}

-func getAddressByName(client *gophercloud.ServiceClient, name types.NodeName) (string, error) {
+func getAddressByName(client *gophercloud.ServiceClient, name types.NodeName, needIPv6 bool) (string, error) {
	addrs, err := getAddressesByName(client, name)
	if err != nil {
		return "", err

@@ -449,12 +450,20 @@ func getAddressByName(client *gophercloud.ServiceClient, name types.NodeName) (s
	}

	for _, addr := range addrs {
-		if addr.Type == v1.NodeInternalIP {
+		isIPv6 := net.ParseIP(addr.Address).To4() == nil
+		if (addr.Type == v1.NodeInternalIP) && (isIPv6 == needIPv6) {
			return addr.Address, nil
		}
	}

-	return addrs[0].Address, nil
+	for _, addr := range addrs {
+		isIPv6 := net.ParseIP(addr.Address).To4() == nil
+		if (addr.Type == v1.NodeExternalIP) && (isIPv6 == needIPv6) {
+			return addr.Address, nil
+		}
+	}
+	// It should never return an address from a different IP Address family than the one needed
+	return "", ErrNoAddressFound
}

// getAttachedInterfacesByID returns the node interfaces of the specified instance.
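The family check relies on a standard-library property: net.ParseIP returns a non-nil IP for valid input, and To4 yields nil only when the address has no IPv4 form. A standalone sketch of that classification (addresses are illustrative):

package main

import (
	"fmt"
	"net"
)

// isIPv6 reports whether addr is an IPv6 literal. Note that an
// IPv4-mapped address like "::ffff:192.0.2.1" still has a To4 form,
// so it is classified as IPv4, matching the logic in the hunk above.
func isIPv6(addr string) bool {
	ip := net.ParseIP(addr)
	return ip != nil && ip.To4() == nil
}

func main() {
	for _, a := range []string{"10.0.0.7", "2001:db8::1", "::ffff:192.0.2.1"} {
		fmt.Println(a, "IPv6:", isIPv6(a))
	}
}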
View File

@@ -18,6 +18,7 @@ package openstack
import (
	"errors"
+	"net"

	"github.com/gophercloud/gophercloud"
	"github.com/gophercloud/gophercloud/openstack/compute/v2/servers"

@@ -145,7 +146,10 @@ func (r *Routes) CreateRoute(clusterName string, nameHint string, route *cloudpr
	onFailure := NewCaller()

-	addr, err := getAddressByName(r.compute, route.TargetNode)
+	ip, _, _ := net.ParseCIDR(route.DestinationCIDR)
+	isCIDRv6 := ip.To4() == nil
+	addr, err := getAddressByName(r.compute, route.TargetNode, isCIDRv6)
	if err != nil {
		return err
	}

@@ -217,7 +221,10 @@ func (r *Routes) DeleteRoute(clusterName string, route *cloudprovider.Route) err
	onFailure := NewCaller()

-	addr, err := getAddressByName(r.compute, route.TargetNode)
+	ip, _, _ := net.ParseCIDR(route.DestinationCIDR)
+	isCIDRv6 := ip.To4() == nil
+	addr, err := getAddressByName(r.compute, route.TargetNode, isCIDRv6)
	if err != nil {
		return err
	}
View File

@@ -28,7 +28,6 @@ go_library(
        "//vendor/gopkg.in/gcfg.v1:go_default_library",
        "//vendor/k8s.io/api/core/v1:go_default_library",
        "//vendor/k8s.io/apimachinery/pkg/types:go_default_library",
-       "//vendor/k8s.io/apimachinery/pkg/util/wait:go_default_library",
        "//vendor/k8s.io/client-go/informers:go_default_library",
        "//vendor/k8s.io/client-go/tools/cache:go_default_library",
    ],
View File

@@ -33,6 +33,7 @@ type NodeInfo struct {
	dataCenter *vclib.Datacenter
	vm         *vclib.VirtualMachine
	vcServer   string
+	vmUUID     string
}

type NodeManager struct {

@@ -53,6 +54,7 @@ type NodeManager struct {
type NodeDetails struct {
	NodeName string
	vm       *vclib.VirtualMachine
+	VMUUID   string
}

// TODO: Make it configurable in vsphere.conf

@@ -74,7 +76,10 @@ func (nm *NodeManager) DiscoverNode(node *v1.Node) error {
	var globalErr *error

	queueChannel = make(chan *VmSearch, QUEUE_SIZE)
-	nodeUUID := node.Status.NodeInfo.SystemUUID
+
+	nodeUUID := GetUUIDFromProviderID(node.Spec.ProviderID)
+	glog.V(4).Infof("Discovering node %s with uuid %s", node.ObjectMeta.Name, nodeUUID)
+
	vmFound := false
	globalErr = nil

@@ -178,7 +183,7 @@ func (nm *NodeManager) DiscoverNode(node *v1.Node) error {
					glog.V(4).Infof("Found node %s as vm=%+v in vc=%s and datacenter=%s",
						node.Name, vm, res.vc, res.datacenter.Name())
-					nodeInfo := &NodeInfo{dataCenter: res.datacenter, vm: vm, vcServer: res.vc}
+					nodeInfo := &NodeInfo{dataCenter: res.datacenter, vm: vm, vcServer: res.vc, vmUUID: nodeUUID}
					nm.addNodeInfo(node.ObjectMeta.Name, nodeInfo)
					for range queueChannel {
					}

@@ -311,7 +316,7 @@ func (nm *NodeManager) GetNodeDetails() ([]NodeDetails, error) {
		}
		nm.nodeInfoMap[nodeName] = n
		glog.V(4).Infof("Updated NodeInfo %q for node %q.", nodeInfo, nodeName)
-		nodeDetails = append(nodeDetails, NodeDetails{nodeName, n.vm})
+		nodeDetails = append(nodeDetails, NodeDetails{nodeName, n.vm, n.vmUUID})
	}
	return nodeDetails, nil
}
View File

@@ -35,7 +35,6 @@ import (
	"golang.org/x/net/context"
	"k8s.io/api/core/v1"
	k8stypes "k8s.io/apimachinery/pkg/types"
-	"k8s.io/apimachinery/pkg/util/wait"
	"k8s.io/client-go/informers"
	"k8s.io/client-go/tools/cache"
	v1helper "k8s.io/kubernetes/pkg/apis/core/v1/helper"

@@ -54,8 +53,6 @@ const (
	MacOuiVC                      = "00:50:56"
	MacOuiEsx                     = "00:0c:29"
	CleanUpDummyVMRoutineInterval = 5
-	UUIDPath                      = "/sys/class/dmi/id/product_serial"
-	UUIDPrefix                    = "VMware-"
)

var cleanUpRoutineInitialized = false

@@ -72,6 +69,7 @@ type VSphere struct {
	vsphereInstanceMap map[string]*VSphereInstance
	// Responsible for managing discovery of k8s node, their location etc.
	nodeManager *NodeManager
+	vmUUID      string
}

// Represents a vSphere instance where one or more kubernetes nodes are running.

@@ -211,21 +209,23 @@ func init() {
// Initialize passes a Kubernetes clientBuilder interface to the cloud provider
func (vs *VSphere) Initialize(clientBuilder controller.ControllerClientBuilder) {
+}
+
+// Initialize Node Informers
+func (vs *VSphere) SetInformers(informerFactory informers.SharedInformerFactory) {
	if vs.cfg == nil {
		return
	}

	// Only on controller node it is required to register listeners.
	// Register callbacks for node updates
-	client := clientBuilder.ClientOrDie("vsphere-cloud-provider")
-	factory := informers.NewSharedInformerFactory(client, 5*time.Minute)
-	nodeInformer := factory.Core().V1().Nodes()
-	nodeInformer.Informer().AddEventHandler(cache.ResourceEventHandlerFuncs{
+	glog.V(4).Infof("Setting up node informers for vSphere Cloud Provider")
+	nodeInformer := informerFactory.Core().V1().Nodes().Informer()
+	nodeInformer.AddEventHandler(cache.ResourceEventHandlerFuncs{
		AddFunc:    vs.NodeAdded,
		DeleteFunc: vs.NodeDeleted,
	})
-	go nodeInformer.Informer().Run(wait.NeverStop)
-	glog.V(4).Infof("vSphere cloud provider initialized")
+	glog.V(4).Infof("Node informers in vSphere cloud provider initialized")
}

// Creates new worker node interface and returns

@@ -237,7 +237,11 @@ func newWorkerNode() (*VSphere, error) {
		glog.Errorf("Failed to get hostname. err: %+v", err)
		return nil, err
	}
+	vs.vmUUID, err = GetVMUUID()
+	if err != nil {
+		glog.Errorf("Failed to get uuid. err: %+v", err)
+		return nil, err
+	}
	return &vs, nil
}

@@ -403,6 +407,11 @@ func newControllerNode(cfg VSphereConfig) (*VSphere, error) {
		glog.Errorf("Failed to get hostname. err: %+v", err)
		return nil, err
	}
+	vs.vmUUID, err = GetVMUUID()
+	if err != nil {
+		glog.Errorf("Failed to get uuid. err: %+v", err)
+		return nil, err
+	}
	runtime.SetFinalizer(&vs, logout)
	return &vs, nil
}

@@ -584,7 +593,23 @@ func (vs *VSphere) ExternalID(nodeName k8stypes.NodeName) (string, error) {
// InstanceExistsByProviderID returns true if the instance with the given provider id still exists and is running.
// If false is returned with no error, the instance will be immediately deleted by the cloud controller manager.
func (vs *VSphere) InstanceExistsByProviderID(providerID string) (bool, error) {
-	_, err := vs.InstanceID(convertToK8sType(providerID))
+	var nodeName string
+	nodes, err := vs.nodeManager.GetNodeDetails()
+	if err != nil {
+		glog.Errorf("Error while obtaining Kubernetes node nodeVmDetail details. error : %+v", err)
+		return false, err
+	}
+	for _, node := range nodes {
+		if node.VMUUID == GetUUIDFromProviderID(providerID) {
+			nodeName = node.NodeName
+			break
+		}
+	}
+	if nodeName == "" {
+		msg := fmt.Sprintf("Error while obtaining Kubernetes nodename for providerID %s.", providerID)
+		return false, errors.New(msg)
+	}
+	_, err = vs.InstanceID(convertToK8sType(nodeName))
	if err == nil {
		return true, nil
	}

@@ -597,7 +622,7 @@ func (vs *VSphere) InstanceID(nodeName k8stypes.NodeName) (string, error) {
	instanceIDInternal := func() (string, error) {
		if vs.hostName == convertToString(nodeName) {
-			return vs.hostName, nil
+			return vs.vmUUID, nil
		}

		// Below logic can be performed only on master node where VC details are preset.

@@ -631,7 +656,7 @@ func (vs *VSphere) InstanceID(nodeName k8stypes.NodeName) (string, error) {
				return "", err
			}
			if isActive {
-				return convertToString(nodeName), nil
+				return vs.vmUUID, nil
			}
			glog.Warningf("The VM: %s is not in %s state", convertToString(nodeName), vclib.ActivePowerState)
			return "", cloudprovider.InstanceNotFound
View File

@@ -35,6 +35,7 @@ import (
	"path/filepath"

	"github.com/vmware/govmomi/vim25/mo"
	"k8s.io/api/core/v1"
	k8stypes "k8s.io/apimachinery/pkg/types"
	"k8s.io/kubernetes/pkg/cloudprovider/providers/vsphere/vclib"

@@ -47,6 +48,11 @@ const (
	Folder                = "Folder"
	VirtualMachine        = "VirtualMachine"
	DummyDiskName         = "kube-dummyDisk.vmdk"
+	UUIDPath              = "/sys/class/dmi/id/product_serial"
+	UUIDPrefix            = "VMware-"
+	ProviderPrefix        = "vsphere://"
+	vSphereConfFileEnvVar = "VSPHERE_CONF_FILE"
)

// GetVSphere reads vSphere configuration from system environment and construct vSphere object

@@ -532,8 +538,74 @@ func (vs *VSphere) checkDiskAttached(ctx context.Context, nodes []k8stypes.NodeN
		if err != nil {
			return nodesToRetry, err
		}
-		glog.V(9).Infof("Verifying volume for nodeName: %q with nodeuuid: %s", nodeName, node.Status.NodeInfo.SystemUUID, vmMoMap)
-		vclib.VerifyVolumePathsForVM(vmMoMap[strings.ToLower(node.Status.NodeInfo.SystemUUID)], nodeVolumes[nodeName], convertToString(nodeName), attached)
+		nodeUUID := strings.ToLower(GetUUIDFromProviderID(node.Spec.ProviderID))
+		glog.V(9).Infof("Verifying volume for node %s with nodeuuid %q: %s", nodeName, nodeUUID, vmMoMap)
+		vclib.VerifyVolumePathsForVM(vmMoMap[nodeUUID], nodeVolumes[nodeName], convertToString(nodeName), attached)
	}
	return nodesToRetry, nil
}

+func (vs *VSphere) IsDummyVMPresent(vmName string) (bool, error) {
+	isDummyVMPresent := false
+
+	// Create context
+	ctx, cancel := context.WithCancel(context.Background())
+	defer cancel()
+
+	vsi, err := vs.getVSphereInstanceForServer(vs.cfg.Workspace.VCenterIP, ctx)
+	if err != nil {
+		return isDummyVMPresent, err
+	}
+
+	dc, err := vclib.GetDatacenter(ctx, vsi.conn, vs.cfg.Workspace.Datacenter)
+	if err != nil {
+		return isDummyVMPresent, err
+	}
+
+	vmFolder, err := dc.GetFolderByPath(ctx, vs.cfg.Workspace.Folder)
+	if err != nil {
+		return isDummyVMPresent, err
+	}
+
+	vms, err := vmFolder.GetVirtualMachines(ctx)
+	if err != nil {
+		return isDummyVMPresent, err
+	}
+
+	for _, vm := range vms {
+		if vm.Name() == vmName {
+			isDummyVMPresent = true
+			break
+		}
+	}
+
+	return isDummyVMPresent, nil
+}
+
+func GetVMUUID() (string, error) {
+	id, err := ioutil.ReadFile(UUIDPath)
+	if err != nil {
+		return "", fmt.Errorf("error retrieving vm uuid: %s", err)
+	}
+	uuidFromFile := string(id[:])
+	//strip leading and trailing white space and new line char
+	uuid := strings.TrimSpace(uuidFromFile)
+	// check the uuid starts with "VMware-"
+	if !strings.HasPrefix(uuid, UUIDPrefix) {
+		return "", fmt.Errorf("Failed to match Prefix, UUID read from the file is %v", uuidFromFile)
+	}
+	// Strip the prefix and white spaces and -
+	uuid = strings.Replace(uuid[len(UUIDPrefix):(len(uuid))], " ", "", -1)
+	uuid = strings.Replace(uuid, "-", "", -1)
+	if len(uuid) != 32 {
+		return "", fmt.Errorf("Length check failed, UUID read from the file is %v", uuidFromFile)
+	}
+	// need to add dashes, e.g. "564d395e-d807-e18a-cb25-b79f65eb2b9f"
+	uuid = fmt.Sprintf("%s-%s-%s-%s-%s", uuid[0:8], uuid[8:12], uuid[12:16], uuid[16:20], uuid[20:32])
+	return uuid, nil
+}
+
+func GetUUIDFromProviderID(providerID string) string {
+	return strings.TrimPrefix(providerID, ProviderPrefix)
+}
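GetVMUUID turns the SMBIOS product_serial form of the UUID into its canonical dashed form. A self-contained sketch of the same normalization, run against a hard-coded sample serial rather than reading /sys (the sample value is illustrative):

package main

import (
	"fmt"
	"strings"
)

// normalizeVMwareSerial applies the same steps as GetVMUUID above:
// trim, check the "VMware-" prefix, drop spaces and dashes, verify the
// 32 hex characters, then re-insert dashes in 8-4-4-4-12 form.
func normalizeVMwareSerial(serial string) (string, error) {
	const prefix = "VMware-"
	s := strings.TrimSpace(serial)
	if !strings.HasPrefix(s, prefix) {
		return "", fmt.Errorf("unexpected serial %q", serial)
	}
	s = strings.NewReplacer(" ", "", "-", "").Replace(s[len(prefix):])
	if len(s) != 32 {
		return "", fmt.Errorf("unexpected length in %q", serial)
	}
	return fmt.Sprintf("%s-%s-%s-%s-%s", s[0:8], s[8:12], s[12:16], s[16:20], s[20:32]), nil
}

func main() {
	// Illustrative product_serial contents for a VMware guest.
	sample := "VMware-56 4d 39 5e d8 07 e1 8a-cb 25 b7 9f 65 eb 2b 9f"
	fmt.Println(normalizeVMwareSerial(sample))
}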
View File

@@ -104,8 +104,19 @@ func oldPodsRunning(newRS *extensions.ReplicaSet, oldRSs []*extensions.ReplicaSe
		if newRS != nil && newRS.UID == rsUID {
			continue
		}
-		if len(podList.Items) > 0 {
+		for _, pod := range podList.Items {
+			switch pod.Status.Phase {
+			case v1.PodFailed, v1.PodSucceeded:
+				// Don't count pods in terminal state.
+				continue
+			case v1.PodUnknown:
+				// This happens in situation like when the node is temporarily disconnected from the cluster.
+				// If we can't be sure that the pod is not running, we have to count it.
+				return true
+			default:
+				// Pod is not in terminal phase.
+				return true
+			}
		}
	}
	return false
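The switch distinguishes terminal pods (Failed, Succeeded) from everything else and deliberately counts Unknown as possibly running, since the kubelet may just be unreachable. A hedged standalone sketch of the same classification; the phase values come from k8s.io/api/core/v1 but are plain strings here for self-containment:

package main

import "fmt"

// podPossiblyRunning mirrors the logic above: terminal phases don't count,
// and Unknown must be treated as running because we cannot prove otherwise.
func podPossiblyRunning(phase string) bool {
	switch phase {
	case "Failed", "Succeeded":
		return false
	default: // Pending, Running, Unknown
		return true
	}
}

func main() {
	for _, p := range []string{"Running", "Succeeded", "Unknown"} {
		fmt.Println(p, podPossiblyRunning(p))
	}
}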
View File

@@ -244,9 +244,7 @@ func (op *updateOp) validateRange(ctx context.Context, sync *NodeSync, node *v1.
// alias.
func (op *updateOp) updateNodeFromAlias(ctx context.Context, sync *NodeSync, node *v1.Node, aliasRange *net.IPNet) error {
	if sync.mode != SyncFromCloud {
-		sync.kubeAPI.EmitNodeWarningEvent(node.Name, InvalidModeEvent,
-			"Cannot sync from cloud in mode %q", sync.mode)
-		return fmt.Errorf("cannot sync from cloud in mode %q", sync.mode)
+		glog.Warningf("Detect mode %q while expect to sync from cloud", sync.mode)
	}

	glog.V(2).Infof("Updating node spec with alias range, node.PodCIDR = %v", aliasRange)

@@ -276,9 +274,7 @@ func (op *updateOp) updateNodeFromAlias(ctx context.Context, sync *NodeSync, nod
// updateAliasFromNode updates the cloud alias given the node allocation.
func (op *updateOp) updateAliasFromNode(ctx context.Context, sync *NodeSync, node *v1.Node) error {
	if sync.mode != SyncFromCluster {
-		sync.kubeAPI.EmitNodeWarningEvent(
-			node.Name, InvalidModeEvent, "Cannot sync to cloud in mode %q", sync.mode)
-		return fmt.Errorf("cannot sync to cloud in mode %q", sync.mode)
+		glog.Warningf("Detect mode %q while expect to sync from cluster", sync.mode)
	}

	_, aliasRange, err := net.ParseCIDR(node.Spec.PodCIDR)
View File

@@ -219,6 +219,20 @@ const (
	//
	// Implement IPVS-based in-cluster service load balancing
	SupportIPVSProxyMode utilfeature.Feature = "SupportIPVSProxyMode"
+
+	// owner: @joelsmith
+	// deprecated: v1.10
+	//
+	// Mount secret, configMap, downwardAPI and projected volumes ReadOnly. Note: this feature
+	// gate is present only for backward compatability, it will be removed in the 1.11 release.
+	ReadOnlyAPIDataVolumes utilfeature.Feature = "ReadOnlyAPIDataVolumes"
+
+	// owner: @saad-ali
+	// ga
+	//
+	// Allow mounting a subpath of a volume in a container
+	// Do not remove this feature gate even though it's GA
+	VolumeSubpath utilfeature.Feature = "VolumeSubpath"
)

func init() {

@@ -259,6 +273,7 @@ var defaultKubernetesFeatureGates = map[utilfeature.Feature]utilfeature.FeatureS
	PVCProtection:                  {Default: false, PreRelease: utilfeature.Alpha},
	ResourceLimitsPriorityFunction: {Default: false, PreRelease: utilfeature.Alpha},
	SupportIPVSProxyMode:           {Default: false, PreRelease: utilfeature.Beta},
+	VolumeSubpath:                  {Default: true, PreRelease: utilfeature.GA},

	// inherited features from generic apiserver, relisted here to get a conflict if it is changed
	// unintentionally on either side:

@@ -274,4 +289,5 @@ var defaultKubernetesFeatureGates = map[utilfeature.Feature]utilfeature.FeatureS
	// features that enable backwards compatability but are scheduled to be removed
	ServiceProxyAllowExternalIPs: {Default: false, PreRelease: utilfeature.Deprecated},
+	ReadOnlyAPIDataVolumes:       {Default: true, PreRelease: utilfeature.Deprecated},
}
View File

@@ -46,7 +46,7 @@ type HandlerRunner interface {
// RuntimeHelper wraps kubelet to make container runtime
// able to get necessary informations like the RunContainerOptions, DNS settings, Host IP.
type RuntimeHelper interface {
-	GenerateRunContainerOptions(pod *v1.Pod, container *v1.Container, podIP string) (contOpts *RunContainerOptions, err error)
+	GenerateRunContainerOptions(pod *v1.Pod, container *v1.Container, podIP string) (contOpts *RunContainerOptions, cleanupAction func(), err error)
	GetPodDNS(pod *v1.Pod) (dnsConfig *runtimeapi.DNSConfig, err error)
	// GetPodCgroupParent returns the CgroupName identifer, and its literal cgroupfs form on the host
	// of a pod.
View File

@@ -300,9 +300,11 @@ func makePodSourceConfig(kubeCfg *kubeletconfiginternal.KubeletConfiguration, ku
	// Restore from the checkpoint path
	// NOTE: This MUST happen before creating the apiserver source
	// below, or the checkpoint would override the source of truth.
-	updatechannel := cfg.Channel(kubetypes.ApiserverSource)
+	var updatechannel chan<- interface{}
	if bootstrapCheckpointPath != "" {
		glog.Infof("Adding checkpoint path: %v", bootstrapCheckpointPath)
+		updatechannel = cfg.Channel(kubetypes.ApiserverSource)
		err := cfg.Restore(bootstrapCheckpointPath, updatechannel)
		if err != nil {
			return nil, err

@@ -311,6 +313,9 @@ func makePodSourceConfig(kubeCfg *kubeletconfiginternal.KubeletConfiguration, ku
	if kubeDeps.KubeClient != nil {
		glog.Infof("Watching apiserver")
+		if updatechannel == nil {
+			updatechannel = cfg.Channel(kubetypes.ApiserverSource)
+		}
		config.NewSourceApiserver(kubeDeps.KubeClient, nodeName, updatechannel)
	}
	return cfg, nil
View File

@@ -63,6 +63,7 @@ import (
	kubetypes "k8s.io/kubernetes/pkg/kubelet/types"
	"k8s.io/kubernetes/pkg/kubelet/util/format"
	utilfile "k8s.io/kubernetes/pkg/util/file"
+	mountutil "k8s.io/kubernetes/pkg/util/mount"
	"k8s.io/kubernetes/pkg/volume"
	volumeutil "k8s.io/kubernetes/pkg/volume/util"
	"k8s.io/kubernetes/pkg/volume/util/volumehelper"

@@ -163,7 +164,7 @@ func (kl *Kubelet) makeBlockVolumes(pod *v1.Pod, container *v1.Container, podVol
}

// makeMounts determines the mount points for the given container.
-func makeMounts(pod *v1.Pod, podDir string, container *v1.Container, hostName, hostDomain, podIP string, podVolumes kubecontainer.VolumeMap) ([]kubecontainer.Mount, error) {
+func makeMounts(pod *v1.Pod, podDir string, container *v1.Container, hostName, hostDomain, podIP string, podVolumes kubecontainer.VolumeMap, mounter mountutil.Interface) ([]kubecontainer.Mount, func(), error) {
	// Kubernetes only mounts on /etc/hosts if:
	// - container is not an infrastructure (pause) container
	// - container is not already mounting on /etc/hosts

@@ -173,13 +174,14 @@ func makeMounts(pod *v1.Pod, podDir string, container *v1.Container, hostName, h
	mountEtcHostsFile := len(podIP) > 0 && runtime.GOOS != "windows"
	glog.V(3).Infof("container: %v/%v/%v podIP: %q creating hosts mount: %v", pod.Namespace, pod.Name, container.Name, podIP, mountEtcHostsFile)
	mounts := []kubecontainer.Mount{}
-	for _, mount := range container.VolumeMounts {
+	var cleanupAction func() = nil
+	for i, mount := range container.VolumeMounts {
		// do not mount /etc/hosts if container is already mounting on the path
		mountEtcHostsFile = mountEtcHostsFile && (mount.MountPath != etcHostsPath)
		vol, ok := podVolumes[mount.Name]
		if !ok || vol.Mounter == nil {
			glog.Errorf("Mount cannot be satisfied for container %q, because the volume is missing or the volume mounter is nil: %+v", container.Name, mount)
-			return nil, fmt.Errorf("cannot find volume %q to mount into container %q", mount.Name, container.Name)
+			return nil, cleanupAction, fmt.Errorf("cannot find volume %q to mount into container %q", mount.Name, container.Name)
		}

		relabelVolume := false
@@ -192,25 +194,33 @@ func makeMounts(pod *v1.Pod, podDir string, container *v1.Container, hostName, h
		}
		hostPath, err := volume.GetPath(vol.Mounter)
		if err != nil {
-			return nil, err
+			return nil, cleanupAction, err
		}
		if mount.SubPath != "" {
+			if !utilfeature.DefaultFeatureGate.Enabled(features.VolumeSubpath) {
+				return nil, cleanupAction, fmt.Errorf("volume subpaths are disabled")
+			}
+
			if filepath.IsAbs(mount.SubPath) {
-				return nil, fmt.Errorf("error SubPath `%s` must not be an absolute path", mount.SubPath)
+				return nil, cleanupAction, fmt.Errorf("error SubPath `%s` must not be an absolute path", mount.SubPath)
			}

			err = volumevalidation.ValidatePathNoBacksteps(mount.SubPath)
			if err != nil {
-				return nil, fmt.Errorf("unable to provision SubPath `%s`: %v", mount.SubPath, err)
+				return nil, cleanupAction, fmt.Errorf("unable to provision SubPath `%s`: %v", mount.SubPath, err)
			}

			fileinfo, err := os.Lstat(hostPath)
			if err != nil {
-				return nil, err
+				return nil, cleanupAction, err
			}
			perm := fileinfo.Mode()

-			hostPath = filepath.Join(hostPath, mount.SubPath)
+			volumePath, err := filepath.EvalSymlinks(hostPath)
+			if err != nil {
+				return nil, cleanupAction, err
+			}
+			hostPath = filepath.Join(volumePath, mount.SubPath)

			if subPathExists, err := utilfile.FileOrSymlinkExists(hostPath); err != nil {
				glog.Errorf("Could not determine if subPath %s exists; will not attempt to change its permissions", hostPath)

@@ -219,16 +229,24 @@ func makeMounts(pod *v1.Pod, podDir string, container *v1.Container, hostName, h
				// incorrect ownership and mode. For example, the sub path directory must have at least g+rwx
				// when the pod specifies an fsGroup, and if the directory is not created here, Docker will
				// later auto-create it with the incorrect mode 0750
-				if err := os.MkdirAll(hostPath, perm); err != nil {
-					glog.Errorf("failed to mkdir:%s", hostPath)
-					return nil, err
-				}
-
-				// chmod the sub path because umask may have prevented us from making the sub path with the same
-				// permissions as the mounter path
-				if err := os.Chmod(hostPath, perm); err != nil {
-					return nil, err
+				// Make extra care not to escape the volume!
+				if err := mounter.SafeMakeDir(hostPath, volumePath, perm); err != nil {
+					glog.Errorf("failed to mkdir %q: %v", hostPath, err)
+					return nil, cleanupAction, err
				}
			}
+			hostPath, cleanupAction, err = mounter.PrepareSafeSubpath(mountutil.Subpath{
+				VolumeMountIndex: i,
+				Path:             hostPath,
+				VolumeName:       mount.Name,
+				VolumePath:       volumePath,
+				PodDir:           podDir,
+				ContainerName:    container.Name,
+			})
+			if err != nil {
+				// Don't pass detailed error back to the user because it could give information about host filesystem
+				glog.Errorf("failed to prepare subPath for volumeMount %q of container %q: %v", mount.Name, container.Name, err)
+				return nil, cleanupAction, fmt.Errorf("failed to prepare subPath for volumeMount %q of container %q", mount.Name, container.Name)
+			}
		}
@@ -245,15 +263,17 @@ func makeMounts(pod *v1.Pod, podDir string, container *v1.Container, hostName, h
		propagation, err := translateMountPropagation(mount.MountPropagation)
		if err != nil {
-			return nil, err
+			return nil, cleanupAction, err
		}
		glog.V(5).Infof("Pod %q container %q mount %q has propagation %q", format.Pod(pod), container.Name, mount.Name, propagation)

+		mustMountRO := vol.Mounter.GetAttributes().ReadOnly && utilfeature.DefaultFeatureGate.Enabled(features.ReadOnlyAPIDataVolumes)
+
		mounts = append(mounts, kubecontainer.Mount{
			Name:           mount.Name,
			ContainerPath:  containerPath,
			HostPath:       hostPath,
-			ReadOnly:       mount.ReadOnly,
+			ReadOnly:       mount.ReadOnly || mustMountRO,
			SELinuxRelabel: relabelVolume,
			Propagation:    propagation,
		})

@@ -262,11 +282,11 @@ func makeMounts(pod *v1.Pod, podDir string, container *v1.Container, hostName, h
		hostAliases := pod.Spec.HostAliases
		hostsMount, err := makeHostsMount(podDir, podIP, hostName, hostDomain, hostAliases, pod.Spec.HostNetwork)
		if err != nil {
-			return nil, err
+			return nil, cleanupAction, err
		}
		mounts = append(mounts, *hostsMount)
	}
-	return mounts, nil
+	return mounts, cleanupAction, nil
}

// translateMountPropagation transforms v1.MountPropagationMode to

@@ -432,17 +452,17 @@ func (kl *Kubelet) GetPodCgroupParent(pod *v1.Pod) string {
// GenerateRunContainerOptions generates the RunContainerOptions, which can be used by
// the container runtime to set parameters for launching a container.
-func (kl *Kubelet) GenerateRunContainerOptions(pod *v1.Pod, container *v1.Container, podIP string) (*kubecontainer.RunContainerOptions, error) {
+func (kl *Kubelet) GenerateRunContainerOptions(pod *v1.Pod, container *v1.Container, podIP string) (*kubecontainer.RunContainerOptions, func(), error) {
	opts, err := kl.containerManager.GetResources(pod, container)
	if err != nil {
-		return nil, err
+		return nil, nil, err
	}
	cgroupParent := kl.GetPodCgroupParent(pod)
	opts.CgroupParent = cgroupParent
	hostname, hostDomainName, err := kl.GeneratePodHostNameAndDomain(pod)
	if err != nil {
-		return nil, err
+		return nil, nil, err
	}
	opts.Hostname = hostname
	podName := volumehelper.GetUniquePodName(pod)

@@ -452,7 +472,7 @@ func (kl *Kubelet) GenerateRunContainerOptions(pod *v1.Pod, container *v1.Contai
	// TODO(random-liu): Move following convert functions into pkg/kubelet/container
	devices, err := kl.makeGPUDevices(pod, container)
	if err != nil {
-		return nil, err
+		return nil, nil, err
	}
	opts.Devices = append(opts.Devices, devices...)

@@ -461,20 +481,20 @@ func (kl *Kubelet) GenerateRunContainerOptions(pod *v1.Pod, container *v1.Contai
		blkutil := volumeutil.NewBlockVolumePathHandler()
		blkVolumes, err := kl.makeBlockVolumes(pod, container, volumes, blkutil)
		if err != nil {
-			return nil, err
+			return nil, nil, err
		}
		opts.Devices = append(opts.Devices, blkVolumes...)
	}

-	mounts, err := makeMounts(pod, kl.getPodDir(pod.UID), container, hostname, hostDomainName, podIP, volumes)
+	mounts, cleanupAction, err := makeMounts(pod, kl.getPodDir(pod.UID), container, hostname, hostDomainName, podIP, volumes, kl.mounter)
	if err != nil {
-		return nil, err
+		return nil, cleanupAction, err
	}
	opts.Mounts = append(opts.Mounts, mounts...)

	envs, err := kl.makeEnvironmentVariables(pod, container, podIP)
	if err != nil {
-		return nil, err
+		return nil, cleanupAction, err
	}
	opts.Envs = append(opts.Envs, envs...)

@@ -494,7 +514,7 @@ func (kl *Kubelet) GenerateRunContainerOptions(pod *v1.Pod, container *v1.Contai
		opts.EnableHostUserNamespace = kl.enableHostUserNamespace(pod)
	}

-	return opts, nil
+	return opts, cleanupAction, nil
}

var masterServices = sets.NewString("kubernetes")
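SafeMakeDir and PrepareSafeSubpath come from k8s.io/kubernetes/pkg/util/mount and are the heart of the subPath hardening: the target directory is created only after resolving symlinks and proving the result stays inside the volume. A simplified, hedged sketch of the containment idea using only the standard library; this is a lexical check and, unlike the real implementation, does not defend against symlink races:

package main

import (
	"fmt"
	"os"
	"path/filepath"
	"strings"
)

// safeMakeDir creates subdir under base, refusing when the cleaned join
// would land outside base - the core property SafeMakeDir enforces.
func safeMakeDir(base, subdir string, perm os.FileMode) error {
	target := filepath.Join(base, subdir) // Join also cleans the path
	rel, err := filepath.Rel(base, target)
	if err != nil || rel == ".." || strings.HasPrefix(rel, ".."+string(os.PathSeparator)) {
		return fmt.Errorf("subpath %q escapes volume %q", subdir, base)
	}
	return os.MkdirAll(target, perm)
}

func main() {
	fmt.Println(safeMakeDir(os.TempDir(), "demo/sub", 0755)) // ok
	fmt.Println(safeMakeDir(os.TempDir(), "../etc", 0755))   // rejected
}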
View File

@@ -107,11 +107,15 @@ func (m *kubeGenericRuntimeManager) startContainer(podSandboxID string, podSandb
		restartCount = containerStatus.RestartCount + 1
	}

-	containerConfig, err := m.generateContainerConfig(container, pod, restartCount, podIP, imageRef)
+	containerConfig, cleanupAction, err := m.generateContainerConfig(container, pod, restartCount, podIP, imageRef)
+	if cleanupAction != nil {
+		defer cleanupAction()
+	}
	if err != nil {
		m.recordContainerEvent(pod, container, "", v1.EventTypeWarning, events.FailedToCreateContainer, "Error: %v", grpc.ErrorDesc(err))
		return grpc.ErrorDesc(err), ErrCreateContainerConfig
	}

	containerID, err := m.runtimeService.CreateContainer(podSandboxID, containerConfig, podSandboxConfig)
	if err != nil {
		m.recordContainerEvent(pod, container, containerID, v1.EventTypeWarning, events.FailedToCreateContainer, "Error: %v", grpc.ErrorDesc(err))

@@ -173,20 +177,20 @@ func (m *kubeGenericRuntimeManager) startContainer(podSandboxID string, podSandb
}

// generateContainerConfig generates container config for kubelet runtime v1.
-func (m *kubeGenericRuntimeManager) generateContainerConfig(container *v1.Container, pod *v1.Pod, restartCount int, podIP, imageRef string) (*runtimeapi.ContainerConfig, error) {
-	opts, err := m.runtimeHelper.GenerateRunContainerOptions(pod, container, podIP)
+func (m *kubeGenericRuntimeManager) generateContainerConfig(container *v1.Container, pod *v1.Pod, restartCount int, podIP, imageRef string) (*runtimeapi.ContainerConfig, func(), error) {
+	opts, cleanupAction, err := m.runtimeHelper.GenerateRunContainerOptions(pod, container, podIP)
	if err != nil {
-		return nil, err
+		return nil, nil, err
	}

	uid, username, err := m.getImageUser(container.Image)
	if err != nil {
-		return nil, err
+		return nil, cleanupAction, err
	}

	// Verify RunAsNonRoot. Non-root verification only supports numeric user.
	if err := verifyRunAsNonRoot(pod, container, uid, username); err != nil {
-		return nil, err
+		return nil, cleanupAction, err
	}

	command, args := kubecontainer.ExpandContainerCommandAndArgs(container, opts.Envs)

@@ -223,7 +227,7 @@ func (m *kubeGenericRuntimeManager) generateContainerConfig(container *v1.Contai
	}
	config.Envs = envs

-	return config, nil
+	return config, cleanupAction, nil
}

// generateLinuxContainerConfig generates linux container config for kubelet runtime v1.

View File

@ -842,7 +842,7 @@ func (r *Runtime) newAppcRuntimeApp(pod *v1.Pod, podIP string, c v1.Container, r
} }
// TODO: determine how this should be handled for rkt // TODO: determine how this should be handled for rkt
opts, err := r.runtimeHelper.GenerateRunContainerOptions(pod, &c, podIP) opts, _, err := r.runtimeHelper.GenerateRunContainerOptions(pod, &c, podIP)
if err != nil { if err != nil {
return err return err
} }

View File

@ -19,6 +19,8 @@ package util
import ( import (
"fmt" "fmt"
"net/url" "net/url"
"path/filepath"
"strings"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
) )
@ -45,3 +47,14 @@ func parseEndpoint(endpoint string) (string, string, error) {
return u.Scheme, "", fmt.Errorf("protocol %q not supported", u.Scheme) return u.Scheme, "", fmt.Errorf("protocol %q not supported", u.Scheme)
} }
} }
func pathWithinBase(fullPath, basePath string) bool {
rel, err := filepath.Rel(basePath, fullPath)
if err != nil {
return false
}
if strings.HasPrefix(rel, "..") {
return false
}
return true
}

View File

@ -31,3 +31,12 @@ func CreateListener(endpoint string) (net.Listener, error) {
func GetAddressAndDialer(endpoint string) (string, func(addr string, timeout time.Duration) (net.Conn, error), error) { func GetAddressAndDialer(endpoint string) (string, func(addr string, timeout time.Duration) (net.Conn, error), error) {
return "", nil, fmt.Errorf("GetAddressAndDialer is unsupported in this build") return "", nil, fmt.Errorf("GetAddressAndDialer is unsupported in this build")
} }
// LockAndCheckSubPath empty implementation
func LockAndCheckSubPath(volumePath, subPath string) ([]uintptr, error) {
return []uintptr{}, nil
}
// UnlockPath empty implementation
func UnlockPath(fileHandles []uintptr) {
}

View File

@ -179,7 +179,7 @@ func (rc *reconciler) reconcile() {
glog.Errorf(mountedVolume.GenerateErrorDetailed(fmt.Sprintf("operationExecutor.NewVolumeHandler for UnmountVolume failed"), err).Error()) glog.Errorf(mountedVolume.GenerateErrorDetailed(fmt.Sprintf("operationExecutor.NewVolumeHandler for UnmountVolume failed"), err).Error())
continue continue
} }
err = volumeHandler.UnmountVolumeHandler(mountedVolume.MountedVolume, rc.actualStateOfWorld) err = volumeHandler.UnmountVolumeHandler(mountedVolume.MountedVolume, rc.actualStateOfWorld, rc.kubeletPodsDir)
if err != nil && if err != nil &&
!nestedpendingoperations.IsAlreadyExists(err) && !nestedpendingoperations.IsAlreadyExists(err) &&
!exponentialbackoff.IsExponentialBackoff(err) { !exponentialbackoff.IsExponentialBackoff(err) {

View File

@ -23,18 +23,32 @@ import (
"os" "os"
"runtime" "runtime"
"sync" "sync"
"syscall"
"time" "time"
"unsafe" "unsafe"
"github.com/golang/glog" "github.com/golang/glog"
cadvisorapi "github.com/google/cadvisor/info/v1" cadvisorapi "github.com/google/cadvisor/info/v1"
"golang.org/x/sys/windows"
"k8s.io/apimachinery/pkg/util/wait" "k8s.io/apimachinery/pkg/util/wait"
) )
// MemoryStatusEx is the same as Windows structure MEMORYSTATUSEX
// https://msdn.microsoft.com/en-us/library/windows/desktop/aa366770(v=vs.85).aspx
type MemoryStatusEx struct {
Length uint32
MemoryLoad uint32
TotalPhys uint64
AvailPhys uint64
TotalPageFile uint64
AvailPageFile uint64
TotalVirtual uint64
AvailVirtual uint64
AvailExtendedVirtual uint64
}
var ( var (
modkernel32 = syscall.NewLazyDLL("kernel32.dll") modkernel32 = windows.NewLazySystemDLL("kernel32.dll")
procGetPhysicallyInstalledSystemMemory = modkernel32.NewProc("GetPhysicallyInstalledSystemMemory") procGlobalMemoryStatusEx = modkernel32.NewProc("GlobalMemoryStatusEx")
) )
// NewPerfCounterClient creates a client using perf counters // NewPerfCounterClient creates a client using perf counters
@ -163,20 +177,24 @@ func (p *perfCounterNodeStatsClient) convertCPUValue(cpuValue uint64) uint64 {
} }
func getPhysicallyInstalledSystemMemoryBytes() (uint64, error) { func getPhysicallyInstalledSystemMemoryBytes() (uint64, error) {
var physicalMemoryKiloBytes uint64 // We use GlobalMemoryStatusEx instead of GetPhysicallyInstalledSystemMemory
// on Windows node for the following reasons:
// 1. GetPhysicallyInstalledSystemMemory retrieves the amount of physically
// installed RAM from the computer's SMBIOS firmware tables.
// https://msdn.microsoft.com/en-us/library/windows/desktop/cc300158(v=vs.85).aspx
// On some VMs, it is unable to read data from SMBIOS and fails with ERROR_INVALID_DATA.
// 2. On Linux node, total physical memory is read from MemTotal in /proc/meminfo.
// GlobalMemoryStatusEx returns the amount of physical memory that is available
// for the operating system to use. The amount returned by GlobalMemoryStatusEx
// is closer in parity with Linux
// https://www.kernel.org/doc/Documentation/filesystems/proc.txt
var statex MemoryStatusEx
statex.Length = uint32(unsafe.Sizeof(statex))
ret, _, _ := procGlobalMemoryStatusEx.Call(uintptr(unsafe.Pointer(&statex)))
if ok := getPhysicallyInstalledSystemMemory(&physicalMemoryKiloBytes); !ok { if ret == 0 {
return 0, errors.New("unable to read physical memory") return 0, errors.New("unable to read physical memory")
} }
return physicalMemoryKiloBytes * 1024, nil // convert kilobytes to bytes return statex.TotalPhys, nil
}
func getPhysicallyInstalledSystemMemory(totalMemoryInKilobytes *uint64) bool {
ret, _, _ := syscall.Syscall(procGetPhysicallyInstalledSystemMemory.Addr(), 1,
uintptr(unsafe.Pointer(totalMemoryInKilobytes)),
0,
0)
return ret != 0
} }

View File

@ -59,7 +59,12 @@ func TunnelSyncHealthChecker(tunneler Tunneler) func(req *http.Request) error {
return fmt.Errorf("Tunnel sync is taking too long: %d", lag) return fmt.Errorf("Tunnel sync is taking too long: %d", lag)
} }
sshKeyLag := tunneler.SecondsSinceSSHKeySync() sshKeyLag := tunneler.SecondsSinceSSHKeySync()
if sshKeyLag > 600 { // Since we are syncing ssh-keys every 5 minutes, the allowed
// lag since the last sync should be more than 2x that interval
// to tolerate a single failure, which can always happen.
// For now set it to 3x, which is 15 minutes.
// For more details see: http://pr.k8s.io/59347
if sshKeyLag > 900 {
return fmt.Errorf("SSHKey sync is taking too long: %d", sshKeyLag) return fmt.Errorf("SSHKey sync is taking too long: %d", sshKeyLag)
} }
return nil return nil

View File

@ -59,7 +59,17 @@ go_test(
}), }),
importpath = "k8s.io/kubernetes/pkg/util/mount", importpath = "k8s.io/kubernetes/pkg/util/mount",
library = ":go_default_library", library = ":go_default_library",
deps = ["//vendor/k8s.io/utils/exec/testing:go_default_library"], deps = [
"//vendor/k8s.io/utils/exec/testing:go_default_library",
] + select({
"@io_bazel_rules_go//go/platform:linux_amd64": [
"//vendor/github.com/golang/glog:go_default_library",
],
"@io_bazel_rules_go//go/platform:windows_amd64": [
"//vendor/github.com/stretchr/testify/assert:go_default_library",
],
"//conditions:default": [],
}),
) )
filegroup( filegroup(

View File

@ -20,6 +20,7 @@ package mount
import ( import (
"fmt" "fmt"
"os"
"github.com/golang/glog" "github.com/golang/glog"
) )
@ -138,3 +139,15 @@ func (m *execMounter) MakeDir(pathname string) error {
func (m *execMounter) ExistsPath(pathname string) bool { func (m *execMounter) ExistsPath(pathname string) bool {
return m.wrappedMounter.ExistsPath(pathname) return m.wrappedMounter.ExistsPath(pathname)
} }
func (m *execMounter) PrepareSafeSubpath(subPath Subpath) (newHostPath string, cleanupAction func(), err error) {
return m.wrappedMounter.PrepareSafeSubpath(subPath)
}
func (m *execMounter) CleanSubPaths(podDir string, volumeName string) error {
return m.wrappedMounter.CleanSubPaths(podDir, volumeName)
}
func (m *execMounter) SafeMakeDir(pathname string, base string, perm os.FileMode) error {
return m.wrappedMounter.SafeMakeDir(pathname, base, perm)
}

View File

@ -20,6 +20,7 @@ package mount
import ( import (
"errors" "errors"
"os"
) )
type execMounter struct{} type execMounter struct{}
@ -85,3 +86,15 @@ func (mounter *execMounter) MakeFile(pathname string) error {
func (mounter *execMounter) ExistsPath(pathname string) bool { func (mounter *execMounter) ExistsPath(pathname string) bool {
return true return true
} }
func (mounter *execMounter) PrepareSafeSubpath(subPath Subpath) (newHostPath string, cleanupAction func(), err error) {
return subPath.Path, nil, nil
}
func (mounter *execMounter) CleanSubPaths(podDir string, volumeName string) error {
return nil
}
func (mounter *execMounter) SafeMakeDir(pathname string, base string, perm os.FileMode) error {
return nil
}

View File

@ -197,3 +197,14 @@ func (f *FakeMounter) MakeFile(pathname string) error {
func (f *FakeMounter) ExistsPath(pathname string) bool { func (f *FakeMounter) ExistsPath(pathname string) bool {
return false return false
} }
func (f *FakeMounter) PrepareSafeSubpath(subPath Subpath) (newHostPath string, cleanupAction func(), err error) {
return subPath.Path, nil, nil
}
func (f *FakeMounter) CleanSubPaths(podDir string, volumeName string) error {
return nil
}
func (mounter *FakeMounter) SafeMakeDir(pathname string, base string, perm os.FileMode) error {
return nil
}

View File

@ -19,7 +19,10 @@ limitations under the License.
package mount package mount
import ( import (
"os"
"path/filepath" "path/filepath"
"strings"
"syscall"
) )
type FileType string type FileType string
@ -81,9 +84,45 @@ type Interface interface {
// MakeDir creates a new directory. // MakeDir creates a new directory.
// Will operate in the host mount namespace if kubelet is running in a container // Will operate in the host mount namespace if kubelet is running in a container
MakeDir(pathname string) error MakeDir(pathname string) error
// SafeMakeDir makes sure that the created directory does not escape the given
// base directory by mis-using symlinks. The directory is created in the same
// mount namespace in which kubelet is running. Note that the function only makes
// sure that it creates the directory somewhere under the base, nothing
// else. E.g. if the directory already exists, it may exist outside of the
// base due to symlinks.
SafeMakeDir(pathname string, base string, perm os.FileMode) error
// ExistsPath checks whether the path exists. // ExistsPath checks whether the path exists.
// Will operate in the host mount namespace if kubelet is running in a container // Will operate in the host mount namespace if kubelet is running in a container
ExistsPath(pathname string) bool ExistsPath(pathname string) bool
// CleanSubPaths removes any bind-mounts created by PrepareSafeSubpath in given
// pod volume directory.
CleanSubPaths(podDir string, volumeName string) error
// PrepareSafeSubpath does everything that's necessary to prepare a subPath
// that's 1) inside given volumePath and 2) immutable after this call.
//
// newHostPath - location of prepared subPath. It should be used instead of
// hostPath when running the container.
// cleanupAction - action to run once the container is running or has failed to start.
//
// CleanupAction must be called immediately after the container with given
// subpath starts. On the other hand, Interface.CleanSubPaths must be called
// when the pod finishes.
PrepareSafeSubpath(subPath Subpath) (newHostPath string, cleanupAction func(), err error)
}
type Subpath struct {
// index of the VolumeMount for this container
VolumeMountIndex int
// Full path to the subpath directory on the host
Path string
// name of the volume that is a valid directory name.
VolumeName string
// Full path to the volume path
VolumePath string
// Path to the pod's directory, including pod UID
PodDir string
// Name of the container
ContainerName string
} }
// Exec executes command where mount utilities are. This can be either the host, // Exec executes command where mount utilities are. This can be either the host,
@ -199,6 +238,13 @@ func GetDeviceNameFromMount(mounter Interface, mountPath string) (string, int, e
return device, refCount, nil return device, refCount, nil
} }
func isNotDirErr(err error) bool {
if e, ok := err.(*os.PathError); ok && e.Err == syscall.ENOTDIR {
return true
}
return false
}
// IsNotMountPoint determines if a directory is a mountpoint. // IsNotMountPoint determines if a directory is a mountpoint.
// It should return ErrNotExist when the directory does not exist. // It should return ErrNotExist when the directory does not exist.
// This method uses the List() of all mountpoints // This method uses the List() of all mountpoints
@ -208,7 +254,7 @@ func IsNotMountPoint(mounter Interface, file string) (bool, error) {
// IsLikelyNotMountPoint provides a quick check // IsLikelyNotMountPoint provides a quick check
// to determine whether file IS A mountpoint // to determine whether file IS A mountpoint
notMnt, notMntErr := mounter.IsLikelyNotMountPoint(file) notMnt, notMntErr := mounter.IsLikelyNotMountPoint(file)
if notMntErr != nil { if notMntErr != nil && isNotDirErr(notMntErr) {
return notMnt, notMntErr return notMnt, notMntErr
} }
// identified as mountpoint, so return this fact // identified as mountpoint, so return this fact
@ -254,3 +300,16 @@ func isBind(options []string) (bool, []string) {
return bind, bindRemountOpts return bind, bindRemountOpts
} }
// pathWithinBase checks whether the given path is within the given base directory.
func pathWithinBase(fullPath, basePath string) bool {
rel, err := filepath.Rel(basePath, fullPath)
if err != nil {
return false
}
if strings.HasPrefix(rel, "..") {
// Needed to escape the base path
return false
}
return true
}
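A small illustration of these semantics, with hypothetical kubelet-style paths (an Example-style sketch that assumes it lives in this package, since the helper is unexported, and that fmt is imported):
func ExamplePathWithinBase() {
	base := "/var/lib/kubelet/pods/uid/volumes/vol"
	fmt.Println(pathWithinBase(base+"/sub/dir", base)) // rel is "sub/dir"
	fmt.Println(pathWithinBase(base, base))            // rel is "."
	fmt.Println(pathWithinBase("/etc/passwd", base))   // rel starts with ".."
	// Output:
	// true
	// true
	// false
}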

View File

@ -20,6 +20,7 @@ package mount
import ( import (
"fmt" "fmt"
"io/ioutil"
"os" "os"
"os/exec" "os/exec"
"path" "path"
@ -48,6 +49,11 @@ const (
fsckErrorsCorrected = 1 fsckErrorsCorrected = 1
// 'fsck' found errors but exited without correcting them // 'fsck' found errors but exited without correcting them
fsckErrorsUncorrected = 4 fsckErrorsUncorrected = 4
// place for subpath mounts
containerSubPathDirectoryName = "volume-subpaths"
// syscall.Openat flags used to traverse directories not following symlinks
nofollowFlags = syscall.O_RDONLY | syscall.O_NOFOLLOW
) )
// Mounter provides the default implementation of mount.Interface // Mounter provides the default implementation of mount.Interface
@ -599,6 +605,7 @@ func isShared(path string, filename string) (bool, error) {
} }
type mountInfo struct { type mountInfo struct {
// Path of the mount point
mountPoint string mountPoint string
// list of "optional parameters", mount propagation is one of them // list of "optional parameters", mount propagation is one of them
optional []string optional []string
@ -618,6 +625,7 @@ func parseMountInfo(filename string) ([]mountInfo, error) {
// the last split() item is empty string following the last \n // the last split() item is empty string following the last \n
continue continue
} }
// See `man proc` for authoritative description of format of the file.
fields := strings.Fields(line) fields := strings.Fields(line)
if len(fields) < 7 { if len(fields) < 7 {
return nil, fmt.Errorf("wrong number of fields in (expected %d, got %d): %s", 8, len(fields), line) return nil, fmt.Errorf("wrong number of fields in (expected %d, got %d): %s", 8, len(fields), line)
@ -626,6 +634,7 @@ func parseMountInfo(filename string) ([]mountInfo, error) {
mountPoint: fields[4], mountPoint: fields[4],
optional: []string{}, optional: []string{},
} }
// All fields until "-" are "optional fields".
for i := 6; i < len(fields) && fields[i] != "-"; i++ { for i := 6; i < len(fields) && fields[i] != "-"; i++ {
info.optional = append(info.optional, fields[i]) info.optional = append(info.optional, fields[i])
} }
@ -661,3 +670,439 @@ func doMakeRShared(path string, mountInfoFilename string) error {
return nil return nil
} }
func (mounter *Mounter) PrepareSafeSubpath(subPath Subpath) (newHostPath string, cleanupAction func(), err error) {
newHostPath, err = doBindSubPath(mounter, subPath, os.Getpid())
// There is no action when the container starts. Bind-mount will be cleaned
// when container stops by CleanSubPaths.
cleanupAction = nil
return newHostPath, cleanupAction, err
}
// This implementation is shared between Linux and NsEnterMounter
// kubeletPid is PID of kubelet in the PID namespace where bind-mount is done,
// i.e. pid on the *host* if kubelet runs in a container.
func doBindSubPath(mounter Interface, subpath Subpath, kubeletPid int) (hostPath string, err error) {
// Check early for symlink. This is just a pre-check to avoid bind-mount
// before the final check.
evalSubPath, err := filepath.EvalSymlinks(subpath.Path)
if err != nil {
return "", fmt.Errorf("evalSymlinks %q failed: %v", subpath.Path, err)
}
glog.V(5).Infof("doBindSubPath %q, full subpath %q for volumepath %q", subpath.Path, evalSubPath, subpath.VolumePath)
evalSubPath = filepath.Clean(evalSubPath)
if !pathWithinBase(evalSubPath, subpath.VolumePath) {
return "", fmt.Errorf("subpath %q not within volume path %q", evalSubPath, subpath.VolumePath)
}
// Prepare directory for bind mounts
// containerName is a DNS label, i.e. safe as a directory name.
bindDir := filepath.Join(subpath.PodDir, containerSubPathDirectoryName, subpath.VolumeName, subpath.ContainerName)
err = os.MkdirAll(bindDir, 0750)
if err != nil && !os.IsExist(err) {
return "", fmt.Errorf("error creating directory %s: %s", bindDir, err)
}
bindPathTarget := filepath.Join(bindDir, strconv.Itoa(subpath.VolumeMountIndex))
success := false
defer func() {
// Cleanup subpath on error
if !success {
glog.V(4).Infof("doBindSubPath() failed for %q, cleaning up subpath", bindPathTarget)
if cleanErr := cleanSubPath(mounter, subpath); cleanErr != nil {
glog.Errorf("Failed to clean subpath %q: %v", bindPathTarget, cleanErr)
}
}
}()
// Check it's not already bind-mounted
notMount, err := IsNotMountPoint(mounter, bindPathTarget)
if err != nil {
if !os.IsNotExist(err) {
return "", fmt.Errorf("error checking path %s for mount: %s", bindPathTarget, err)
}
// Ignore ErrorNotExist: the file/directory will be created below if it does not exist yet.
notMount = true
}
if !notMount {
// It's already mounted
glog.V(5).Infof("Skipping bind-mounting subpath %s: already mounted", bindPathTarget)
success = true
return bindPathTarget, nil
}
// Create target of the bind mount. A directory for directories, empty file
// for everything else.
t, err := os.Lstat(subpath.Path)
if err != nil {
return "", fmt.Errorf("lstat %s failed: %s", subpath.Path, err)
}
if t.Mode()&os.ModeDir > 0 {
if err = os.Mkdir(bindPathTarget, 0750); err != nil && !os.IsExist(err) {
return "", fmt.Errorf("error creating directory %s: %s", bindPathTarget, err)
}
} else {
// "/bin/touch <bindDir>".
// A file is enough for all possible targets (symlink, device, pipe,
// socket, ...), bind-mounting them into a file correctly changes type
// of the target file.
if err = ioutil.WriteFile(bindPathTarget, []byte{}, 0640); err != nil {
return "", fmt.Errorf("error creating file %s: %s", bindPathTarget, err)
}
}
// Safe open subpath and get the fd
fd, err := doSafeOpen(evalSubPath, subpath.VolumePath)
if err != nil {
return "", fmt.Errorf("error opening subpath %v: %v", evalSubPath, err)
}
defer syscall.Close(fd)
mountSource := fmt.Sprintf("/proc/%d/fd/%v", kubeletPid, fd)
// Do the bind mount
glog.V(5).Infof("bind mounting %q at %q", mountSource, bindPathTarget)
if err = mounter.Mount(mountSource, bindPathTarget, "" /*fstype*/, []string{"bind"}); err != nil {
return "", fmt.Errorf("error mounting %s: %s", subpath.Path, err)
}
success = true
glog.V(3).Infof("Bound SubPath %s into %s", subpath.Path, bindPathTarget)
return bindPathTarget, nil
}
func (mounter *Mounter) CleanSubPaths(podDir string, volumeName string) error {
return doCleanSubPaths(mounter, podDir, volumeName)
}
// This implementation is shared between Linux and NsEnterMounter
func doCleanSubPaths(mounter Interface, podDir string, volumeName string) error {
glog.V(4).Infof("Cleaning up subpath mounts for %s", podDir)
// scan /var/lib/kubelet/pods/<uid>/volume-subpaths/<volume>/*
subPathDir := filepath.Join(podDir, containerSubPathDirectoryName, volumeName)
containerDirs, err := ioutil.ReadDir(subPathDir)
if err != nil {
if os.IsNotExist(err) {
return nil
}
return fmt.Errorf("error reading %s: %s", subPathDir, err)
}
for _, containerDir := range containerDirs {
if !containerDir.IsDir() {
glog.V(4).Infof("Container file is not a directory: %s", containerDir.Name())
continue
}
glog.V(4).Infof("Cleaning up subpath mounts for container %s", containerDir.Name())
// scan /var/lib/kubelet/pods/<uid>/volume-subpaths/<volume>/<container name>/*
fullContainerDirPath := filepath.Join(subPathDir, containerDir.Name())
subPaths, err := ioutil.ReadDir(fullContainerDirPath)
if err != nil {
return fmt.Errorf("error reading %s: %s", fullContainerDirPath, err)
}
for _, subPath := range subPaths {
if err = doCleanSubPath(mounter, fullContainerDirPath, subPath.Name()); err != nil {
return err
}
}
// Whole container has been processed, remove its directory.
if err := os.Remove(fullContainerDirPath); err != nil {
return fmt.Errorf("error deleting %s: %s", fullContainerDirPath, err)
}
glog.V(5).Infof("Removed %s", fullContainerDirPath)
}
// Whole pod volume subpaths have been cleaned up, remove its subpath directory.
if err := os.Remove(subPathDir); err != nil {
return fmt.Errorf("error deleting %s: %s", subPathDir, err)
}
glog.V(5).Infof("Removed %s", subPathDir)
// Remove entire subpath directory if it's the last one
podSubPathDir := filepath.Join(podDir, containerSubPathDirectoryName)
if err := os.Remove(podSubPathDir); err != nil && !os.IsExist(err) {
return fmt.Errorf("error deleting %s: %s", podSubPathDir, err)
}
glog.V(5).Infof("Removed %s", podSubPathDir)
return nil
}
// doCleanSubPath tears down the single subpath bind mount
func doCleanSubPath(mounter Interface, fullContainerDirPath, subPathIndex string) error {
// process /var/lib/kubelet/pods/<uid>/volume-subpaths/<volume>/<container name>/<subPathName>
glog.V(4).Infof("Cleaning up subpath mounts for subpath %v", subPathIndex)
fullSubPath := filepath.Join(fullContainerDirPath, subPathIndex)
notMnt, err := IsNotMountPoint(mounter, fullSubPath)
if err != nil {
return fmt.Errorf("error checking %s for mount: %s", fullSubPath, err)
}
// Unmount it
if !notMnt {
if err = mounter.Unmount(fullSubPath); err != nil {
return fmt.Errorf("error unmounting %s: %s", fullSubPath, err)
}
glog.V(5).Infof("Unmounted %s", fullSubPath)
}
// Remove it *non*-recursively, just in case there were some hiccups.
if err = os.Remove(fullSubPath); err != nil {
return fmt.Errorf("error deleting %s: %s", fullSubPath, err)
}
glog.V(5).Infof("Removed %s", fullSubPath)
return nil
}
// cleanSubPath tears down the subpath bind mount and removes any directories if they are empty
func cleanSubPath(mounter Interface, subpath Subpath) error {
containerDir := filepath.Join(subpath.PodDir, containerSubPathDirectoryName, subpath.VolumeName, subpath.ContainerName)
// Clean subdir bindmount
if err := doCleanSubPath(mounter, containerDir, strconv.Itoa(subpath.VolumeMountIndex)); err != nil && !os.IsNotExist(err) {
return err
}
// Recursively remove directories if empty
if err := removeEmptyDirs(subpath.PodDir, containerDir); err != nil {
return err
}
return nil
}
// removeEmptyDirs works backwards from endDir to baseDir and removes each directory
// if it is empty. It stops once it encounters a directory that has content
func removeEmptyDirs(baseDir, endDir string) error {
if !pathWithinBase(endDir, baseDir) {
return fmt.Errorf("endDir %q is not within baseDir %q", endDir, baseDir)
}
for curDir := endDir; curDir != baseDir; curDir = filepath.Dir(curDir) {
s, err := os.Stat(curDir)
if err != nil {
if os.IsNotExist(err) {
glog.V(5).Infof("curDir %q doesn't exist, skipping", curDir)
continue
}
return fmt.Errorf("error stat %q: %v", curDir, err)
}
if !s.IsDir() {
return fmt.Errorf("path %q not a directory", curDir)
}
err = os.Remove(curDir)
if os.IsExist(err) {
glog.V(5).Infof("Directory %q not empty, not removing", curDir)
break
} else if err != nil {
return fmt.Errorf("error removing directory %q: %v", curDir, err)
}
glog.V(5).Infof("Removed directory %q", curDir)
}
return nil
}
func (mounter *Mounter) SafeMakeDir(pathname string, base string, perm os.FileMode) error {
return doSafeMakeDir(pathname, base, perm)
}
// This implementation is shared between Linux and NsEnterMounter
func doSafeMakeDir(pathname string, base string, perm os.FileMode) error {
glog.V(4).Infof("Creating directory %q within base %q", pathname, base)
if !pathWithinBase(pathname, base) {
return fmt.Errorf("path %s is outside of allowed base %s", pathname, base)
}
// Quick check if the directory already exists
s, err := os.Stat(pathname)
if err == nil {
// Path exists
if s.IsDir() {
// The directory already exists. It can be outside of the parent,
// but there is no race-proof check.
glog.V(4).Infof("Directory %s already exists", pathname)
return nil
}
return &os.PathError{Op: "mkdir", Path: pathname, Err: syscall.ENOTDIR}
}
// Find all existing directories
existingPath, toCreate, err := findExistingPrefix(base, pathname)
if err != nil {
return fmt.Errorf("error opening directory %s: %s", pathname, err)
}
// Ensure the existing directory is inside allowed base
fullExistingPath, err := filepath.EvalSymlinks(existingPath)
if err != nil {
return fmt.Errorf("error opening directory %s: %s", existingPath, err)
}
if !pathWithinBase(fullExistingPath, base) {
return fmt.Errorf("path %s is outside of allowed base %s", fullExistingPath, err)
}
glog.V(4).Infof("%q already exists, %q to create", fullExistingPath, filepath.Join(toCreate...))
parentFD, err := doSafeOpen(fullExistingPath, base)
if err != nil {
return fmt.Errorf("cannot open directory %s: %s", existingPath, err)
}
childFD := -1
defer func() {
if parentFD != -1 {
if err = syscall.Close(parentFD); err != nil {
glog.V(4).Infof("Closing FD %v failed for safemkdir(%v): %v", parentFD, pathname, err)
}
}
if childFD != -1 {
if err = syscall.Close(childFD); err != nil {
glog.V(4).Infof("Closing FD %v failed for safemkdir(%v): %v", childFD, pathname, err)
}
}
}()
currentPath := fullExistingPath
// create the directories one by one, making sure nobody can change
// a created directory into a symlink.
for _, dir := range toCreate {
currentPath = filepath.Join(currentPath, dir)
glog.V(4).Infof("Creating %s", dir)
err = syscall.Mkdirat(parentFD, currentPath, uint32(perm))
if err != nil {
return fmt.Errorf("cannot create directory %s: %s", currentPath, err)
}
// Dive into the created directory
childFD, err := syscall.Openat(parentFD, dir, nofollowFlags, 0)
if err != nil {
return fmt.Errorf("cannot open %s: %s", currentPath, err)
}
// We can be sure that childFD is safe to use. It could be changed
// by user after Mkdirat() and before Openat(), however:
// - it could not be changed to symlink - we use nofollowFlags
// - it could be changed to a file (or device, pipe, socket, ...)
// but either subsequent Mkdirat() fails or we mount this file
// to the user's container. Security is not violated in either case,
// and the user either gets an error or a file they can already access.
if err = syscall.Close(parentFD); err != nil {
glog.V(4).Infof("Closing FD %v failed for safemkdir(%v): %v", parentFD, pathname, err)
}
parentFD = childFD
childFD = -1
}
// Everything was created. mkdirat(..., perm) above was affected by current
// umask and we must apply the right permissions to the last directory
// (that's the one that will be available to the container as subpath)
// so user can read/write it. This is the behavior of previous code.
// TODO: chmod all created directories, not just the last one.
// parentFD is the last created directory.
if err = syscall.Fchmod(parentFD, uint32(perm)&uint32(os.ModePerm)); err != nil {
return fmt.Errorf("chmod %q failed: %s", currentPath, err)
}
return nil
}
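A hedged sketch of the two guarantees this provides, written as an in-package Linux test with temporary directories (names and imports — testing, io/ioutil, os, path/filepath — are assumptions of the sketch):
func TestDoSafeMakeDirSketch(t *testing.T) {
	base, err := ioutil.TempDir("", "safemkdir")
	if err != nil {
		t.Fatal(err)
	}
	defer os.RemoveAll(base)
	// Nested directories inside the base are created.
	if err := doSafeMakeDir(filepath.Join(base, "a", "b"), base, 0750); err != nil {
		t.Fatal(err)
	}
	// Paths outside the base are refused up front by pathWithinBase.
	if err := doSafeMakeDir("/tmp/outside", base, 0750); err == nil {
		t.Error("expected out-of-base path to be rejected")
	}
}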
// findExistingPrefix finds prefix of pathname that exists. In addition, it
// returns list of remaining directories that don't exist yet.
func findExistingPrefix(base, pathname string) (string, []string, error) {
rel, err := filepath.Rel(base, pathname)
if err != nil {
return base, nil, err
}
dirs := strings.Split(rel, string(filepath.Separator))
// Do OpenAt in a loop to find the first non-existing dir. Resolve symlinks.
// This should be faster than looping through all dirs and calling os.Stat()
// on each of them, as the symlinks are resolved only once with OpenAt().
currentPath := base
fd, err := syscall.Open(currentPath, syscall.O_RDONLY, 0)
if err != nil {
return pathname, nil, fmt.Errorf("error opening %s: %s", currentPath, err)
}
defer func() {
if err = syscall.Close(fd); err != nil {
glog.V(4).Infof("Closing FD %v failed for findExistingPrefix(%v): %v", fd, pathname, err)
}
}()
for i, dir := range dirs {
childFD, err := syscall.Openat(fd, dir, syscall.O_RDONLY, 0)
if err != nil {
if os.IsNotExist(err) {
return currentPath, dirs[i:], nil
}
return base, nil, err
}
if err = syscall.Close(fd); err != nil {
glog.V(4).Infof("Closing FD %v failed for findExistingPrefix(%v): %v", fd, pathname, err)
}
fd = childFD
currentPath = filepath.Join(currentPath, dir)
}
return pathname, []string{}, nil
}
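A sketch of the contract as an in-package Linux test (illustrative names; only base/a exists, so it is the returned prefix and the remaining segments are reported as still to create):
func TestFindExistingPrefixSketch(t *testing.T) {
	base, err := ioutil.TempDir("", "prefix")
	if err != nil {
		t.Fatal(err)
	}
	defer os.RemoveAll(base)
	if err := os.Mkdir(filepath.Join(base, "a"), 0750); err != nil {
		t.Fatal(err)
	}
	existing, toCreate, err := findExistingPrefix(base, filepath.Join(base, "a", "b", "c"))
	if err != nil {
		t.Fatal(err)
	}
	if existing != filepath.Join(base, "a") || len(toCreate) != 2 {
		t.Errorf("unexpected result: %q, %v", existing, toCreate)
	}
}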
// This implementation is shared between Linux and NsEnterMounter
// Open path and return its fd.
// Symlinks are disallowed (pathname must already resolve symlinks),
// and the path must be within the base directory.
func doSafeOpen(pathname string, base string) (int, error) {
// Calculate segments to follow
subpath, err := filepath.Rel(base, pathname)
if err != nil {
return -1, err
}
segments := strings.Split(subpath, string(filepath.Separator))
// Assumption: base is the only directory that we have under control.
// Base dir is not allowed to be a symlink.
parentFD, err := syscall.Open(base, nofollowFlags, 0)
if err != nil {
return -1, fmt.Errorf("cannot open directory %s: %s", base, err)
}
defer func() {
if parentFD != -1 {
if err = syscall.Close(parentFD); err != nil {
glog.V(4).Infof("Closing FD %v failed for safeopen(%v): %v", parentFD, pathname, err)
}
}
}()
childFD := -1
defer func() {
if childFD != -1 {
if err = syscall.Close(childFD); err != nil {
glog.V(4).Infof("Closing FD %v failed for safeopen(%v): %v", childFD, pathname, err)
}
}
}()
currentPath := base
// Follow the segments one by one using openat() to make
// sure the user cannot change already existing directories into symlinks.
for _, seg := range segments {
currentPath = filepath.Join(currentPath, seg)
if !pathWithinBase(currentPath, base) {
return -1, fmt.Errorf("path %s is outside of allowed base %s", currentPath, base)
}
glog.V(5).Infof("Opening path %s", currentPath)
childFD, err = syscall.Openat(parentFD, seg, nofollowFlags, 0)
if err != nil {
return -1, fmt.Errorf("cannot open %s: %s", currentPath, err)
}
// Close parentFD
if err = syscall.Close(parentFD); err != nil {
return -1, fmt.Errorf("closing fd for %q failed: %v", filepath.Dir(currentPath), err)
}
// Set child to new parent
parentFD = childFD
childFD = -1
}
// We made it to the end, return this fd, don't close it
finalFD := parentFD
parentFD = -1
return finalFD, nil
}
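The O_NOFOLLOW traversal is the heart of the defense: even a symlink that currently resolves inside the base is refused, because an attacker could re-point it later. A sketch of that property as an in-package Linux test (illustrative only):
func TestDoSafeOpenRejectsSymlinkSketch(t *testing.T) {
	base, err := ioutil.TempDir("", "safeopen")
	if err != nil {
		t.Fatal(err)
	}
	defer os.RemoveAll(base)
	if err := os.Mkdir(filepath.Join(base, "real"), 0750); err != nil {
		t.Fatal(err)
	}
	if err := os.Symlink("real", filepath.Join(base, "link")); err != nil {
		t.Fatal(err)
	}
	// openat() with O_NOFOLLOW fails on the symlink segment (ELOOP).
	if fd, err := doSafeOpen(filepath.Join(base, "link"), base); err == nil {
		syscall.Close(fd)
		t.Error("expected symlink segment to be rejected")
	}
}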

View File

@ -20,6 +20,7 @@ package mount
import ( import (
"errors" "errors"
"os"
) )
type Mounter struct { type Mounter struct {
@ -108,3 +109,15 @@ func (mounter *Mounter) MakeFile(pathname string) error {
func (mounter *Mounter) ExistsPath(pathname string) bool { func (mounter *Mounter) ExistsPath(pathname string) bool {
return true return true
} }
func (mounter *Mounter) PrepareSafeSubpath(subPath Subpath) (newHostPath string, cleanupAction func(), err error) {
return subPath.Path, nil, nil
}
func (mounter *Mounter) CleanSubPaths(podDir string, volumeName string) error {
return nil
}
func (mounter *Mounter) SafeMakeDir(pathname string, base string, perm os.FileMode) error {
return nil
}

View File

@ -89,10 +89,7 @@ func (mounter *Mounter) Mount(source string, target string, fstype string, optio
cmdLine += fmt.Sprintf(";New-SmbGlobalMapping -RemotePath %s -Credential $Credential", source) cmdLine += fmt.Sprintf(";New-SmbGlobalMapping -RemotePath %s -Credential $Credential", source)
if output, err := exec.Command("powershell", "/c", cmdLine).CombinedOutput(); err != nil { if output, err := exec.Command("powershell", "/c", cmdLine).CombinedOutput(); err != nil {
// we don't return error here, even though New-SmbGlobalMapping failed, we still make it successful, return fmt.Errorf("azureMount: SmbGlobalMapping failed: %v, only SMB mount is supported now, output: %q", err, string(output))
// will return error when Windows 2016 RS3 is ready on azure
glog.Errorf("azureMount: SmbGlobalMapping failed: %v, only SMB mount is supported now, output: %q", err, string(output))
return os.MkdirAll(target, 0755)
} }
} }
@ -263,6 +260,123 @@ func (mounter *Mounter) ExistsPath(pathname string) bool {
return true return true
} }
// check whether hostPath is within volume path
// this func locks all intermediate subpath directories; the handles must be closed outside of this func after the container has started
func lockAndCheckSubPath(volumePath, hostPath string) ([]uintptr, error) {
if len(volumePath) == 0 || len(hostPath) == 0 {
return []uintptr{}, nil
}
finalSubPath, err := filepath.EvalSymlinks(hostPath)
if err != nil {
return []uintptr{}, fmt.Errorf("cannot read link %s: %s", hostPath, err)
}
finalVolumePath, err := filepath.EvalSymlinks(volumePath)
if err != nil {
return []uintptr{}, fmt.Errorf("cannot read link %s: %s", volumePath, err)
}
return lockAndCheckSubPathWithoutSymlink(finalVolumePath, finalSubPath)
}
// lock all intermediate subPath directories and check they are all within volumePath
// volumePath & subPath should not contain any symlink, otherwise it will return error
func lockAndCheckSubPathWithoutSymlink(volumePath, subPath string) ([]uintptr, error) {
if len(volumePath) == 0 || len(subPath) == 0 {
return []uintptr{}, nil
}
// get relative path to volumePath
relSubPath, err := filepath.Rel(volumePath, subPath)
if err != nil {
return []uintptr{}, fmt.Errorf("Rel(%s, %s) error: %v", volumePath, subPath, err)
}
if strings.HasPrefix(relSubPath, "..") {
return []uintptr{}, fmt.Errorf("SubPath %q not within volume path %q", subPath, volumePath)
}
if relSubPath == "." {
// volumePath and subPath are equal
return []uintptr{}, nil
}
fileHandles := []uintptr{}
var errorResult error
currentFullPath := volumePath
dirs := strings.Split(relSubPath, string(os.PathSeparator))
for _, dir := range dirs {
// lock intermediate subPath directory first
currentFullPath = filepath.Join(currentFullPath, dir)
handle, err := lockPath(currentFullPath)
if err != nil {
errorResult = fmt.Errorf("cannot lock path %s: %s", currentFullPath, err)
break
}
fileHandles = append(fileHandles, handle)
// make sure the intermediate subPath directory no longer contains a symlink
stat, err := os.Lstat(currentFullPath)
if err != nil {
errorResult = fmt.Errorf("Lstat(%q) error: %v", currentFullPath, err)
break
}
if stat.Mode()&os.ModeSymlink != 0 {
errorResult = fmt.Errorf("subpath %q is an unexpected symlink after EvalSymlinks", currentFullPath)
break
}
if !pathWithinBase(currentFullPath, volumePath) {
errorResult = fmt.Errorf("SubPath %q not within volume path %q", currentFullPath, volumePath)
break
}
}
return fileHandles, errorResult
}
// unlockPath unlock directories
func unlockPath(fileHandles []uintptr) {
if fileHandles != nil {
for _, handle := range fileHandles {
syscall.CloseHandle(syscall.Handle(handle))
}
}
}
// lockPath locks a directory or symlink and returns its handle; call "syscall.CloseHandle(handle)" to unlock the path
func lockPath(path string) (uintptr, error) {
if len(path) == 0 {
return uintptr(syscall.InvalidHandle), syscall.ERROR_FILE_NOT_FOUND
}
pathp, err := syscall.UTF16PtrFromString(path)
if err != nil {
return uintptr(syscall.InvalidHandle), err
}
access := uint32(syscall.GENERIC_READ)
sharemode := uint32(syscall.FILE_SHARE_READ)
createmode := uint32(syscall.OPEN_EXISTING)
flags := uint32(syscall.FILE_FLAG_BACKUP_SEMANTICS | syscall.FILE_FLAG_OPEN_REPARSE_POINT)
fd, err := syscall.CreateFile(pathp, access, sharemode, nil, createmode, flags, 0)
return uintptr(fd), err
}
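A short sketch of how these helpers compose (a hypothetical in-package wrapper; note that lockAndCheckSubPath can return partially acquired handles together with an error, and unlockPath tolerates that):
func withLockedSubPath(volumePath, subPath string, fn func() error) error {
	handles, err := lockAndCheckSubPath(volumePath, subPath)
	// Release whatever was locked, whether or not all locks succeeded.
	defer unlockPath(handles)
	if err != nil {
		return err
	}
	return fn()
}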
// Lock all directories in subPath and check they're not symlinks.
func (mounter *Mounter) PrepareSafeSubpath(subPath Subpath) (newHostPath string, cleanupAction func(), err error) {
handles, err := lockAndCheckSubPath(subPath.VolumePath, subPath.Path)
// Unlock the directories when the container starts
cleanupAction = func() {
unlockPath(handles)
}
return subPath.Path, cleanupAction, err
}
// No bind-mounts for subpaths are necessary on Windows
func (mounter *Mounter) CleanSubPaths(podDir string, volumeName string) error {
return nil
}
func (mounter *SafeFormatAndMount) formatAndMount(source string, target string, fstype string, options []string) error { func (mounter *SafeFormatAndMount) formatAndMount(source string, target string, fstype string, options []string) error {
// Try to mount the disk // Try to mount the disk
glog.V(4).Infof("Attempting to formatAndMount disk: %s %s %s", fstype, source, target) glog.V(4).Infof("Attempting to formatAndMount disk: %s %s %s", fstype, source, target)
@ -347,3 +461,120 @@ func getAllParentLinks(path string) ([]string, error) {
return links, nil return links, nil
} }
// SafeMakeDir makes sure that the created directory does not escape the given base directory by mis-using symlinks.
func (mounter *Mounter) SafeMakeDir(pathname string, base string, perm os.FileMode) error {
return doSafeMakeDir(pathname, base, perm)
}
func doSafeMakeDir(pathname string, base string, perm os.FileMode) error {
glog.V(4).Infof("Creating directory %q within base %q", pathname, base)
if !pathWithinBase(pathname, base) {
return fmt.Errorf("path %s is outside of allowed base %s", pathname, base)
}
// Quick check if the directory already exists
s, err := os.Stat(pathname)
if err == nil {
// Path exists
if s.IsDir() {
// The directory already exists. It can be outside of the parent,
// but there is no race-proof check.
glog.V(4).Infof("Directory %s already exists", pathname)
return nil
}
return &os.PathError{Op: "mkdir", Path: pathname, Err: syscall.ENOTDIR}
}
// Find all existing directories
existingPath, toCreate, err := findExistingPrefix(base, pathname)
if err != nil {
return fmt.Errorf("error opening directory %s: %s", pathname, err)
}
if len(toCreate) == 0 {
return nil
}
// Ensure the existing directory is inside allowed base
fullExistingPath, err := filepath.EvalSymlinks(existingPath)
if err != nil {
return fmt.Errorf("error opening existing directory %s: %s", existingPath, err)
}
fullBasePath, err := filepath.EvalSymlinks(base)
if err != nil {
return fmt.Errorf("cannot read link %s: %s", base, err)
}
if !pathWithinBase(fullExistingPath, fullBasePath) {
return fmt.Errorf("path %s is outside of allowed base %s", fullExistingPath, err)
}
// lock all intermediate directories from fullBasePath to fullExistingPath (top to bottom)
fileHandles, err := lockAndCheckSubPathWithoutSymlink(fullBasePath, fullExistingPath)
defer unlockPath(fileHandles)
if err != nil {
return err
}
glog.V(4).Infof("%q already exists, %q to create", fullExistingPath, filepath.Join(toCreate...))
currentPath := fullExistingPath
// create the directories one by one, making sure nobody can change
// a created directory into a symlink, by locking each directory immediately
for _, dir := range toCreate {
currentPath = filepath.Join(currentPath, dir)
glog.V(4).Infof("Creating %s", dir)
if err := os.Mkdir(currentPath, perm); err != nil {
return fmt.Errorf("cannot create directory %s: %s", currentPath, err)
}
handle, err := lockPath(currentPath)
if err != nil {
return fmt.Errorf("cannot lock path %s: %s", currentPath, err)
}
defer syscall.CloseHandle(syscall.Handle(handle))
// make sure the newly created directory does not contain a symlink after locking
stat, err := os.Lstat(currentPath)
if err != nil {
return fmt.Errorf("Lstat(%q) error: %v", currentPath, err)
}
if stat.Mode()&os.ModeSymlink != 0 {
return fmt.Errorf("subpath %q is an unexpected symlink after Mkdir", currentPath)
}
}
return nil
}
// findExistingPrefix finds prefix of pathname that exists. In addition, it
// returns list of remaining directories that don't exist yet.
func findExistingPrefix(base, pathname string) (string, []string, error) {
rel, err := filepath.Rel(base, pathname)
if err != nil {
return base, nil, err
}
if strings.HasPrefix(rel, "..") {
return base, nil, fmt.Errorf("pathname(%s) is not within base(%s)", pathname, base)
}
if rel == "." {
// base and pathname are equal
return pathname, []string{}, nil
}
dirs := strings.Split(rel, string(filepath.Separator))
parent := base
currentPath := base
for i, dir := range dirs {
parent = currentPath
currentPath = filepath.Join(parent, dir)
if _, err := os.Lstat(currentPath); err != nil {
if os.IsNotExist(err) {
return parent, dirs[i:], nil
}
return base, nil, err
}
}
return pathname, []string{}, nil
}

View File

@ -22,9 +22,12 @@ import (
"fmt" "fmt"
"os" "os"
"path/filepath" "path/filepath"
"regexp"
"strconv"
"strings" "strings"
"github.com/golang/glog" "github.com/golang/glog"
utilio "k8s.io/kubernetes/pkg/util/io"
"k8s.io/kubernetes/pkg/util/nsenter" "k8s.io/kubernetes/pkg/util/nsenter"
) )
@ -33,6 +36,13 @@ const (
hostProcMountsPath = "/rootfs/proc/1/mounts" hostProcMountsPath = "/rootfs/proc/1/mounts"
// hostProcMountinfoPath is the default mount info path for rootfs // hostProcMountinfoPath is the default mount info path for rootfs
hostProcMountinfoPath = "/rootfs/proc/1/mountinfo" hostProcMountinfoPath = "/rootfs/proc/1/mountinfo"
// hostProcSelfStatusPath is the default path to /proc/self/status on the host
hostProcSelfStatusPath = "/rootfs/proc/self/status"
)
var (
// pidRegExp matches "Pid: <pid>" in /proc/self/status
pidRegExp = regexp.MustCompile(`\nPid:\t([0-9]*)\n`)
) )
// Currently, all docker containers receive their own mount namespaces. // Currently, all docker containers receive their own mount namespaces.
@ -270,3 +280,41 @@ func (mounter *NsenterMounter) ExistsPath(pathname string) bool {
} }
return false return false
} }
func (mounter *NsenterMounter) CleanSubPaths(podDir string, volumeName string) error {
return doCleanSubPaths(mounter, podDir, volumeName)
}
// getPidOnHost returns kubelet's pid in the host pid namespace
func (mounter *NsenterMounter) getPidOnHost(procStatusPath string) (int, error) {
// Get the PID from /rootfs/proc/self/status
statusBytes, err := utilio.ConsistentRead(procStatusPath, maxListTries)
if err != nil {
return 0, fmt.Errorf("error reading %s: %s", procStatusPath, err)
}
matches := pidRegExp.FindSubmatch(statusBytes)
if len(matches) < 2 {
return 0, fmt.Errorf("cannot parse %s: no Pid:", procStatusPath)
}
return strconv.Atoi(string(matches[1]))
}
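For reference, a sketch of what pidRegExp extracts, using invented /proc/self/status content (in-package, assumes fmt):
func ExamplePidRegExp() {
	status := []byte("Name:\tkubelet\nPid:\t4242\nPPid:\t1\n")
	if m := pidRegExp.FindSubmatch(status); len(m) == 2 {
		fmt.Println(string(m[1]))
	}
	// Output: 4242
}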
func (mounter *NsenterMounter) PrepareSafeSubpath(subPath Subpath) (newHostPath string, cleanupAction func(), err error) {
hostPid, err := mounter.getPidOnHost(hostProcSelfStatusPath)
if err != nil {
return "", nil, err
}
glog.V(4).Infof("Kubelet's PID on the host is %d", hostPid)
// Bind-mount the subpath to avoid using symlinks in subpaths.
newHostPath, err = doBindSubPath(mounter, subPath, hostPid)
// There is no action when the container starts. Bind-mount will be cleaned
// when container stops by CleanSubPaths.
cleanupAction = nil
return newHostPath, cleanupAction, err
}
func (mounter *NsenterMounter) SafeMakeDir(pathname string, base string, perm os.FileMode) error {
return doSafeMakeDir(pathname, base, perm)
}

View File

@ -20,6 +20,7 @@ package mount
import ( import (
"errors" "errors"
"os"
) )
type NsenterMounter struct{} type NsenterMounter struct{}
@ -85,3 +86,15 @@ func (*NsenterMounter) MakeFile(pathname string) error {
func (*NsenterMounter) ExistsPath(pathname string) bool { func (*NsenterMounter) ExistsPath(pathname string) bool {
return true return true
} }
func (*NsenterMounter) SafeMakeDir(pathname string, base string, perm os.FileMode) error {
return nil
}
func (*NsenterMounter) PrepareSafeSubpath(subPath Subpath) (newHostPath string, cleanupAction func(), err error) {
return subPath.Path, nil, nil
}
func (*NsenterMounter) CleanSubPaths(podDir string, volumeName string) error {
return nil
}

View File

@ -38,6 +38,7 @@ const (
defaultFSType = "ext4" defaultFSType = "ext4"
defaultStorageAccountType = storage.StandardLRS defaultStorageAccountType = storage.StandardLRS
defaultAzureDiskKind = v1.AzureSharedBlobDisk defaultAzureDiskKind = v1.AzureSharedBlobDisk
defaultAzureDataDiskCachingMode = v1.AzureDataDiskCachingNone
) )
type dataDisk struct { type dataDisk struct {
@ -141,7 +142,7 @@ func normalizeStorageAccountType(storageAccountType string) (storage.SkuName, er
func normalizeCachingMode(cachingMode v1.AzureDataDiskCachingMode) (v1.AzureDataDiskCachingMode, error) { func normalizeCachingMode(cachingMode v1.AzureDataDiskCachingMode) (v1.AzureDataDiskCachingMode, error) {
if cachingMode == "" { if cachingMode == "" {
return v1.AzureDataDiskCachingReadWrite, nil return defaultAzureDataDiskCachingMode, nil
} }
if !supportedCachingModes.Has(string(cachingMode)) { if !supportedCachingModes.Has(string(cachingMode)) {

View File

@ -38,9 +38,9 @@ var _ volume.ProvisionableVolumePlugin = &azureFilePlugin{}
// azure cloud provider should implement it // azure cloud provider should implement it
type azureCloudProvider interface { type azureCloudProvider interface {
// create a file share // create a file share
CreateFileShare(name, storageAccount, storageType, location string, requestGB int) (string, string, error) CreateFileShare(shareName, accountName, accountType, location string, requestGiB int) (string, string, error)
// delete a file share // delete a file share
DeleteFileShare(accountName, key, name string) error DeleteFileShare(accountName, accountKey, shareName string) error
} }
type azureFileDeleter struct { type azureFileDeleter struct {

View File

@ -194,6 +194,9 @@ func (b *configMapVolumeMounter) SetUpAt(dir string, fsGroup *int64) error {
if err := wrapped.SetUpAt(dir, fsGroup); err != nil { if err := wrapped.SetUpAt(dir, fsGroup); err != nil {
return err return err
} }
if err := volumeutil.MakeNestedMountpoints(b.volName, dir, b.pod); err != nil {
return err
}
optional := b.source.Optional != nil && *b.source.Optional optional := b.source.Optional != nil && *b.source.Optional
configMap, err := b.getConfigMap(b.pod.Namespace, b.source.Name) configMap, err := b.getConfigMap(b.pod.Namespace, b.source.Name)

View File

@ -183,6 +183,9 @@ func (b *downwardAPIVolumeMounter) SetUpAt(dir string, fsGroup *int64) error {
glog.Errorf("Unable to setup downwardAPI volume %v for pod %v/%v: %s", b.volName, b.pod.Namespace, b.pod.Name, err.Error()) glog.Errorf("Unable to setup downwardAPI volume %v for pod %v/%v: %s", b.volName, b.pod.Namespace, b.pod.Name, err.Error())
return err return err
} }
if err := volumeutil.MakeNestedMountpoints(b.volName, dir, *b.pod); err != nil {
return err
}
data, err := CollectData(b.source.Items, b.pod, b.plugin.host, b.source.DefaultMode) data, err := CollectData(b.source.Items, b.pod, b.plugin.host, b.source.DefaultMode)
if err != nil { if err != nil {

View File

@ -34,6 +34,7 @@ const (
osdDriverVersion = "v1" osdDriverVersion = "v1"
pxdDriverName = "pxd" pxdDriverName = "pxd"
pvcClaimLabel = "pvc" pvcClaimLabel = "pvc"
pvcNamespaceLabel = "namespace"
pxServiceName = "portworx-service" pxServiceName = "portworx-service"
pxDriverName = "pxd-sched" pxDriverName = "pxd-sched"
) )
@ -80,9 +81,20 @@ func (util *PortworxVolumeUtil) CreateVolume(p *portworxVolumeProvisioner) (stri
// Add claim Name as a part of Portworx Volume Labels // Add claim Name as a part of Portworx Volume Labels
locator.VolumeLabels[pvcClaimLabel] = p.options.PVC.Name locator.VolumeLabels[pvcClaimLabel] = p.options.PVC.Name
locator.VolumeLabels[pvcNamespaceLabel] = p.options.PVC.Namespace
for k, v := range p.options.PVC.Annotations {
if _, present := spec.VolumeLabels[k]; present {
glog.Warningf("not saving annotation: %s=%s in spec labels due to an existing key", k, v)
continue
}
spec.VolumeLabels[k] = v
}
volumeID, err := driver.Create(locator, source, spec) volumeID, err := driver.Create(locator, source, spec)
if err != nil { if err != nil {
glog.Errorf("Error creating Portworx Volume : %v", err) glog.Errorf("Error creating Portworx Volume : %v", err)
return "", 0, nil, err
} }
glog.Infof("Successfully created Portworx volume for PVC: %v", p.options.PVC.Name) glog.Infof("Successfully created Portworx volume for PVC: %v", p.options.PVC.Name)

View File

@ -191,6 +191,9 @@ func (s *projectedVolumeMounter) SetUpAt(dir string, fsGroup *int64) error {
if err := wrapped.SetUpAt(dir, fsGroup); err != nil { if err := wrapped.SetUpAt(dir, fsGroup); err != nil {
return err return err
} }
if err := volumeutil.MakeNestedMountpoints(s.volName, dir, *s.pod); err != nil {
return err
}
data, err := s.collectData() data, err := s.collectData()
if err != nil { if err != nil {

View File

@ -193,6 +193,9 @@ func (b *secretVolumeMounter) SetUpAt(dir string, fsGroup *int64) error {
if err := wrapped.SetUpAt(dir, fsGroup); err != nil { if err := wrapped.SetUpAt(dir, fsGroup); err != nil {
return err return err
} }
if err := volumeutil.MakeNestedMountpoints(b.volName, dir, b.pod); err != nil {
return err
}
optional := b.source.Optional != nil && *b.source.Optional optional := b.source.Optional != nil && *b.source.Optional
secret, err := b.getSecret(b.pod.Namespace, b.source.SecretName) secret, err := b.getSecret(b.pod.Namespace, b.source.SecretName)

View File

@ -18,6 +18,7 @@ go_library(
"fs_unsupported.go", "fs_unsupported.go",
"io_util.go", "io_util.go",
"metrics.go", "metrics.go",
"nested_volumes.go",
"util.go", "util.go",
"util_unsupported.go", "util_unsupported.go",
] + select({ ] + select({
@ -63,6 +64,7 @@ go_test(
name = "go_default_test", name = "go_default_test",
srcs = [ srcs = [
"finalizer_test.go", "finalizer_test.go",
"nested_volumes_test.go",
"util_test.go", "util_test.go",
] + select({ ] + select({
"@io_bazel_rules_go//go/platform:linux_amd64": [ "@io_bazel_rules_go//go/platform:linux_amd64": [
@ -79,6 +81,7 @@ go_test(
"//vendor/github.com/davecgh/go-spew/spew:go_default_library", "//vendor/github.com/davecgh/go-spew/spew:go_default_library",
"//vendor/k8s.io/api/core/v1:go_default_library", "//vendor/k8s.io/api/core/v1:go_default_library",
"//vendor/k8s.io/apimachinery/pkg/apis/meta/v1:go_default_library", "//vendor/k8s.io/apimachinery/pkg/apis/meta/v1:go_default_library",
"//vendor/k8s.io/apimachinery/pkg/types:go_default_library",
"//vendor/k8s.io/apimachinery/pkg/util/sets:go_default_library", "//vendor/k8s.io/apimachinery/pkg/util/sets:go_default_library",
] + select({ ] + select({
"@io_bazel_rules_go//go/platform:linux_amd64": [ "@io_bazel_rules_go//go/platform:linux_amd64": [

View File

@ -0,0 +1,99 @@
/*
Copyright 2018 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package util
import (
"fmt"
"k8s.io/api/core/v1"
"os"
"path"
"path/filepath"
"sort"
"strings"
)
// getNestedMountpoints returns a list of mountpoint directories that should be created
// for the volume indicated by name.
// note: the returned list is relative to baseDir
func getNestedMountpoints(name, baseDir string, pod v1.Pod) ([]string, error) {
var retval []string
checkContainer := func(container *v1.Container) error {
var allMountPoints []string // all mount points in this container
var myMountPoints []string // mount points that match name
for _, vol := range container.VolumeMounts {
cleaned := filepath.Clean(vol.MountPath)
allMountPoints = append(allMountPoints, cleaned)
if vol.Name == name {
myMountPoints = append(myMountPoints, cleaned)
}
}
sort.Strings(allMountPoints)
parentPrefix := ".." + string(os.PathSeparator)
// Examine each place where this volume is mounted
for _, myMountPoint := range myMountPoints {
if strings.HasPrefix(myMountPoint, parentPrefix) {
// Don't let a container trick us into creating directories outside of its rootfs
return fmt.Errorf("Invalid container mount point %v", myMountPoint)
}
myMPSlash := myMountPoint + string(os.PathSeparator)
// The previously found nested mountpoint (or "" if none found yet)
prevNestedMP := ""
// examine each mount point to see if it's nested beneath this volume
// (but skip any that are double-nested beneath this volume)
// For example, if this volume is mounted as /dir and other volumes are mounted
// as /dir/nested and /dir/nested/other, only create /dir/nested.
for _, mp := range allMountPoints {
if !strings.HasPrefix(mp, myMPSlash) {
continue // skip -- not nested beneath myMountPoint
}
if prevNestedMP != "" && strings.HasPrefix(mp, prevNestedMP) {
continue // skip -- double nested beneath myMountPoint
}
// since this mount point is nested, remember it so that we can check that following ones aren't nested beneath this one
prevNestedMP = mp + string(os.PathSeparator)
retval = append(retval, mp[len(myMPSlash):])
}
}
return nil
}
for _, container := range pod.Spec.InitContainers {
if err := checkContainer(&container); err != nil {
return nil, err
}
}
for _, container := range pod.Spec.Containers {
if err := checkContainer(&container); err != nil {
return nil, err
}
}
return retval, nil
}
// MakeNestedMountpoints creates mount points in baseDir for volumes mounted beneath name
func MakeNestedMountpoints(name, baseDir string, pod v1.Pod) error {
dirs, err := getNestedMountpoints(name, baseDir, pod)
if err != nil {
return err
}
for _, dir := range dirs {
err := os.MkdirAll(path.Join(baseDir, dir), 0755)
if err != nil {
return fmt.Errorf("Unable to create nested volume mountpoints: %v", err)
}
}
return nil
}
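A sketch of the double-nesting rule described above, with a minimal hand-built pod (in-package; assumes the same v1 import plus fmt):
func ExampleGetNestedMountpoints() {
	pod := v1.Pod{Spec: v1.PodSpec{Containers: []v1.Container{{
		VolumeMounts: []v1.VolumeMount{
			{Name: "v", MountPath: "/dir"},
			{Name: "a", MountPath: "/dir/nested"},
			{Name: "b", MountPath: "/dir/nested/other"},
		},
	}}}}
	// /dir/nested/other is skipped: it is double-nested beneath /dir.
	dirs, _ := getNestedMountpoints("v", "/base", pod)
	fmt.Println(dirs)
	// Output: [nested]
}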

View File

@@ -100,7 +100,7 @@ type OperationExecutor interface {
 	// UnmountVolume unmounts the volume from the pod specified in
 	// volumeToUnmount and updates the actual state of the world to reflect that.
-	UnmountVolume(volumeToUnmount MountedVolume, actualStateOfWorld ActualStateOfWorldMounterUpdater) error
+	UnmountVolume(volumeToUnmount MountedVolume, actualStateOfWorld ActualStateOfWorldMounterUpdater, podsDir string) error
 	// UnmountDevice unmounts the volumes global mount path from the device (for
 	// attachable volumes only, freeing it for detach. It then updates the
@@ -722,10 +722,11 @@ func (oe *operationExecutor) MountVolume(
 func (oe *operationExecutor) UnmountVolume(
 	volumeToUnmount MountedVolume,
-	actualStateOfWorld ActualStateOfWorldMounterUpdater) error {
+	actualStateOfWorld ActualStateOfWorldMounterUpdater,
+	podsDir string) error {
 	unmountFunc, plugin, err :=
-		oe.operationGenerator.GenerateUnmountVolumeFunc(volumeToUnmount, actualStateOfWorld)
+		oe.operationGenerator.GenerateUnmountVolumeFunc(volumeToUnmount, actualStateOfWorld, podsDir)
 	if err != nil {
 		return err
 	}
@@ -847,7 +848,7 @@ type VolumeStateHandler interface {
 	// Volume is attached, mount/map it
 	MountVolumeHandler(waitForAttachTimeout time.Duration, volumeToMount VolumeToMount, actualStateOfWorld ActualStateOfWorldMounterUpdater, isRemount bool, remountingLogStr string) error
 	// Volume is mounted/mapped, unmount/unmap it
-	UnmountVolumeHandler(mountedVolume MountedVolume, actualStateOfWorld ActualStateOfWorldMounterUpdater) error
+	UnmountVolumeHandler(mountedVolume MountedVolume, actualStateOfWorld ActualStateOfWorldMounterUpdater, podsDir string) error
 	// Volume is not referenced from pod, unmount/unmap and detach it
 	UnmountDeviceHandler(attachedVolume AttachedVolume, actualStateOfWorld ActualStateOfWorldMounterUpdater, mounter mount.Interface) error
 	// Reconstruct volume from mount path
@@ -913,11 +914,12 @@ func (f FilesystemVolumeHandler) MountVolumeHandler(waitForAttachTimeout time.Du
 // UnmountVolumeHandler unmount a volume if a volume is mounted
 // This method is handler for filesystem volume
-func (f FilesystemVolumeHandler) UnmountVolumeHandler(mountedVolume MountedVolume, actualStateOfWorld ActualStateOfWorldMounterUpdater) error {
+func (f FilesystemVolumeHandler) UnmountVolumeHandler(mountedVolume MountedVolume, actualStateOfWorld ActualStateOfWorldMounterUpdater, podsDir string) error {
 	glog.V(12).Infof(mountedVolume.GenerateMsgDetailed("Starting operationExecutor.UnmountVolume", ""))
 	err := f.oe.UnmountVolume(
 		mountedVolume,
-		actualStateOfWorld)
+		actualStateOfWorld,
+		podsDir)
 	return err
 }
@@ -976,7 +978,7 @@ func (b BlockVolumeHandler) MountVolumeHandler(waitForAttachTimeout time.Duratio
 // UnmountVolumeHandler unmap a volume if a volume is mapped
 // This method is handler for block volume
-func (b BlockVolumeHandler) UnmountVolumeHandler(mountedVolume MountedVolume, actualStateOfWorld ActualStateOfWorldMounterUpdater) error {
+func (b BlockVolumeHandler) UnmountVolumeHandler(mountedVolume MountedVolume, actualStateOfWorld ActualStateOfWorldMounterUpdater, podsDir string) error {
 	glog.V(12).Infof(mountedVolume.GenerateMsgDetailed("Starting operationExecutor.UnmapVolume", ""))
 	err := b.oe.UnmapVolume(
 		mountedVolume,

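The hunks above are plumbing: podsDir (the root of the kubelet's per-pod directories) is threaded from the caller through UnmountVolume and both UnmountVolumeHandler implementations so that the operation generator, in the next file, can locate a pod's subPath bind mounts before tearing the volume down; see the sketch after that file's hunks.
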
View File

@@ -19,6 +19,7 @@ package operationexecutor
 import (
 	"encoding/json"
 	"fmt"
+	"path"
 	"strings"
 	"time"
@@ -86,7 +87,7 @@ type OperationGenerator interface {
 	GenerateMountVolumeFunc(waitForAttachTimeout time.Duration, volumeToMount VolumeToMount, actualStateOfWorldMounterUpdater ActualStateOfWorldMounterUpdater, isRemount bool) (func() error, string, error)
 	// Generates the UnmountVolume function needed to perform the unmount of a volume plugin
-	GenerateUnmountVolumeFunc(volumeToUnmount MountedVolume, actualStateOfWorld ActualStateOfWorldMounterUpdater) (func() error, string, error)
+	GenerateUnmountVolumeFunc(volumeToUnmount MountedVolume, actualStateOfWorld ActualStateOfWorldMounterUpdater, podsDir string) (func() error, string, error)
 	// Generates the AttachVolume function needed to perform attach of a volume plugin
 	GenerateAttachVolumeFunc(volumeToAttach VolumeToAttach, actualStateOfWorld ActualStateOfWorldAttacherUpdater) (func() error, string, error)
@@ -608,7 +609,8 @@ func (og *operationGenerator) resizeFileSystem(volumeToMount VolumeToMount, devi
 func (og *operationGenerator) GenerateUnmountVolumeFunc(
 	volumeToUnmount MountedVolume,
-	actualStateOfWorld ActualStateOfWorldMounterUpdater) (func() error, string, error) {
+	actualStateOfWorld ActualStateOfWorldMounterUpdater,
+	podsDir string) (func() error, string, error) {
 	// Get mountable plugin
 	volumePlugin, err :=
 		og.volumePluginMgr.FindPluginByName(volumeToUnmount.PluginName)
@@ -623,6 +625,14 @@ func (og *operationGenerator) GenerateUnmountVolumeFunc(
 	}
 	return func() error {
+		mounter := og.volumePluginMgr.Host.GetMounter(volumeToUnmount.PluginName)
+
+		// Remove all bind-mounts for subPaths
+		podDir := path.Join(podsDir, string(volumeToUnmount.PodUID))
+		if err := mounter.CleanSubPaths(podDir, volumeToUnmount.OuterVolumeSpecName); err != nil {
+			return volumeToUnmount.GenerateErrorDetailed("error cleaning subPath mounts", err)
+		}
+
 		// Execute unmount
 		unmountErr := volumeUnmounter.TearDown()
 		if unmountErr != nil {

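The ordering added in the last hunk is the point of the change: subPath bind mounts into the volume are removed first, and only then is the volume itself torn down, presumably so the unmount does not fail or leave stale bind mounts behind. Below is a minimal compile-only sketch of that ordering; the interfaces are hypothetical stand-ins for the kubelet types in the diff, not the real API.

package unmountsketch

import (
	"fmt"
	"path"
)

// subPathCleaner is a stand-in for the part of mount.Interface used above.
type subPathCleaner interface {
	CleanSubPaths(podDir, volumeName string) error
}

// volumeUnmounter is a stand-in for the volume plugin's Unmounter.
type volumeUnmounter interface {
	TearDown() error
}

// unmountWithCleanup mirrors the ordering GenerateUnmountVolumeFunc now
// enforces: clean up subPath bind mounts first, then unmount the volume.
func unmountWithCleanup(m subPathCleaner, u volumeUnmounter, podsDir, podUID, volName string) error {
	podDir := path.Join(podsDir, podUID)
	if err := m.CleanSubPaths(podDir, volName); err != nil {
		return fmt.Errorf("error cleaning subPath mounts: %v", err)
	}
	return u.TearDown()
}
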
Some files were not shown because too many files have changed in this diff.