Fix race on store close

There was a very small window where it was possible to deadlock during
the close of the Store. When closing, the Store waited on its Waitgroup
under a `Lock`. Naturally, all other goroutines must have been in a
position to call `Done` on the `Waitgroup` before the `Wait` call in
`Close` would return.

For the goroutine running the `monitorShards` method it was possible
that it would be unable to do this. Specifically, if the `monitorShards`
goroutine was jumping into the `t.C` case as the `Close()` goroutine was
acquiring the `Lock`, then the `monitorShards` goroutine would be unable
to acquire the `RLock`. Since it would also be unable to progress around
its loop to jump into the `s.closing` case, it would be unable to call
`Done` on the `WaitGroup` and we would have a deadlock.

This was identified during an AppVeyor CI run, though I was unable to
reproduce this locally.
pull/9068/head
Edd Robinson 2017-11-07 12:48:43 +00:00
parent e69217440b
commit e762da9aca
1 changed file with 11 additions and 4 deletions

View File

@ -132,6 +132,11 @@ func (s *Store) Open() error {
s.mu.Lock()
defer s.mu.Unlock()
if s.opened {
// Already open
return nil
}
s.closing = make(chan struct{})
s.shards = map[uint64]*Shard{}
@ -291,12 +296,13 @@ func (s *Store) loadShards() error {
// shards through the Store will result in ErrStoreClosed being returned.
func (s *Store) Close() error {
s.mu.Lock()
defer s.mu.Unlock()
if s.opened {
close(s.closing)
}
s.mu.Unlock()
s.wg.Wait()
// No other goroutines accessing the store, so no need for a Lock.
// Close all the shards in parallel.
if err := s.walkShards(s.shardsSlice(), func(sh *Shard) error {
@ -305,9 +311,10 @@ func (s *Store) Close() error {
return err
}
s.opened = false
s.mu.Lock()
s.shards = nil
s.opened = false // Store may now be opened again.
s.mu.Unlock()
return nil
}