// Source: influxdb/services/hh/queue.go

package hh
import (
	"encoding/binary"
	"fmt"
	"io"
	"io/ioutil"
	"os"
	"path/filepath"
	"sort"
	"strconv"
	"sync"
	"time"
)
// ErrNotOpen is returned when an operation is attempted on a queue or
// segment that has not been opened, or has already been closed.
var ErrNotOpen = fmt.Errorf("queue not open")

// ErrQueueFull is returned by Append when accepting the entry would push the
// queue's disk usage past its configured maximum size.
var ErrQueueFull = fmt.Errorf("queue is full")

// ErrSegmentFull is returned by a segment when an entry does not fit within
// the segment's maximum size.
var ErrSegmentFull = fmt.Errorf("segment is full")
const (
	// defaultSegmentSize is the maximum segment size, in bytes, that a new
	// queue starts with (10 MiB).
	defaultSegmentSize = 10 << 20

	// footerSize is the size in bytes of the footer stored at the end of
	// every segment file: a single uint64 holding the head offset.
	footerSize = 8
)
// queue is a bounded, disk-backed, append-only structure that combines queue
// and log semantics. Byte slices are appended and read back in order. The
// queue keeps a pointer to the current head byte slice, which can be re-read
// any number of times until the head is advanced.
//
// Internally the byte slices are written to multiple segment files so that
// disk space can be reclaimed. Once a segment file reaches the max segment
// size a new file is created. A segment is removed after its head pointer has
// advanced past its last entry. The first segment is the head and the last
// segment is the tail: reads come from the head segment and writes go to the
// tail segment.
//
// A queue can be given a max size; once the size of all segments on disk
// would exceed it, writes fail.
//
//   ┌─────┐
//   │Head │
//   ├─────┘
//   │
//   ▼
//   ┌─────────────────┐ ┌─────────────────┐ ┌─────────────────┐
//   │Segment 1 - 10MB │ │Segment 2 - 10MB │ │Segment 3 - 10MB │
//   └─────────────────┘ └─────────────────┘ └─────────────────┘
//                                                    ▲
//                                                    │
//                                                    │
//                                                 ┌─────┐
//                                                 │Tail │
//                                                 └─────┘
type queue struct {
	mu sync.RWMutex

	// dir is the directory in which segment files are created.
	dir string

	// head is the segment that reads are served from (the beginning of the
	// queue).
	head *segment
	// tail is the segment that new entries are appended to.
	tail *segment

	// maxSegmentSize is the size in bytes a segment file may reach before a
	// new one is created.
	maxSegmentSize int64

	// maxSize is the total size in bytes all segments may occupy before
	// writes return an error.
	maxSize int64

	// segments lists the segments that exist on disk, head first.
	segments segments
}
// segments is an ordered list of segment pointers; the queue treats the first
// element as the head and the last as the tail.
type segments []*segment
// newQueue creates a queue that stores its segments in dir and will not
// consume more than maxSize bytes on disk. The queue starts with the default
// max segment size and no segments; call Open before using it. The returned
// error is always nil.
func newQueue(dir string, maxSize int64) (*queue, error) {
	q := &queue{
		dir:            dir,
		maxSize:        maxSize,
		maxSegmentSize: defaultSegmentSize,
		segments:       segments{},
	}
	return q, nil
}
// Open prepares the queue for reading and writing. It loads the segments
// found in the queue directory, creates a first empty segment when there are
// none, and points head and tail at the first and last segment.
func (l *queue) Open() error {
	l.mu.Lock()
	defer l.mu.Unlock()

	loaded, err := l.loadSegments()
	if err != nil {
		return err
	}
	l.segments = loaded

	if len(l.segments) == 0 {
		if _, err := l.addSegment(); err != nil {
			return err
		}
	}

	last := len(l.segments) - 1
	l.head, l.tail = l.segments[0], l.segments[last]

	// If the head was already fully consumed and the segment size has been
	// changed since, existing segments can get stuck and never let clients
	// advance further. Move past the head right away when it is already at
	// its end.
	if _, err := l.head.current(); err == io.EOF {
		return l.trimHead()
	}
	return nil
}
// Close stops the queue for reading and writing. Every segment is closed even
// if an earlier one fails to close, so no file handle is left behind; the
// first error encountered is returned. The queue is always left in the
// not-open state afterwards.
func (l *queue) Close() error {
	l.mu.Lock()
	defer l.mu.Unlock()

	var firstErr error
	for _, s := range l.segments {
		// Keep going on failure: bailing out here would leak the handles
		// of all remaining segments.
		if err := s.close(); err != nil && firstErr == nil {
			firstErr = err
		}
	}

	l.head = nil
	l.tail = nil
	l.segments = nil
	return firstErr
}
// SetMaxSegmentSize updates the max segment size for new and existing
// segments. If the current tail has already reached the new limit, a fresh
// tail segment is started. It is safe to call before the queue is opened: the
// size is recorded and handed to segments when they are created or loaded.
func (l *queue) SetMaxSegmentSize(size int64) error {
	l.mu.Lock()
	defer l.mu.Unlock()

	l.maxSegmentSize = size
	for _, s := range l.segments {
		s.SetMaxSegmentSize(size)
	}

	// Without a tail (queue not open) there is nothing to roll over, and
	// calling through the nil tail would panic.
	if l.tail == nil {
		return nil
	}

	if l.tail.diskUsage() >= l.maxSegmentSize {
		seg, err := l.addSegment()
		if err != nil {
			return err
		}
		l.tail = seg
	}
	return nil
}
// PurgeOlderThan removes, starting from the head, every segment whose file
// was last modified before when (truncated to the second). It stops at the
// first segment that is recent enough.
//
// When the only remaining segment is itself too old, a fresh empty segment is
// created to become both head and tail before the stale one is removed, so
// the queue always keeps a usable tail. It returns ErrNotOpen if the queue is
// not open.
func (l *queue) PurgeOlderThan(when time.Time) error {
	l.mu.Lock()
	defer l.mu.Unlock()

	if l.head == nil {
		return ErrNotOpen
	}

	cutoff := when.Truncate(time.Second)
	for {
		mod, err := l.head.lastModified()
		if err != nil {
			return err
		}
		if !mod.Before(cutoff) {
			return nil
		}

		// trimHead never removes the last segment, so give it a
		// replacement first. The replacement must also become the tail;
		// otherwise the tail would keep pointing at the segment that is
		// about to be closed and deleted.
		lastSegment := len(l.segments) == 1
		if lastSegment {
			seg, err := l.addSegment()
			if err != nil {
				return err
			}
			l.tail = seg
		}

		if err := l.trimHead(); err != nil {
			return err
		}

		// The head is now the segment that was just created; stop here
		// rather than evaluating it against the cutoff, which could loop
		// forever for a cutoff in the future.
		if lastSegment {
			return nil
		}
	}
}
// diskUsage reports the combined size in bytes of all segments currently
// tracked by the queue. It does not take l.mu itself.
func (l *queue) diskUsage() int64 {
	total := int64(0)
	for i := range l.segments {
		total += l.segments[i].diskUsage()
	}
	return total
}
// addSegment creates a new, empty segment file named after the next free
// segment ID, appends it to the queue's segment list and returns it. It does
// not change head or tail.
func (l *queue) addSegment() (*segment, error) {
	id, err := l.nextSegmentID()
	if err != nil {
		return nil, err
	}

	name := strconv.FormatUint(id, 10)
	seg, err := newSegment(filepath.Join(l.dir, name), l.maxSegmentSize)
	if err != nil {
		return nil, err
	}

	l.segments = append(l.segments, seg)
	return seg, nil
}
// loadSegments opens every segment file found in the queue directory and
// returns them ordered by ascending segment ID, so that the first element is
// the oldest segment (head) and the last the newest (tail).
//
// On error no segments are returned and any segment opened so far is closed.
func (l *queue) loadSegments() (segments, error) {
	files, err := ioutil.ReadDir(l.dir)
	if err != nil {
		return nil, err
	}

	type entry struct {
		id   uint64
		name string
	}
	entries := make([]entry, 0, len(files))
	for _, fi := range files {
		// Segments are regular files; skip directories.
		if fi.IsDir() {
			continue
		}

		// Segment file names are all numeric; skip anything else.
		id, err := strconv.ParseUint(fi.Name(), 10, 64)
		if err != nil {
			continue
		}
		entries = append(entries, entry{id: id, name: fi.Name()})
	}

	// ReadDir sorts by file name, which puts "10" before "9". Order by the
	// numeric ID instead so head and tail are picked correctly.
	sort.Slice(entries, func(i, j int) bool { return entries[i].id < entries[j].id })

	loaded := segments{}
	for _, e := range entries {
		seg, err := newSegment(filepath.Join(l.dir, e.name), l.maxSegmentSize)
		if err != nil {
			// Best-effort cleanup so the already opened files do not
			// leak; the open error is the one worth reporting.
			for _, s := range loaded {
				_ = s.close()
			}
			return nil, err
		}
		loaded = append(loaded, seg)
	}
	return loaded, nil
}
// nextSegmentID scans the queue directory and returns one more than the
// highest numeric segment file name found (1 when there are none).
func (l *queue) nextSegmentID() (uint64, error) {
	entries, err := ioutil.ReadDir(l.dir)
	if err != nil {
		return 0, err
	}

	highest := uint64(0)
	for _, fi := range entries {
		// Segments are regular files; directories are ignored.
		if fi.IsDir() {
			continue
		}

		// Only all-numeric names are segment files.
		id, perr := strconv.ParseUint(fi.Name(), 10, 64)
		if perr == nil && id > highest {
			highest = id
		}
	}
	return highest + 1, nil
}
// Append appends a byte slice to the end of the queue.
//
// It returns ErrNotOpen if the queue is not open and ErrQueueFull if the
// entry would push the queue's disk usage past its max size. When the tail
// segment is full, a new tail segment is created and the append is retried
// once on it. Any other error from writing to the tail is returned to the
// caller.
func (l *queue) Append(b []byte) error {
	l.mu.Lock()
	defer l.mu.Unlock()

	if l.tail == nil {
		return ErrNotOpen
	}

	if l.diskUsage()+int64(len(b)) > l.maxSize {
		return ErrQueueFull
	}

	err := l.tail.append(b)
	if err != ErrSegmentFull {
		// Success (nil) or a real write failure: report it as is rather
		// than pretending the entry was stored.
		return err
	}

	// The tail is full: roll over to a new segment and retry there.
	seg, err := l.addSegment()
	if err != nil {
		return err
	}
	l.tail = seg
	return l.tail.append(b)
}
// Current returns the byte slice at the head of the queue without consuming
// it. It returns ErrNotOpen if the queue is not open, and whatever the head
// segment reports otherwise (io.EOF when the head segment has no unread
// entry).
func (l *queue) Current() ([]byte, error) {
	// The head pointer is replaced by Open, Close, Advance and
	// PurgeOlderThan under l.mu, so it must be read under the lock too.
	l.mu.RLock()
	defer l.mu.RUnlock()

	if l.head == nil {
		return nil, ErrNotOpen
	}
	return l.head.current()
}
// Advance moves the head pointer to the next byte slice in the queue. When
// the head segment is exhausted it is dropped and the next segment becomes
// the head. It returns ErrNotOpen if the queue is not open, and any error
// hit while advancing or trimming the head segment.
func (l *queue) Advance() error {
	l.mu.Lock()
	defer l.mu.Unlock()

	if l.head == nil {
		return ErrNotOpen
	}

	err := l.head.advance()
	if err == io.EOF {
		// End of the head segment is not a failure: move on to the next
		// segment, if there is one.
		return l.trimHead()
	}
	return err
}
// trimHead drops the head segment: it is removed from the segment list,
// closed, and its file deleted, after which the next segment becomes the
// head. Nothing happens when the queue holds a single segment (or none), so
// the last segment is never removed.
func (l *queue) trimHead() error {
	if len(l.segments) <= 1 {
		return nil
	}

	old := l.head
	l.segments = l.segments[1:]

	if err := old.close(); err != nil {
		return err
	}
	if err := os.Remove(old.path); err != nil {
		return err
	}

	l.head = l.segments[0]
	return nil
}
// segment is a queue backed by a single file. The file holds a series of
// length-prefixed blocks followed by one footer that stores the offset of the
// current head block within the segment.
//
// ┌──────────────────────────┐ ┌──────────────────────────┐ ┌────────────┐
// │         Block 1          │ │         Block 2          │ │   Footer   │
// └──────────────────────────┘ └──────────────────────────┘ └────────────┘
// ┌────────────┐┌────────────┐ ┌────────────┐┌────────────┐ ┌────────────┐
// │Block 1 Len ││Block 1 Body│ │Block 2 Len ││Block 2 Body│ │Head Offset │
// │  8 bytes   ││  N bytes   │ │  8 bytes   ││  N bytes   │ │  8 bytes   │
// └────────────┘└────────────┘ └────────────┘└────────────┘ └────────────┘
//
// Keeping the head pointer in a footer at the end of the file lets writes
// seek to the end and proceed sequentially, instead of seeking back to the
// start of the segment to update the pointer. Reads seek to the end for the
// footer, then back into the segment to the offset stored there.
//
// Segments store arbitrary byte slices and leave serialization to the
// caller. A segment is created with a max size and rejects writes with
// ErrSegmentFull once an entry no longer fits.
type segment struct {
	// mu guards the fields below.
	mu sync.RWMutex

	// size is the size of the segment file in bytes, footer included.
	size int64
	// file is the open segment file; nil once the segment is closed.
	file *os.File
	// path is the location of the segment file on disk.
	path string
	// pos is the offset of the current head block within the file.
	pos int64
	// currentSize is the body length of the current head block.
	currentSize int64
	// maxSize is the size limit that appends are checked against.
	maxSize int64
}
// newSegment opens the segment file at path, creating it if it does not
// exist, and returns a segment limited to maxSize bytes. A newly created file
// is initialized with an empty footer; an existing file has its head position
// restored from its footer.
//
// The file handle is closed again if the segment cannot be initialized.
func newSegment(path string, maxSize int64) (*segment, error) {
	f, err := os.OpenFile(path, os.O_CREATE|os.O_RDWR, 0600)
	if err != nil {
		return nil, err
	}

	// Stat through the open handle so the size always describes the file
	// that was actually opened.
	stats, err := f.Stat()
	if err != nil {
		_ = f.Close() // the Stat error is the one worth reporting
		return nil, err
	}

	s := &segment{file: f, path: path, size: stats.Size(), maxSize: maxSize}
	if err := s.open(); err != nil {
		_ = f.Close() // the open error is the one worth reporting
		return nil, err
	}
	return s, nil
}
// open initializes the in-memory state of the segment from its file. An empty
// file gets a footer pointing at offset 0 written and synced. For an existing
// file the head offset is read from the footer and, unless the head is
// already at the end of the data, the length of the head block is read too.
func (l *segment) open() error {
	l.mu.Lock()
	defer l.mu.Unlock()

	if l.size != 0 {
		// Existing segment: the footer at the very end holds the offset
		// of the current head block.
		if err := l.seekEnd(-footerSize); err != nil {
			return err
		}
		offset, err := l.readUint64()
		if err != nil {
			return err
		}
		l.pos = int64(offset)

		if err := l.seekToCurrent(); err != nil {
			return err
		}

		// With the head at the end of the data there is no block length
		// to read; the current block size stays 0.
		if l.pos >= l.size-footerSize {
			return nil
		}
		blockLen, err := l.readUint64()
		if err != nil {
			return err
		}
		l.currentSize = int64(blockLen)
		return nil
	}

	// Brand-new segment: record that the head is at the start of the file.
	l.pos, l.currentSize = 0, 0
	if err := l.writeUint64(uint64(l.pos)); err != nil {
		return err
	}
	if err := l.file.Sync(); err != nil {
		return err
	}
	l.size = footerSize
	return nil
}
// append writes b as a new block at the end of the segment. The block (length
// prefix followed by body) overwrites the old footer, a new footer carrying
// the unchanged head offset is written after it, and the file is synced.
//
// It returns ErrNotOpen if the segment is closed and ErrSegmentFull if b does
// not fit within the segment's max size.
func (l *segment) append(b []byte) error {
	l.mu.Lock()
	defer l.mu.Unlock()

	if l.file == nil {
		return ErrNotOpen
	}

	n := int64(len(b))
	if l.size+n > l.maxSize {
		return ErrSegmentFull
	}

	// Position on the footer; the new block goes where it currently sits.
	if err := l.seekEnd(-footerSize); err != nil {
		return err
	}

	// Block length, block body, then the footer again.
	if err := l.writeUint64(uint64(n)); err != nil {
		return err
	}
	if err := l.writeBytes(b); err != nil {
		return err
	}
	if err := l.writeUint64(uint64(l.pos)); err != nil {
		return err
	}

	if err := l.file.Sync(); err != nil {
		return err
	}

	// A current size of 0 is taken to mean there was no head block yet, in
	// which case the new block becomes the head.
	if l.currentSize == 0 {
		l.currentSize = n
	}

	// Body plus the uint64 holding its length.
	l.size += n + 8
	return nil
}
// current returns the byte slice of the block the segment's head currently
// points at, without advancing.
//
// It returns ErrNotOpen if the segment is closed, io.EOF if the head is at
// the end of the segment's data, and an error if the stored block length is
// outside the range 0..maxSize (which indicates a corrupt segment).
func (l *segment) current() ([]byte, error) {
	l.mu.Lock()
	defer l.mu.Unlock()

	if l.file == nil {
		return nil, ErrNotOpen
	}

	if l.pos == l.size-footerSize {
		return nil, io.EOF
	}

	if err := l.seekToCurrent(); err != nil {
		return nil, err
	}

	// Read the record size.
	sz, err := l.readUint64()
	if err != nil {
		return nil, err
	}
	l.currentSize = int64(sz)

	// A length with the top bit set converts to a negative int64; reject it
	// explicitly, otherwise it would slip past the max check and make the
	// allocation below panic.
	if int64(sz) < 0 || int64(sz) > l.maxSize {
		return nil, fmt.Errorf("record size out of range: max %d: got %d", l.maxSize, sz)
	}

	b := make([]byte, sz)
	if err := l.readBytes(b); err != nil {
		return nil, err
	}
	return b, nil
}
// advance advances the current value pointer
func (l *segment) advance() error {
l.mu.Lock()
defer l.mu.Unlock()
if l.file == nil {
return ErrNotOpen
}
2015-06-06 00:58:20 +00:00
// If we're at the end of the file, can't advance
if int64(l.pos) == l.size-footerSize {
l.currentSize = 0
return io.EOF
}
if err := l.seekEnd(-footerSize); err != nil {
return err
}
pos := l.pos + l.currentSize + 8
if err := l.writeUint64(uint64(pos)); err != nil {
return err
}
if err := l.file.Sync(); err != nil {
return err
}
l.pos = pos
if err := l.seekToCurrent(); err != nil {
return err
}
sz, err := l.readUint64()
if err != nil {
return err
}
l.currentSize = int64(sz)
if int64(l.pos) == l.size-footerSize {
l.currentSize = 0
return io.EOF
}
return nil
}
// close closes the segment's file. On success the file handle is cleared,
// which is what marks the segment as not open; on failure the handle is kept
// and the error returned.
func (l *segment) close() error {
	l.mu.Lock()
	defer l.mu.Unlock()

	err := l.file.Close()
	if err == nil {
		l.file = nil
	}
	return err
}
// lastModified returns the modification time of the segment's file as
// reported by the file system, or the zero time and an error if the file
// cannot be stat'ed.
func (l *segment) lastModified() (time.Time, error) {
	l.mu.RLock()
	defer l.mu.RUnlock()

	info, err := os.Stat(l.file.Name())
	if err == nil {
		return info.ModTime(), nil
	}
	return time.Time{}, err
}
// diskUsage returns the tracked size of the segment in bytes.
func (l *segment) diskUsage() int64 {
	l.mu.RLock()
	n := l.size
	l.mu.RUnlock()
	return n
}
// SetMaxSegmentSize replaces the size limit that appends to this segment are
// checked against.
func (l *segment) SetMaxSegmentSize(size int64) {
	l.mu.Lock()
	l.maxSize = size
	l.mu.Unlock()
}
// seekToCurrent positions the file at the start of the current head block.
func (l *segment) seekToCurrent() error {
	target := l.pos
	return l.seek(target)
}
// seek positions the file at the absolute offset pos, measured from the start
// of the file. It returns an error if the seek fails or lands on a different
// offset than requested.
func (l *segment) seek(pos int64) error {
	n, err := l.file.Seek(pos, os.SEEK_SET)
	if err != nil {
		return err
	}

	if n != pos {
		// Report the offset that was actually requested.
		return fmt.Errorf("bad seek. exp %v, got %v", pos, n)
	}
	return nil
}
// seekEnd positions the file at offset pos relative to the end of the file;
// pos is negative to land before the end.
func (l *segment) seekEnd(pos int64) error {
	if _, err := l.file.Seek(pos, os.SEEK_END); err != nil {
		return err
	}
	return nil
}
// filePos returns the file's current offset without moving it. Any error
// from the underlying seek is discarded.
func (l *segment) filePos() int64 {
	offset, _ := l.file.Seek(0, os.SEEK_CUR)
	return offset
}
// readUint64 reads 8 bytes at the file's current offset and decodes them as a
// big-endian uint64.
func (l *segment) readUint64() (uint64, error) {
	var raw [8]byte
	if err := l.readBytes(raw[:]); err != nil {
		return 0, err
	}
	return btou64(raw[:]), nil
}
// writeUint64 writes sz at the file's current offset as an 8-byte big-endian
// value.
func (l *segment) writeUint64(sz uint64) error {
	encoded := u64tob(sz)
	return l.writeBytes(encoded)
}
// writeBytes writes b at the file's current offset. It returns an error if
// the write fails or if fewer than len(b) bytes were written.
func (l *segment) writeBytes(b []byte) error {
	switch n, err := l.file.Write(b); {
	case err != nil:
		return err
	case n != len(b):
		return fmt.Errorf("short write. got %d, exp %d", n, len(b))
	}
	return nil
}
// readBytes fills b completely from the file's current offset. It returns
// io.EOF when nothing at all could be read, a "bad read" error naming the
// expected and actual byte counts when the file ends part-way through, and
// any other read error unchanged.
func (l *segment) readBytes(b []byte) error {
	// ReadFull keeps reading until b is full, so a single short read from
	// the OS is not mistaken for a truncated file.
	n, err := io.ReadFull(l.file, b)
	if err == io.ErrUnexpectedEOF {
		return fmt.Errorf("bad read. exp %v, got %v", len(b), n)
	}
	return err
}
// u64tob encodes v as an 8-byte big-endian slice.
func u64tob(v uint64) []byte {
	var buf [8]byte
	binary.BigEndian.PutUint64(buf[:], v)
	return buf[:]
}
// btou64 decodes the first 8 bytes of b as a big-endian uint64.
func btou64(b []byte) uint64 {
	decoded := binary.BigEndian.Uint64(b)
	return decoded
}