2015-10-01 19:23:38 +00:00
package tsm1
2015-09-09 18:29:50 +00:00
import (
2015-09-17 15:23:27 +00:00
"encoding/json"
2015-09-09 18:29:50 +00:00
"fmt"
"io"
"log"
"os"
"path/filepath"
"sort"
"strings"
"sync"
"time"
2015-09-25 14:49:26 +00:00
"github.com/influxdb/influxdb/models"
2015-09-09 18:29:50 +00:00
"github.com/influxdb/influxdb/tsdb"
2015-09-25 14:49:26 +00:00
"github.com/golang/snappy"
2015-09-09 18:29:50 +00:00
)
// flushType indicates why a flush and compaction are being run so the
// partition can do the appropriate type of compaction.
type flushType int

const (
	// noFlush indicates that no flush or compaction are necessary at this time.
	noFlush flushType = iota

	// memoryFlush indicates that we should look for the series using the most
	// memory to flush out and compact all others.
	memoryFlush

	// idleFlush indicates that we should flush all series in the partition,
	// delete all segment files and hold off on opening a new one.
	idleFlush

	// startupFlush indicates that we're flushing because the database is starting up.
	startupFlush
)

// writeBufLen is the initial size of the scratch buffers used when reading
// WAL segment files back into the cache. It is declared separately from the
// flushType constants so it does not interrupt their iota sequence.
const writeBufLen = 32 << 10
const (
pointsEntry walEntryType = 0x01
fieldsEntry walEntryType = 0x02
seriesEntry walEntryType = 0x03
2015-10-01 19:16:28 +00:00
deleteEntry walEntryType = 0x04
2015-09-09 18:29:50 +00:00
)
type Log struct {
path string
// write variables
writeLock sync . Mutex
currentSegmentID int
currentSegmentFile * os . File
currentSegmentSize int
2015-09-17 19:46:53 +00:00
// cache and flush variables
2015-10-21 22:15:26 +00:00
closing chan struct { }
2015-09-09 18:29:50 +00:00
cacheLock sync . RWMutex
2015-09-17 19:46:53 +00:00
lastWriteTime time . Time
flushRunning bool
2015-09-09 18:29:50 +00:00
cache map [ string ] Values
cacheDirtySort map [ string ] bool // this map should be small, only for dirty vals
flushCache map [ string ] Values // temporary map while flushing
memorySize int
measurementFieldsCache map [ string ] * tsdb . MeasurementFields
seriesToCreateCache [ ] * tsdb . SeriesCreate
// LogOutput is the writer used by the logger.
LogOutput io . Writer
logger * log . Logger
// FlushColdInterval is the period of time after which a partition will do a
// full flush and compaction if it has been cold for writes.
FlushColdInterval time . Duration
// SegmentSize is the file size at which a segment file will be rotated
SegmentSize int
2015-09-17 19:46:53 +00:00
// FlushMemorySizeThreshold specifies when the log should be forced to be flushed
FlushMemorySizeThreshold int
// MaxMemorySizeThreshold specifies the limit at which writes to the WAL should be rejected
MaxMemorySizeThreshold int
2015-09-09 18:29:50 +00:00
2015-10-13 18:35:48 +00:00
// IndexWriter is the database series will be flushed to
IndexWriter IndexWriter
2015-09-09 18:29:50 +00:00
// LoggingEnabled specifies if detailed logs should be output
LoggingEnabled bool
// SkipCache specifies if the wal should immediately write to the index instead of
// caching data in memory. False by default so we buffer in memory before flushing to index.
SkipCache bool
// SkipDurability specifies if the wal should not write the wal entries to disk.
// False by default which means all writes are durable even when cached before flushing to index.
SkipDurability bool
}
// IndexWriter is an interface for the indexed database the WAL flushes data to
type IndexWriter interface {
2015-09-27 18:45:55 +00:00
Write ( valuesByKey map [ string ] Values , measurementFieldsToSave map [ string ] * tsdb . MeasurementFields , seriesToCreate [ ] * tsdb . SeriesCreate ) error
2015-10-01 19:16:28 +00:00
MarkDeletes ( keys [ ] string )
2015-10-05 21:21:07 +00:00
MarkMeasurementDelete ( name string )
2015-09-09 18:29:50 +00:00
}
func NewLog ( path string ) * Log {
return & Log {
path : path ,
// these options should be overriden by any options in the config
2015-09-17 19:46:53 +00:00
LogOutput : os . Stderr ,
FlushColdInterval : tsdb . DefaultFlushColdInterval ,
SegmentSize : DefaultSegmentSize ,
FlushMemorySizeThreshold : tsdb . DefaultFlushMemorySizeThreshold ,
MaxMemorySizeThreshold : tsdb . DefaultMaxMemorySizeThreshold ,
2015-10-01 19:23:38 +00:00
logger : log . New ( os . Stderr , "[tsm1wal] " , log . LstdFlags ) ,
2015-10-21 22:15:26 +00:00
closing : make ( chan struct { } ) ,
2015-09-09 18:29:50 +00:00
}
}
2015-10-13 18:35:48 +00:00
// Path returns the path the log was initialized with.
func (l *Log) Path() string { return l.path }
2015-09-09 18:29:50 +00:00
// Open opens and initializes the Log. Will recover from previous unclosed shutdowns
func ( l * Log ) Open ( ) error {
if l . LoggingEnabled {
2015-10-01 19:23:38 +00:00
l . logger . Printf ( "tsm1 WAL starting with %d flush memory size threshold and %d max memory size threshold\n" , l . FlushMemorySizeThreshold , l . MaxMemorySizeThreshold )
l . logger . Printf ( "tsm1 WAL writing to %s\n" , l . path )
2015-09-09 18:29:50 +00:00
}
if err := os . MkdirAll ( l . path , 0777 ) ; err != nil {
return err
}
l . cache = make ( map [ string ] Values )
l . cacheDirtySort = make ( map [ string ] bool )
l . measurementFieldsCache = make ( map [ string ] * tsdb . MeasurementFields )
2015-10-22 05:41:17 +00:00
l . closing = make ( chan struct { } )
2015-09-17 15:23:27 +00:00
// flush out any WAL entries that are there from before
if err := l . readAndFlushWAL ( ) ; err != nil {
return err
}
2015-10-22 05:41:17 +00:00
2015-09-09 18:29:50 +00:00
return nil
}
2015-09-27 23:37:03 +00:00
// Cursor will return a cursor object to Seek and iterate with Next for the WAL cache for the given.
// This should only ever be called by the engine cursor method, which will always give it
// exactly one field.
2015-09-25 14:49:26 +00:00
func ( l * Log ) Cursor ( series string , fields [ ] string , dec * tsdb . FieldCodec , ascending bool ) tsdb . Cursor {
2015-09-09 18:29:50 +00:00
l . cacheLock . RLock ( )
defer l . cacheLock . RUnlock ( )
2015-09-27 23:37:03 +00:00
if len ( fields ) != 1 {
panic ( "wal cursor should only ever be called with 1 field" )
2015-09-26 14:47:36 +00:00
}
2015-10-05 21:21:07 +00:00
ck := SeriesFieldKey ( series , fields [ 0 ] )
2015-09-09 18:29:50 +00:00
values := l . cache [ ck ]
// if we're in the middle of a flush, combine the previous cache
// with this one for the cursor
if l . flushCache != nil {
if fc , ok := l . flushCache [ ck ] ; ok {
c := make ( [ ] Value , len ( fc ) , len ( fc ) + len ( values ) )
copy ( c , fc )
c = append ( c , values ... )
2015-12-03 13:11:50 +00:00
return newWALCursor ( Values ( c ) . Deduplicate ( ) , ascending )
2015-09-09 18:29:50 +00:00
}
}
if l . cacheDirtySort [ ck ] {
2015-12-03 13:11:50 +00:00
values = Values ( values ) . Deduplicate ( )
2015-09-09 18:29:50 +00:00
}
// build a copy so writes afterwards don't change the result set
a := make ( [ ] Value , len ( values ) )
copy ( a , values )
2015-09-25 14:49:26 +00:00
return newWALCursor ( a , ascending )
2015-09-09 18:29:50 +00:00
}
2015-09-25 14:49:26 +00:00
func ( l * Log ) WritePoints ( points [ ] models . Point , fields map [ string ] * tsdb . MeasurementFields , series [ ] * tsdb . SeriesCreate ) error {
2015-09-17 19:46:53 +00:00
// add everything to the cache, or return an error if we've hit our max memory
2015-10-21 22:33:51 +00:00
if err := l . addToCache ( points , fields , series , true ) ; err != nil {
return err
2015-09-17 19:46:53 +00:00
}
2015-09-09 18:29:50 +00:00
// make the write durable if specified
if ! l . SkipDurability {
2015-09-17 15:23:27 +00:00
// write the points
2015-09-09 18:29:50 +00:00
pointStrings := make ( [ ] string , len ( points ) )
for i , p := range points {
pointStrings [ i ] = p . String ( )
}
data := strings . Join ( pointStrings , "\n" )
compressed := snappy . Encode ( nil , [ ] byte ( data ) )
if err := l . writeToLog ( pointsEntry , compressed ) ; err != nil {
return err
}
2015-09-17 15:23:27 +00:00
// write the new fields
if len ( fields ) > 0 {
data , err := json . Marshal ( fields )
if err != nil {
return err
}
compressed = snappy . Encode ( compressed , data )
if err := l . writeToLog ( fieldsEntry , compressed ) ; err != nil {
return err
}
}
2015-09-09 18:29:50 +00:00
2015-09-17 15:23:27 +00:00
// write the new series
if len ( series ) > 0 {
data , err := json . Marshal ( series )
if err != nil {
return err
}
compressed = snappy . Encode ( compressed , data )
if err := l . writeToLog ( seriesEntry , compressed ) ; err != nil {
return err
}
}
2015-09-09 18:29:50 +00:00
}
2015-09-17 15:23:27 +00:00
// usually skipping the cache is only for testing purposes and this was the easiest
// way to represent the logic (to cache and then immediately flush)
if l . SkipCache {
2015-10-21 15:12:34 +00:00
if err := l . flush ( idleFlush ) ; err != nil {
return err
}
2015-09-17 15:23:27 +00:00
}
return nil
}
2015-09-17 19:46:53 +00:00
// addToCache will add the points, measurements, and fields to the cache and return true if successful. They will be queryable
// immediately after return and will be flushed at the next flush cycle. Before adding to the cache we check if we're over the
2015-10-21 22:33:51 +00:00
// max memory threshold. If we are we request a flush in a new goroutine and return an error, indicating we didn't add the values
2015-09-17 19:46:53 +00:00
// to the cache and that writes should return a failure.
2015-10-21 22:33:51 +00:00
func ( l * Log ) addToCache ( points [ ] models . Point , fields map [ string ] * tsdb . MeasurementFields , series [ ] * tsdb . SeriesCreate , checkMemory bool ) error {
2015-09-09 18:29:50 +00:00
l . cacheLock . Lock ( )
2015-09-17 15:23:27 +00:00
defer l . cacheLock . Unlock ( )
2015-10-22 16:12:07 +00:00
// Make sure the log has not been closed
2015-10-21 22:33:51 +00:00
select {
case <- l . closing :
return ErrWALClosed
default :
}
2015-09-17 19:46:53 +00:00
// if we should check memory and we're over the threshold, mark a flush as running and kick one off in a goroutine
2015-09-29 01:04:02 +00:00
if checkMemory && l . memorySize > l . FlushMemorySizeThreshold {
2015-09-17 19:46:53 +00:00
if ! l . flushRunning {
l . flushRunning = true
2015-10-21 15:12:34 +00:00
go func ( ) {
if err := l . flush ( memoryFlush ) ; err != nil {
l . logger . Printf ( "addToCache: failed to flush: %v" , err )
}
} ( )
2015-09-17 19:46:53 +00:00
}
2015-09-29 01:04:02 +00:00
if l . memorySize > l . MaxMemorySizeThreshold {
2015-10-21 22:33:51 +00:00
return fmt . Errorf ( "WAL backed up flushing to index, hit max memory" )
2015-09-29 01:04:02 +00:00
}
2015-09-17 19:46:53 +00:00
}
2015-09-09 18:29:50 +00:00
for _ , p := range points {
for name , value := range p . Fields ( ) {
2015-10-05 21:21:07 +00:00
k := SeriesFieldKey ( string ( p . Key ( ) ) , name )
2015-09-09 18:29:50 +00:00
v := NewValue ( p . Time ( ) , value )
cacheValues := l . cache [ k ]
// only mark it as dirty if it isn't already
if _ , ok := l . cacheDirtySort [ k ] ; ! ok && len ( cacheValues ) > 0 {
2015-09-26 13:02:14 +00:00
dirty := cacheValues [ len ( cacheValues ) - 1 ] . Time ( ) . UnixNano ( ) >= v . Time ( ) . UnixNano ( )
2015-09-09 18:29:50 +00:00
if dirty {
l . cacheDirtySort [ k ] = true
}
}
l . memorySize += v . Size ( )
l . cache [ k ] = append ( cacheValues , v )
}
}
for k , v := range fields {
l . measurementFieldsCache [ k ] = v
}
l . seriesToCreateCache = append ( l . seriesToCreateCache , series ... )
l . lastWriteTime = time . Now ( )
2015-09-17 19:46:53 +00:00
2015-10-21 22:33:51 +00:00
return nil
2015-09-17 15:23:27 +00:00
}
2015-09-09 18:29:50 +00:00
2015-09-29 02:50:00 +00:00
// LastWriteTime returns the time of the most recent write into the cache.
func (l *Log) LastWriteTime() time.Time {
	l.cacheLock.RLock()
	defer l.cacheLock.RUnlock()
	return l.lastWriteTime
}
2015-09-17 15:23:27 +00:00
// readAndFlushWAL is called on open and will read the segment files in, flushing whenever
// the memory gets over the limit. Once all files have been read it will flush and remove the files
func ( l * Log ) readAndFlushWAL ( ) error {
files , err := l . segmentFileNames ( )
if err != nil {
return err
}
// read all the segment files and cache them, flushing along the way if we
// hit memory limits
for _ , fn := range files {
if err := l . readFileToCache ( fn ) ; err != nil {
return err
}
2015-09-17 19:46:53 +00:00
if l . memorySize > l . MaxMemorySizeThreshold {
2015-09-17 15:23:27 +00:00
if err := l . flush ( memoryFlush ) ; err != nil {
return err
}
}
}
// now flush and remove all the old files
if err := l . flush ( startupFlush ) ; err != nil {
return err
2015-09-09 18:29:50 +00:00
}
return nil
}
2015-09-17 15:23:27 +00:00
func ( l * Log ) readFileToCache ( fileName string ) error {
f , err := os . OpenFile ( fileName , os . O_RDONLY , 0666 )
if err != nil {
return err
}
defer f . Close ( )
buf := make ( [ ] byte , writeBufLen )
data := make ( [ ] byte , writeBufLen )
for {
// read the type and the length of the entry
_ , err := io . ReadFull ( f , buf [ 0 : 5 ] )
if err == io . EOF {
return nil
} else if err != nil {
l . logger . Printf ( "error reading segment file %s: %s" , fileName , err . Error ( ) )
return err
}
entryType := buf [ 0 ]
length := btou32 ( buf [ 1 : 5 ] )
// read the compressed block and decompress it
if int ( length ) > len ( buf ) {
buf = make ( [ ] byte , length )
}
_ , err = io . ReadFull ( f , buf [ 0 : length ] )
2015-09-30 15:48:20 +00:00
if err == io . EOF || err == io . ErrUnexpectedEOF {
2015-09-17 15:23:27 +00:00
l . logger . Printf ( "hit end of file while reading compressed wal entry from %s" , fileName )
return nil
} else if err != nil {
return err
}
data , err = snappy . Decode ( data , buf [ 0 : length ] )
if err != nil {
l . logger . Printf ( "error decoding compressed entry from %s: %s" , fileName , err . Error ( ) )
return nil
}
// and marshal it and send it to the cache
switch walEntryType ( entryType ) {
case pointsEntry :
2015-09-25 14:49:26 +00:00
points , err := models . ParsePoints ( data )
2015-09-17 15:23:27 +00:00
if err != nil {
2015-10-27 18:10:39 +00:00
l . logger . Printf ( "failed to parse points: %v" , err )
2015-09-17 15:23:27 +00:00
return err
}
2015-09-17 19:46:53 +00:00
l . addToCache ( points , nil , nil , false )
2015-09-17 15:23:27 +00:00
case fieldsEntry :
fields := make ( map [ string ] * tsdb . MeasurementFields )
if err := json . Unmarshal ( data , & fields ) ; err != nil {
return err
}
2015-09-17 19:46:53 +00:00
l . addToCache ( nil , fields , nil , false )
2015-09-17 15:23:27 +00:00
case seriesEntry :
2015-10-15 20:14:01 +00:00
var series [ ] * tsdb . SeriesCreate
2015-09-17 15:23:27 +00:00
if err := json . Unmarshal ( data , & series ) ; err != nil {
return err
}
2015-09-17 19:46:53 +00:00
l . addToCache ( nil , nil , series , false )
2015-10-01 19:16:28 +00:00
case deleteEntry :
2015-10-05 21:21:07 +00:00
d := & deleteData { }
if err := json . Unmarshal ( data , & d ) ; err != nil {
2015-10-01 19:16:28 +00:00
return err
}
2015-10-13 18:35:48 +00:00
l . IndexWriter . MarkDeletes ( d . Keys )
l . IndexWriter . MarkMeasurementDelete ( d . MeasurementName )
2015-10-05 21:21:07 +00:00
l . deleteKeysFromCache ( d . Keys )
if d . MeasurementName != "" {
l . deleteMeasurementFromCache ( d . MeasurementName )
}
2015-09-17 15:23:27 +00:00
}
}
}
2015-09-09 18:29:50 +00:00
func ( l * Log ) writeToLog ( writeType walEntryType , data [ ] byte ) error {
l . writeLock . Lock ( )
defer l . writeLock . Unlock ( )
2015-10-22 16:12:07 +00:00
// Make sure the log has not been closed
2015-10-21 22:33:51 +00:00
select {
case <- l . closing :
return ErrWALClosed
default :
}
2015-09-29 01:04:02 +00:00
if l . currentSegmentFile == nil || l . currentSegmentSize > DefaultSegmentSize {
2015-09-17 15:23:27 +00:00
if err := l . newSegmentFile ( ) ; err != nil {
2015-10-26 15:35:05 +00:00
// A drop database or RP call could trigger this error if writes were in-flight
// when the drop statement executes.
return fmt . Errorf ( "error opening new segment file for wal: %s" , err . Error ( ) )
2015-09-17 15:23:27 +00:00
}
2015-09-09 18:29:50 +00:00
}
2015-09-17 15:23:27 +00:00
// The panics here are an intentional choice. Based on reports from users
// it's better to fail hard if the database can't take writes. Then they'll
// get alerted and fix whatever is broken. Remove these and face Paul's wrath.
2015-09-09 18:29:50 +00:00
if _ , err := l . currentSegmentFile . Write ( [ ] byte { byte ( writeType ) } ) ; err != nil {
panic ( fmt . Sprintf ( "error writing type to wal: %s" , err . Error ( ) ) )
}
if _ , err := l . currentSegmentFile . Write ( u32tob ( uint32 ( len ( data ) ) ) ) ; err != nil {
panic ( fmt . Sprintf ( "error writing len to wal: %s" , err . Error ( ) ) )
}
if _ , err := l . currentSegmentFile . Write ( data ) ; err != nil {
panic ( fmt . Sprintf ( "error writing data to wal: %s" , err . Error ( ) ) )
}
2015-09-29 01:04:02 +00:00
l . currentSegmentSize += 5 + len ( data )
2015-09-09 18:29:50 +00:00
return l . currentSegmentFile . Sync ( )
}
// Flush will force a flush of the WAL to the index.
func (l *Log) Flush() error {
	return l.flush(idleFlush)
}
2015-10-05 21:21:07 +00:00
// DeleteMeasurement records a delete entry for the measurement and its keys
// in the WAL, then removes them from the cache.
func (l *Log) DeleteMeasurement(measurement string, keys []string) error {
	entry := &deleteData{MeasurementName: measurement, Keys: keys}
	if err := l.writeDeleteEntry(entry); err != nil {
		return err
	}
	l.deleteKeysFromCache(keys)
	l.deleteMeasurementFromCache(measurement)
	return nil
}
func ( l * Log ) deleteMeasurementFromCache ( name string ) {
2015-10-01 19:16:28 +00:00
l . cacheLock . Lock ( )
defer l . cacheLock . Unlock ( )
2015-10-05 21:21:07 +00:00
delete ( l . measurementFieldsCache , name )
}
func ( l * Log ) writeDeleteEntry ( d * deleteData ) error {
js , err := json . Marshal ( d )
if err != nil {
return err
}
data := snappy . Encode ( nil , js )
return l . writeToLog ( deleteEntry , data )
2015-10-01 19:16:28 +00:00
}
2015-09-09 18:29:50 +00:00
func ( l * Log ) DeleteSeries ( keys [ ] string ) error {
2015-10-05 21:21:07 +00:00
l . deleteKeysFromCache ( keys )
return l . writeDeleteEntry ( & deleteData { Keys : keys } )
}
func ( l * Log ) deleteKeysFromCache ( keys [ ] string ) {
seriesKeys := make ( map [ string ] bool )
for _ , k := range keys {
series , _ := seriesAndFieldFromCompositeKey ( k )
seriesKeys [ series ] = true
}
2015-10-01 19:16:28 +00:00
l . cacheLock . Lock ( )
2015-10-05 21:21:07 +00:00
defer l . cacheLock . Unlock ( )
2015-10-01 19:16:28 +00:00
for _ , k := range keys {
delete ( l . cache , k )
}
2015-10-05 21:21:07 +00:00
// now remove any of these that are marked for creation
var seriesCreate [ ] * tsdb . SeriesCreate
for _ , sc := range l . seriesToCreateCache {
if _ , ok := seriesKeys [ sc . Series . Key ] ; ! ok {
seriesCreate = append ( seriesCreate , sc )
}
2015-10-01 19:16:28 +00:00
}
2015-10-05 21:21:07 +00:00
l . seriesToCreateCache = seriesCreate
2015-09-09 18:29:50 +00:00
}
// Close will finish any flush that is currently in process and close file handles
func ( l * Log ) Close ( ) error {
l . cacheLock . Lock ( )
2015-10-22 05:46:59 +00:00
l . writeLock . Lock ( )
2015-09-09 18:29:50 +00:00
defer l . cacheLock . Unlock ( )
2015-10-22 05:46:59 +00:00
defer l . writeLock . Unlock ( )
2015-09-09 18:29:50 +00:00
2015-10-21 22:15:26 +00:00
// If cache is nil, then we're not open. This avoids a double-close in tests.
if l . cache != nil {
// Close, but don't set to nil so future goroutines can still be signaled
close ( l . closing )
}
2015-09-09 18:29:50 +00:00
l . cache = nil
l . measurementFieldsCache = nil
l . seriesToCreateCache = nil
2015-10-13 18:35:48 +00:00
if l . currentSegmentFile != nil {
l . currentSegmentFile . Close ( )
l . currentSegmentFile = nil
2015-09-09 18:29:50 +00:00
}
return nil
}
// flush writes all wal data in memory to the index
func ( l * Log ) flush ( flush flushType ) error {
2015-10-22 16:12:07 +00:00
// Make sure the log has not been closed
2015-10-21 22:33:51 +00:00
select {
case <- l . closing :
return ErrWALClosed
default :
}
2015-10-22 16:12:07 +00:00
// only flush if there isn't one already running. Memory flushes are only triggered
// by writes, which will mark the flush as running, so we can ignore it.
l . cacheLock . Lock ( )
2015-09-17 19:46:53 +00:00
if l . flushRunning && flush != memoryFlush {
l . cacheLock . Unlock ( )
2015-09-09 18:29:50 +00:00
return nil
}
2015-09-17 19:46:53 +00:00
// mark the flush as running and ensure that it gets marked as not running when we return
2015-09-09 18:29:50 +00:00
l . flushRunning = true
defer func ( ) {
2015-09-17 19:46:53 +00:00
l . cacheLock . Lock ( )
2015-09-09 18:29:50 +00:00
l . flushRunning = false
2015-09-17 19:46:53 +00:00
l . cacheLock . Unlock ( )
2015-09-09 18:29:50 +00:00
} ( )
2015-09-17 19:46:53 +00:00
// only hold the lock while we rotate the segment file
l . writeLock . Lock ( )
2015-09-09 18:29:50 +00:00
lastFileID := l . currentSegmentID
2015-09-18 19:18:05 +00:00
// if it's an idle flush, don't open a new segment file
if flush == idleFlush {
if l . currentSegmentFile != nil {
if err := l . currentSegmentFile . Close ( ) ; err != nil {
2015-10-26 15:37:19 +00:00
l . cacheLock . Unlock ( )
l . writeLock . Unlock ( )
2015-10-26 19:08:06 +00:00
return fmt . Errorf ( "error closing current segment: %v" , err )
2015-09-18 19:18:05 +00:00
}
l . currentSegmentFile = nil
l . currentSegmentSize = 0
}
} else {
if err := l . newSegmentFile ( ) ; err != nil {
2015-10-26 15:37:19 +00:00
l . cacheLock . Unlock ( )
l . writeLock . Unlock ( )
2015-10-26 19:08:06 +00:00
return fmt . Errorf ( "error creating new wal file: %v" , err )
2015-09-18 19:18:05 +00:00
}
2015-09-09 18:29:50 +00:00
}
l . writeLock . Unlock ( )
// copy the cache items to new maps so we can empty them out
2015-09-29 01:04:02 +00:00
l . flushCache = make ( map [ string ] Values )
2015-09-09 18:29:50 +00:00
valueCount := 0
2015-09-29 01:04:02 +00:00
for key , v := range l . cache {
l . flushCache [ key ] = v
2015-09-09 18:29:50 +00:00
valueCount += len ( v )
}
2015-09-29 01:04:02 +00:00
l . cache = make ( map [ string ] Values )
2015-10-15 20:14:01 +00:00
for k := range l . cacheDirtySort {
2015-12-03 13:11:50 +00:00
l . flushCache [ k ] = l . flushCache [ k ] . Deduplicate ( )
2015-09-29 02:50:00 +00:00
}
l . cacheDirtySort = make ( map [ string ] bool )
2015-09-09 18:29:50 +00:00
2015-09-17 15:23:27 +00:00
flushSize := l . memorySize
2015-09-09 18:29:50 +00:00
// reset the memory being used by the cache
l . memorySize = 0
// reset the measurements for flushing
mfc := l . measurementFieldsCache
l . measurementFieldsCache = make ( map [ string ] * tsdb . MeasurementFields )
// reset the series for flushing
scc := l . seriesToCreateCache
l . seriesToCreateCache = nil
l . cacheLock . Unlock ( )
2015-09-17 15:23:27 +00:00
// exit if there's nothing to flush to the index
2015-10-01 19:16:28 +00:00
if len ( l . flushCache ) == 0 && len ( mfc ) == 0 && len ( scc ) == 0 && flush != startupFlush {
2015-09-17 15:23:27 +00:00
return nil
}
if l . LoggingEnabled {
ftype := "idle"
if flush == memoryFlush {
ftype = "memory"
} else if flush == startupFlush {
ftype = "startup"
}
2015-09-30 20:38:55 +00:00
l . logger . Printf ( "%s flush of %s with %d keys and %d total values of %d bytes\n" , ftype , l . path , len ( l . flushCache ) , valueCount , flushSize )
2015-09-17 15:23:27 +00:00
}
2015-09-09 18:29:50 +00:00
startTime := time . Now ( )
2015-10-13 18:35:48 +00:00
if err := l . IndexWriter . Write ( l . flushCache , mfc , scc ) ; err != nil {
2015-10-21 15:12:34 +00:00
l . logger . Printf ( "failed to flush to index: %v" , err )
2015-09-09 18:29:50 +00:00
return err
}
if l . LoggingEnabled {
2015-09-30 20:38:55 +00:00
l . logger . Printf ( "%s flush to index took %s\n" , l . path , time . Since ( startTime ) )
2015-09-09 18:29:50 +00:00
}
l . cacheLock . Lock ( )
l . flushCache = nil
l . cacheLock . Unlock ( )
// remove all the old segment files
fileNames , err := l . segmentFileNames ( )
if err != nil {
return err
}
for _ , fn := range fileNames {
id , err := idFromFileName ( fn )
if err != nil {
return err
}
if id <= lastFileID {
2015-10-26 19:16:32 +00:00
err := os . RemoveAll ( fn )
2015-09-09 18:29:50 +00:00
if err != nil {
2015-10-26 19:16:32 +00:00
return fmt . Errorf ( "failed to remove: %v: %v" , fn , err )
2015-09-09 18:29:50 +00:00
}
}
}
return nil
}
// segmentFileNames will return all files that are WAL segment files in sorted order by ascending ID.
func (l *Log) segmentFileNames() ([]string, error) {
	pattern := filepath.Join(l.path, fmt.Sprintf("%s*.%s", WALFilePrefix, WALFileExtension))
	names, err := filepath.Glob(pattern)
	if err != nil {
		return nil, err
	}
	sort.Strings(names)
	return names, nil
}
// newSegmentFile will close the current segment file and open a new one, updating bookkeeping info on the log
func ( l * Log ) newSegmentFile ( ) error {
2015-10-15 20:14:01 +00:00
l . currentSegmentID ++
2015-09-09 18:29:50 +00:00
if l . currentSegmentFile != nil {
if err := l . currentSegmentFile . Close ( ) ; err != nil {
return err
}
}
fileName := filepath . Join ( l . path , fmt . Sprintf ( "%s%05d.%s" , WALFilePrefix , l . currentSegmentID , WALFileExtension ) )
ff , err := os . OpenFile ( fileName , os . O_CREATE | os . O_RDWR , 0666 )
if err != nil {
return err
}
l . currentSegmentSize = 0
l . currentSegmentFile = ff
return nil
}
2015-09-17 19:46:53 +00:00
// shouldFlush will return the flushType specifying whether we should flush. memoryFlush
// is never returned from this function since those can only be triggered by writes
2015-09-09 18:29:50 +00:00
func ( l * Log ) shouldFlush ( ) flushType {
l . cacheLock . RLock ( )
defer l . cacheLock . RUnlock ( )
2015-09-17 19:46:53 +00:00
if l . flushRunning {
2015-09-09 18:29:50 +00:00
return noFlush
}
2015-09-17 19:46:53 +00:00
if len ( l . cache ) == 0 {
return noFlush
2015-09-09 18:29:50 +00:00
}
if time . Since ( l . lastWriteTime ) > l . FlushColdInterval {
return idleFlush
}
return noFlush
}
// cursor is a unidirectional iterator for a given entry in the cache
type walCursor struct {
cache Values
position int
2015-09-25 14:49:26 +00:00
ascending bool
2015-09-09 18:29:50 +00:00
}
2015-09-25 14:49:26 +00:00
func newWALCursor ( cache Values , ascending bool ) * walCursor {
2015-09-09 18:29:50 +00:00
// position is set such that a call to Next will successfully advance
// to the next postion and return the value.
2015-09-25 14:49:26 +00:00
c := & walCursor { cache : cache , ascending : ascending , position : - 1 }
if ! ascending {
2015-09-09 18:29:50 +00:00
c . position = len ( c . cache )
}
return c
}
2015-09-25 14:49:26 +00:00
// Ascending reports the direction this cursor iterates in.
func (c *walCursor) Ascending() bool { return c.ascending }
2015-09-09 18:29:50 +00:00
// Seek will point the cursor to the given time (or key)
2015-09-25 14:49:26 +00:00
func ( c * walCursor ) SeekTo ( seek int64 ) ( int64 , interface { } ) {
2015-09-09 18:29:50 +00:00
// Seek cache index
c . position = sort . Search ( len ( c . cache ) , func ( i int ) bool {
2015-09-25 14:49:26 +00:00
return c . cache [ i ] . Time ( ) . UnixNano ( ) >= seek
2015-09-09 18:29:50 +00:00
} )
// If seek is not in the cache, return the last value in the cache
2015-09-25 14:49:26 +00:00
if ! c . ascending && c . position >= len ( c . cache ) {
2015-10-01 00:41:50 +00:00
c . position = len ( c . cache ) - 1
2015-09-09 18:29:50 +00:00
}
// Make sure our position points to something in the cache
if c . position < 0 || c . position >= len ( c . cache ) {
2015-09-25 14:49:26 +00:00
return tsdb . EOF , nil
2015-09-09 18:29:50 +00:00
}
v := c . cache [ c . position ]
2015-09-25 14:49:26 +00:00
return v . Time ( ) . UnixNano ( ) , v . Value ( )
2015-09-09 18:29:50 +00:00
}
// Next moves the cursor to the next key/value. will return nil if at the end
2015-09-25 14:49:26 +00:00
func ( c * walCursor ) Next ( ) ( int64 , interface { } ) {
2015-09-09 18:29:50 +00:00
var v Value
2015-09-25 14:49:26 +00:00
if c . ascending {
2015-09-09 18:29:50 +00:00
v = c . nextForward ( )
} else {
v = c . nextReverse ( )
}
2015-09-25 14:49:26 +00:00
return v . Time ( ) . UnixNano ( ) , v . Value ( )
2015-09-09 18:29:50 +00:00
}
// nextForward advances the cursor forward returning the next value.
func (c *walCursor) nextForward() Value {
	c.position++
	if c.position >= len(c.cache) {
		return &EmptyValue{}
	}
	return c.cache[c.position]
}
// nextReverse advances the cursor backwards returning the next value.
func (c *walCursor) nextReverse() Value {
	c.position--
	if c.position < 0 {
		return &EmptyValue{}
	}
	return c.cache[c.position]
}
2015-10-05 21:21:07 +00:00
// deleteData holds the information for a delete entry.
type deleteData struct {
	// MeasurementName will be empty for deletes that are only against series
	MeasurementName string
	Keys            []string
}