package tsm1

/*
Tombstone file format:

╔═══════════════════════════════════════════Tombstone File════════════════════════════════════════════╗
║ ┌─────────────┐┌──────────────────────────────────────────────────────────────────────────────────┐ ║
║ │             ││                                                                                  │ ║
║ │             ││                                                                                  │ ║
║ │             ││                                                                                  │ ║
║ │             ││                                                                                  │ ║
║ │             ││                                                                                  │ ║
║ │   Header    ││                                                                                  │ ║
║ │   4 bytes   ││                                Tombstone Entries                                 │ ║
║ │             ││                                                                                  │ ║
║ │             ││                                                                                  │ ║
║ │             ││                                                                                  │ ║
║ │             ││                                                                                  │ ║
║ │             ││                                                                                  │ ║
║ │             ││                                                                                  │ ║
║ └─────────────┘└──────────────────────────────────────────────────────────────────────────────────┘ ║
╚═════════════════════════════════════════════════════════════════════════════════════════════════════╝

╔═══════════════════════════════════════════Tombstone Entry═══════════════════════════════════════════╗
║ ┌──────┐┌───────────────┐┌────────────┐┌────────────────────────┐┌───────────────┐┌───────────────┐ ║
║ │      ││               ││            ││                        ││               ││               │ ║
║ │      ││               ││            ││                        ││               ││               │ ║
║ │      ││               ││            ││                        ││               ││               │ ║
║ │      ││               ││            ││                        ││               ││               │ ║
║ │      ││               ││            ││                        ││               ││               │ ║
║ │Prefix││   Reserved    ││ Key Length ││          Key           ││   Min Time    ││   Max Time    │ ║
║ │ Bit  ││    7 bits     ││  24 bits   ││        N bytes         ││    8 bytes    ││    8 bytes    │ ║
║ │      ││               ││            ││                        ││               ││               │ ║
║ │      ││               ││            ││                        ││               ││               │ ║
║ │      ││               ││            ││                        ││               ││               │ ║
║ │      ││               ││            ││                        ││               ││               │ ║
║ │      ││               ││            ││                        ││               ││               │ ║
║ │      ││               ││            ││                        ││               ││               │ ║
║ └──────┘└───────────────┘└────────────┘└────────────────────────┘└───────────────┘└───────────────┘ ║
╚═════════════════════════════════════════════════════════════════════════════════════════════════════╝

NOTE: v1, v2 and v3 tombstone supports have been dropped from 2.x. Only v4 is now
supported.
*/

import (
	"bufio"
	"compress/gzip"
	"encoding/binary"
	"errors"
	"fmt"
	"io"
	"math"
	"os"
	"path/filepath"
	"strings"
	"sync"

	"github.com/influxdata/influxdb/v2/pkg/fs"
)

const (
	headerSize = 4
	v4header   = 0x1504
)

var errIncompatibleV4Version = errors.New("incompatible v4 version")

// Tombstoner records tombstones when entries are deleted.
type Tombstoner struct {
	mu sync.RWMutex

	// Path is the location of the file to record tombstone. This should be the
	// full path to a TSM file.
	Path string

	FilterFn func(k []byte) bool

	// cache of the stats for this tombstone
	fileStats []FileStat
	// indicates that the stats may be out of sync with what is on disk and they
	// should be refreshed.
	statsLoaded bool

	// Tombstones that have been written but not flushed to disk yet.
	tombstones []Tombstone

	// These are references used for pending writes that have not been committed.  If
	// these are nil, then no pending writes are in progress.
	gz                *gzip.Writer
	bw                *bufio.Writer
	pendingFile       *os.File
	tmp               [8]byte
	lastAppliedOffset int64

	// Optional observer for when tombstone files are written.
	obs FileStoreObserver
}

// NewTombstoner constructs a Tombstoner for the given path. FilterFn can be nil.
func NewTombstoner(path string, filterFn func(k []byte) bool) *Tombstoner {
	return &Tombstoner{
		Path:     path,
		FilterFn: filterFn,
		obs:      noFileStoreObserver{},
	}
}

// Tombstone represents an individual deletion.
type Tombstone struct {
	// Key is the tombstoned series key.
	Key []byte

	// Prefix indicates if this tombstone entry is a prefix key, meaning all
	// keys with a prefix matching Key should be removed for the [Min, Max] range.
	Prefix bool

	// Min and Max are the min and max unix nanosecond time ranges of Key that are deleted.
	Min, Max int64

	// Predicate stores the marshaled form of some predicate for matching keys.
	Predicate []byte
}

func (t Tombstone) String() string {
	prefix := "Key"
	if t.Prefix {
		prefix = "Prefix"
	}
	return fmt.Sprintf("%s: %q, [%d, %d] pred:%v", prefix, t.Key, t.Min, t.Max, len(t.Predicate) > 0)
}

// WithObserver sets a FileStoreObserver for when the tombstone file is written.
func (t *Tombstoner) WithObserver(obs FileStoreObserver) {
	if obs == nil {
		obs = noFileStoreObserver{}
	}
	t.obs = obs
}

// AddPrefixRange adds a prefix-based tombstone key with an explicit range.
func (t *Tombstoner) AddPrefixRange(key []byte, min, max int64, predicate []byte) error {
	t.mu.Lock()
	defer t.mu.Unlock()

	// If this TSMFile has not been written (mainly in tests), don't write a
	// tombstone because the keys will not be written when it's actually saved.
	if t.Path == "" {
		return nil
	}

	t.statsLoaded = false

	if err := t.prepareLatest(); err != nil {
		return err
	}

	return t.writeTombstoneV4(t.gz, Tombstone{
		Key:       key,
		Min:       min,
		Max:       max,
		Prefix:    true,
		Predicate: predicate,
	})
}

// Add adds the all keys, across all timestamps, to the tombstone.
func (t *Tombstoner) Add(keys [][]byte) error {
	return t.AddRange(keys, math.MinInt64, math.MaxInt64)
}

// AddRange adds all keys to the tombstone specifying only the data between min and max to be removed.
func (t *Tombstoner) AddRange(keys [][]byte, min, max int64) error {
	for t.FilterFn != nil && len(keys) > 0 && !t.FilterFn(keys[0]) {
		keys = keys[1:]
	}

	if len(keys) == 0 {
		return nil
	}

	t.mu.Lock()
	defer t.mu.Unlock()

	// If this TSMFile has not been written (mainly in tests), don't write a
	// tombstone because the keys will not be written when it's actually saved.
	if t.Path == "" {
		return nil
	}

	t.statsLoaded = false

	if err := t.prepareLatest(); err != nil {
		return err
	}

	for _, k := range keys {
		if t.FilterFn != nil && !t.FilterFn(k) {
			continue
		}

		if err := t.writeTombstoneV4(t.gz, Tombstone{
			Key:    k,
			Min:    min,
			Max:    max,
			Prefix: false,
		}); err != nil {
			return err
		}
	}
	return nil
}

func (t *Tombstoner) Flush() error {
	t.mu.Lock()
	defer t.mu.Unlock()

	if err := t.commit(); err != nil {
		// Reset our temp references and clean up.
		_ = t.rollback()
		return err
	}
	return nil
}

func (t *Tombstoner) Rollback() error {
	t.mu.Lock()
	defer t.mu.Unlock()
	return t.rollback()
}

// Delete removes all the tombstone files from disk.
func (t *Tombstoner) Delete() error {
	t.mu.Lock()
	defer t.mu.Unlock()
	if err := os.RemoveAll(t.tombstonePath()); err != nil {
		return err
	}
	t.statsLoaded = false
	t.lastAppliedOffset = 0

	return nil
}

// HasTombstones return true if there are any tombstone entries recorded.
func (t *Tombstoner) HasTombstones() bool {
	files := t.TombstoneFiles()
	t.mu.RLock()
	n := len(t.tombstones)
	t.mu.RUnlock()

	return len(files) > 0 && files[0].Size > 0 || n > 0
}

// TombstoneFiles returns any tombstone files associated with Tombstoner's TSM file.
func (t *Tombstoner) TombstoneFiles() []FileStat {
	t.mu.RLock()
	if t.statsLoaded {
		stats := t.fileStats
		t.mu.RUnlock()
		return stats
	}
	t.mu.RUnlock()

	stat, err := os.Stat(t.tombstonePath())
	if os.IsNotExist(err) || err != nil {
		t.mu.Lock()
		// The file doesn't exist so record that we tried to load it so
		// we don't continue to keep trying.  This is the common case.
		t.statsLoaded = os.IsNotExist(err)
		t.fileStats = t.fileStats[:0]
		t.mu.Unlock()
		return nil
	}

	t.mu.Lock()
	t.fileStats = append(t.fileStats[:0], FileStat{
		Path:         t.tombstonePath(),
		LastModified: stat.ModTime().UnixNano(),
		Size:         uint32(stat.Size()),
	})
	t.statsLoaded = true
	stats := t.fileStats
	t.mu.Unlock()

	return stats
}

// Walk calls fn for every Tombstone under the Tombstoner.
func (t *Tombstoner) Walk(fn func(t Tombstone) error) error {
	t.mu.Lock()
	defer t.mu.Unlock()

	f, err := os.Open(t.tombstonePath())
	if os.IsNotExist(err) {
		return nil
	} else if err != nil {
		return err
	}
	defer f.Close()

	var b [4]byte
	if _, err := f.Read(b[:]); err != nil {
		return errors.New("unable to read header")
	}

	if _, err := f.Seek(0, io.SeekStart); err != nil {
		return err
	}

	header := binary.BigEndian.Uint32(b[:])
	if header == v4header {
		return t.readTombstoneV4(f, fn)
	}
	return errors.New("invalid tombstone file")
}

func (t *Tombstoner) prepareLatest() error {
	if t.pendingFile != nil { // There is already a pending tombstone file open.
		return nil
	}

	tmpPath := fmt.Sprintf("%s.%s", t.tombstonePath(), CompactionTempExtension)
	tmp, err := os.OpenFile(tmpPath, os.O_CREATE|os.O_RDWR|os.O_EXCL, 0666)
	if err != nil {
		return err
	}

	removeTmp := func() {
		tmp.Close()
		os.Remove(tmp.Name())
	}

	// Copy the existing v4 file if it exists
	f, err := os.Open(t.tombstonePath())
	if err != nil && !os.IsNotExist(err) {
		// An unexpected error should be returned
		removeTmp()
		return err
	} else if err == nil {
		// No error so load the tombstone file.
		defer f.Close()
		var b [4]byte
		if n, err := f.Read(b[:]); n == 4 && err == nil {
			header := binary.BigEndian.Uint32(b[:])
			// There is an existing tombstone on disk and it's not a v4.
			// We can't support it.
			if header != v4header {
				removeTmp()
				return errIncompatibleV4Version
			}

			// Seek back to the beginning we copy the header
			if _, err := f.Seek(0, io.SeekStart); err != nil {
				removeTmp()
				return err
			}

			// Copy the whole file
			if _, err := io.Copy(tmp, f); err != nil {
				f.Close()
				removeTmp()
				return err
			}
		}
	}

	// Else, the error was that the file does not exist. Create a new one.
	var b [8]byte
	bw := bufio.NewWriterSize(tmp, 64*1024)

	// Write the header only if the file is new
	if os.IsNotExist(err) {
		binary.BigEndian.PutUint32(b[:4], v4header)
		if _, err := bw.Write(b[:4]); err != nil {
			removeTmp()
			return err
		}
	}

	// Write the tombstones
	gz := gzip.NewWriter(bw)

	t.pendingFile = tmp
	t.gz = gz
	t.bw = bw

	return nil
}

func (t *Tombstoner) commit() error {
	// No pending writes
	if t.pendingFile == nil {
		return nil
	}

	if err := t.gz.Close(); err != nil {
		return err
	}

	if err := t.bw.Flush(); err != nil {
		return err
	}

	// fsync the file to flush the write
	if err := t.pendingFile.Sync(); err != nil {
		return err
	}

	tmpFilename := t.pendingFile.Name()
	t.pendingFile.Close()

	if err := t.obs.FileFinishing(tmpFilename); err != nil {
		return err
	}

	if err := fs.RenameFileWithReplacement(tmpFilename, t.tombstonePath()); err != nil {
		return err
	}

	if err := fs.SyncDir(filepath.Dir(t.tombstonePath())); err != nil {
		return err
	}

	t.pendingFile = nil
	t.bw = nil
	t.gz = nil

	return nil
}

func (t *Tombstoner) rollback() error {
	if t.pendingFile == nil {
		return nil
	}

	tmpFilename := t.pendingFile.Name()
	t.pendingFile.Close()
	t.gz = nil
	t.bw = nil
	t.pendingFile = nil
	return os.Remove(tmpFilename)
}

// readTombstoneV4 reads the fourth version of tombstone files that are capable
// of storing multiple v3 files appended together.
func (t *Tombstoner) readTombstoneV4(f *os.File, fn func(t Tombstone) error) error {
	// Skip header, already checked earlier
	if t.lastAppliedOffset != 0 {
		if _, err := f.Seek(t.lastAppliedOffset, io.SeekStart); err != nil {
			return err
		}
	} else {
		if _, err := f.Seek(headerSize, io.SeekStart); err != nil {
			return err
		}
	}

	const kmask = int64(0xff000000) // Mask for non key-length bits

	br := bufio.NewReaderSize(f, 64*1024)
	gr, err := gzip.NewReader(br)
	if err == io.EOF {
		return nil
	} else if err != nil {
		return err
	}
	defer gr.Close()

	var ( // save these buffers across loop iterations to avoid allocations
		keyBuf  []byte
		predBuf []byte
	)

	for {
		gr.Multistream(false)
		if err := func() error {
			for {
				var buf [8]byte

				if _, err = io.ReadFull(gr, buf[:4]); err == io.EOF || err == io.ErrUnexpectedEOF {
					return nil
				} else if err != nil {
					return err
				}

				keyLen := int64(binary.BigEndian.Uint32(buf[:4]))
				prefix := keyLen>>31&1 == 1 // Prefix is set according to whether the highest bit is set.
				hasPred := keyLen>>30&1 == 1

				// Remove 8 MSB to get correct length.
				keyLen &^= kmask

				if int64(len(keyBuf)) < keyLen {
					keyBuf = make([]byte, keyLen)
				}
				// cap slice protects against invalid usages of append in callback
				key := keyBuf[:keyLen:keyLen]

				if _, err := io.ReadFull(gr, key); err != nil {
					return err
				}

				if _, err := io.ReadFull(gr, buf[:8]); err != nil {
					return err
				}
				min := int64(binary.BigEndian.Uint64(buf[:8]))

				if _, err := io.ReadFull(gr, buf[:8]); err != nil {
					return err
				}
				max := int64(binary.BigEndian.Uint64(buf[:8]))

				var predicate []byte
				if hasPred {
					if _, err := io.ReadFull(gr, buf[:8]); err != nil {
						return err
					}
					predLen := binary.BigEndian.Uint64(buf[:8])

					if uint64(len(predBuf)) < predLen {
						predBuf = make([]byte, predLen)
					}
					// cap slice protects against invalid usages of append in callback
					predicate = predBuf[:predLen:predLen]

					if _, err := io.ReadFull(gr, predicate); err != nil {
						return err
					}
				}

				if err := fn(Tombstone{
					Key:       key,
					Min:       min,
					Max:       max,
					Prefix:    prefix,
					Predicate: predicate,
				}); err != nil {
					return err
				}
			}
		}(); err != nil {
			return err
		}

		for _, t := range t.tombstones {
			if err := fn(t); err != nil {
				return err
			}
		}

		err = gr.Reset(br)
		if err == io.EOF {
			break
		}
	}

	// Save the position of tombstone file so we don't re-apply the same set again if there are
	// more deletes.
	pos, err := f.Seek(0, io.SeekCurrent)
	if err != nil {
		return err
	}
	t.lastAppliedOffset = pos
	return nil
}

func (t *Tombstoner) tombstonePath() string {
	if strings.HasSuffix(t.Path, "tombstone") {
		return t.Path
	}

	// Filename is 0000001.tsm1
	filename := filepath.Base(t.Path)

	// Strip off the tsm1
	ext := filepath.Ext(filename)
	if ext != "" {
		filename = strings.TrimSuffix(filename, ext)
	}

	// Append the "tombstone" suffix to create a 0000001.tombstone file
	return filepath.Join(filepath.Dir(t.Path), filename+".tombstone")
}

func (t *Tombstoner) writeTombstoneV4(dst io.Writer, ts Tombstone) error {
	maxKeyLen := 0x00ffffff // 24 bit key length. Top 8 bits for other information.

	// Maximum key length. Leaves 8 spare bits.
	if len(ts.Key) > maxKeyLen {
		return fmt.Errorf("key has length %d, maximum allowed key length %d", len(ts.Key), maxKeyLen)
	}

	l := uint32(len(ts.Key))
	if ts.Prefix {
		// A mask to set the prefix bit on a tombstone.
		l |= 1 << 31
	}
	if len(ts.Predicate) > 0 {
		// A mask to set the predicate bit on a tombstone
		l |= 1 << 30
	}

	binary.BigEndian.PutUint32(t.tmp[:4], l)
	if _, err := dst.Write(t.tmp[:4]); err != nil {
		return err
	}
	if _, err := dst.Write([]byte(ts.Key)); err != nil {
		return err
	}

	binary.BigEndian.PutUint64(t.tmp[:], uint64(ts.Min))
	if _, err := dst.Write(t.tmp[:]); err != nil {
		return err
	}

	binary.BigEndian.PutUint64(t.tmp[:], uint64(ts.Max))
	if _, err := dst.Write(t.tmp[:]); err != nil {
		return err
	}

	if len(ts.Predicate) > 0 {
		binary.BigEndian.PutUint64(t.tmp[:], uint64(len(ts.Predicate)))
		if _, err := dst.Write(t.tmp[:]); err != nil {
			return err
		}

		if _, err := dst.Write(ts.Predicate); err != nil {
			return err
		}
	}

	return nil
}