2015-10-01 19:23:38 +00:00
|
|
|
package tsm1
|
2015-09-29 22:08:18 +00:00
|
|
|
|
|
|
|
// String encoding uses snappy compression to compress each string. Each string is
|
|
|
|
// appended to a byte slice prefixed with a variable byte length followed by the string
|
|
|
|
// bytes. The bytes are compressed using the snappy compressor and a 1 byte header is used
|
|
|
|
// to indicate the type of encoding.
|
|
|
|
|
|
|
|
import (
|
|
|
|
"encoding/binary"
|
|
|
|
"fmt"
|
|
|
|
|
|
|
|
"github.com/golang/snappy"
|
|
|
|
)
|
|
|
|
|
2015-10-02 16:03:20 +00:00
|
|
|
// Encoding type identifiers for string blocks.
const (
	// stringUncompressed identifies an uncompressed format that stores
	// strings as raw bytes. Not yet implemented.
	stringUncompressed = 0

	// stringCompressedSnappy identifies a block whose payload is compressed
	// with Snappy.
	stringCompressedSnappy = 1
)
|
|
|
|
|
2015-09-29 22:08:18 +00:00
|
|
|
// StringEncoder accumulates strings and produces a single encoded block
// containing all of them.
type StringEncoder interface {
	// Write adds s to the set of strings to be encoded.
	Write(s string)

	// Bytes returns the encoded form of every string written so far.
	Bytes() ([]byte, error)
}
|
|
|
|
|
|
|
|
// StringDecoder iterates over the strings stored in an encoded block.
type StringDecoder interface {
	// Next advances to the next string and reports whether one is available.
	Next() bool

	// Read returns the string at the current position.
	Read() string

	// Error returns the error recorded by the decoder, if any.
	Error() error
}
|
|
|
|
|
|
|
|
// stringEncoder is the StringEncoder implementation in this file. It buffers
// written strings and compresses them when Bytes is called.
type stringEncoder struct {
	// bytes holds the uncompressed, length-prefixed strings appended by Write.
	bytes []byte
}
|
|
|
|
|
|
|
|
func NewStringEncoder() StringEncoder {
|
|
|
|
return &stringEncoder{}
|
|
|
|
}
|
|
|
|
|
|
|
|
func (e *stringEncoder) Write(s string) {
|
|
|
|
b := make([]byte, 10)
|
|
|
|
// Append the length of the string using variable byte encoding
|
|
|
|
i := binary.PutUvarint(b, uint64(len(s)))
|
|
|
|
e.bytes = append(e.bytes, b[:i]...)
|
|
|
|
|
|
|
|
// Append the string bytes
|
|
|
|
e.bytes = append(e.bytes, s...)
|
|
|
|
}
|
|
|
|
|
|
|
|
func (e *stringEncoder) Bytes() ([]byte, error) {
|
|
|
|
// Compress the currently appended bytes using snappy and prefix with
|
|
|
|
// a 1 byte header for future extension
|
|
|
|
data := snappy.Encode(nil, e.bytes)
|
2015-10-02 16:03:20 +00:00
|
|
|
return append([]byte{stringCompressedSnappy << 4}, data...), nil
|
2015-09-29 22:08:18 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
// stringDecoder is the StringDecoder implementation in this file. It walks a
// buffer of length-prefixed strings.
type stringDecoder struct {
	// b is the decompressed block: a sequence of uvarint length prefixes,
	// each followed by that many string bytes.
	b []byte

	// l is the size in bytes (prefix plus payload) of the entry most recently
	// returned by Read; Next advances i by this amount.
	l int

	// i is the offset into b of the current entry.
	i int

	// err is the value returned by Error.
	err error
}
|
|
|
|
|
2015-10-02 16:46:58 +00:00
|
|
|
func NewStringDecoder(b []byte) (StringDecoder, error) {
|
2015-09-29 22:08:18 +00:00
|
|
|
// First byte stores the encoding type, only have snappy format
|
|
|
|
// currently so ignore for now.
|
|
|
|
data, err := snappy.Decode(nil, b[1:])
|
|
|
|
if err != nil {
|
2015-10-02 16:46:58 +00:00
|
|
|
return nil, fmt.Errorf("failed to decode string block: %v", err.Error())
|
2015-09-29 22:08:18 +00:00
|
|
|
}
|
|
|
|
|
2015-10-02 16:46:58 +00:00
|
|
|
return &stringDecoder{b: data}, nil
|
2015-09-29 22:08:18 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
func (e *stringDecoder) Next() bool {
|
|
|
|
e.i += e.l
|
|
|
|
return e.i < len(e.b)
|
|
|
|
}
|
|
|
|
|
|
|
|
func (e *stringDecoder) Read() string {
|
|
|
|
// Read the length of the string
|
|
|
|
length, n := binary.Uvarint(e.b[e.i:])
|
|
|
|
|
|
|
|
// The length of this string plus the length of the variable byte encoded length
|
|
|
|
e.l = int(length) + n
|
|
|
|
|
|
|
|
return string(e.b[e.i+n : e.i+n+int(length)])
|
|
|
|
}
|
2015-10-02 16:46:58 +00:00
|
|
|
|
|
|
|
func (e *stringDecoder) Error() error {
|
|
|
|
return e.err
|
|
|
|
}
|