feat(tsdb): Add SQL export for TSI indexes

pull/15127/head
Ben Johnson 2019-09-12 09:46:53 -06:00
parent a90786d1cf
commit d08403b658
No known key found for this signature in database
GPG Key ID: 81741CD251883081
5 changed files with 363 additions and 0 deletions

View File

@ -0,0 +1,58 @@
package inspect
import (
"context"
"os"
"path/filepath"
"github.com/influxdata/influxdb/internal/fs"
"github.com/influxdata/influxdb/tsdb"
"github.com/influxdata/influxdb/tsdb/tsi1"
"github.com/spf13/cobra"
)
// NewExportIndexCommand builds the `export-index` sub-command. When run, the
// command opens the series file given by --series-path and the TSI index given
// by --index-path, then writes a SQL export of the index to standard output.
func NewExportIndexCommand() *cobra.Command {
	// NOTE(review): the error from fs.InfluxDir is discarded, so on failure the
	// flag defaults are built from whatever directory string it returned.
	influxDir, _ := fs.InfluxDir()
	engineDir := filepath.Join(influxDir, "engine")

	var seriesFilePath, dataPath string

	cmd := &cobra.Command{
		Use:   `export-index`,
		Short: "Exports TSI index data",
		Long: `
This command will export all series in a TSI index to
another format for easier inspection and debugging.`,
		RunE: func(_ *cobra.Command, _ []string) error {
			ctx := context.Background()

			// The index needs an open series file.
			sfile := tsdb.NewSeriesFile(seriesFilePath)
			if err := sfile.Open(ctx); err != nil {
				return err
			}
			defer sfile.Close()

			// Open the index with the DisableCompactions option.
			idx := tsi1.NewIndex(sfile, tsi1.NewConfig(), tsi1.WithPath(dataPath), tsi1.DisableCompactions())
			if err := idx.Open(ctx); err != nil {
				return err
			}
			defer idx.Close()

			// Stream the SQL export to stdout.
			exporter := tsi1.NewSQLIndexExporter(os.Stdout)
			if err := exporter.ExportIndex(idx); err != nil {
				return err
			}
			return exporter.Close()
		},
	}

	flags := cmd.Flags()
	flags.StringVar(&seriesFilePath, "series-path", filepath.Join(engineDir, "_series"), "Path to series file")
	flags.StringVar(&dataPath, "index-path", filepath.Join(engineDir, "index"), "Path to the index directory of the data engine")

	return cmd
}

View File

@ -15,6 +15,7 @@ func NewCommand() *cobra.Command {
// If a new sub-command is created, it must be added here
subCommands := []*cobra.Command{
NewExportBlocksCommand(),
NewExportIndexCommand(),
NewReportTSMCommand(),
NewVerifyTSMCommand(),
NewVerifyWALCommand(),

View File

@ -27,6 +27,12 @@ func EncodeName(org, bucket platform.ID) [16]byte {
return nameBytes
}
// EncodeNameSlice returns the same bytes as EncodeName for the given
// org/bucket pair, but as a byte slice instead of a fixed-size array.
func EncodeNameSlice(org, bucket platform.ID) []byte {
	encoded := EncodeName(org, bucket)
	name := encoded[:]
	return name
}
// EncodeOrgName converts org to the tsdb internal serialization that may be used
// as a prefix when searching for keys matching a specific organization.
func EncodeOrgName(org platform.ID) [8]byte {

View File

@ -0,0 +1,246 @@
package tsi1
import (
"bytes"
"fmt"
"io"
"strings"
"unicode/utf8"
"github.com/influxdata/influxdb/tsdb"
"go.uber.org/zap"
)
// SQLIndexExporter writes out all TSI data for an index to a SQL export.
// The export is a stream of SQL statements written to w; see ExportIndex.
type SQLIndexExporter struct {
// w receives every SQL statement produced by the exporter.
w io.Writer
// initialized is set by initialize so that its work is done at most once.
initialized bool
// Logs non-fatal warnings.
Logger *zap.Logger
// Write schema, if true.
ShowSchema bool
}
// NewSQLIndexExporter creates an exporter that writes its SQL output to w.
// The returned exporter uses a no-op logger and has ShowSchema enabled; both
// fields may be overridden by the caller before exporting.
func NewSQLIndexExporter(w io.Writer) *SQLIndexExporter {
	exporter := new(SQLIndexExporter)
	exporter.w = w
	exporter.Logger = zap.NewNop()
	exporter.ShowSchema = true
	return exporter
}
// Close ends the export. It currently writes nothing and always returns nil;
// the closing COMMIT statement is emitted by ExportIndex itself.
func (e *SQLIndexExporter) Close() error {
return nil
}
// ExportIndex writes the contents of idx to the exporter's writer as SQL.
//
// The schema is written first (once per exporter, and only if ShowSchema is
// set), followed by a single transaction containing the INSERT statements for
// every measurement returned by the index's measurement iterator.
//
// The transaction is always terminated with COMMIT, even when the index has no
// measurement iterator, so the output is a complete SQL script. Any iterator
// or write error aborts the export and is returned.
func (e *SQLIndexExporter) ExportIndex(idx *Index) error {
	if err := e.initialize(); err != nil {
		return err
	}

	if _, err := fmt.Fprintln(e.w, `BEGIN TRANSACTION;`); err != nil {
		return err
	}

	// Iterate over each measurement across all partitions.
	itr, err := idx.MeasurementIterator()
	if err != nil {
		return err
	}

	// A nil iterator means there is nothing to export; fall through so the
	// transaction opened above is still closed.
	if itr != nil {
		defer itr.Close()

		for {
			name, err := itr.Next()
			if err != nil {
				return err
			} else if name == nil {
				break
			}

			if err := e.exportMeasurement(idx, name); err != nil {
				return err
			}
		}
	}

	_, err = fmt.Fprintln(e.w, "COMMIT;")
	return err
}
// exportMeasurement writes the SQL for a single measurement: first its
// measurement_series rows, then the rows for every tag key of the measurement.
//
// Names that are not exactly 16 bytes cannot be split into org/bucket; they
// are logged as a warning and skipped without error.
func (e *SQLIndexExporter) exportMeasurement(idx *Index, name []byte) error {
	if len(name) != 16 {
		e.Logger.With(zap.Binary("name", name)).Warn("cannot parse non-standard measurement, skipping")
		return nil
	}

	if err := e.exportMeasurementSeries(idx, name); err != nil {
		return err
	}

	keyItr, err := idx.TagKeyIterator(name)
	if err != nil {
		return err
	}
	if keyItr == nil {
		return nil
	}
	defer keyItr.Close()

	// A nil key marks the end of the iterator.
	for {
		tagKey, err := keyItr.Next()
		switch {
		case err != nil:
			return err
		case tagKey == nil:
			return nil
		}

		if err := e.exportTagKey(idx, name, tagKey); err != nil {
			return err
		}
	}
}
// exportMeasurementSeries writes one measurement_series INSERT statement for
// each series ID the index reports for the measurement name. The org and
// bucket IDs are decoded from the first 16 bytes of name.
func (e *SQLIndexExporter) exportMeasurementSeries(idx *Index, name []byte) error {
	orgID, bucketID := tsdb.DecodeNameSlice(name[:16])

	seriesItr, err := idx.MeasurementSeriesIDIterator(name)
	if err != nil {
		return err
	}
	if seriesItr == nil {
		return nil
	}
	defer seriesItr.Close()

	const insertFormat = "INSERT INTO measurement_series (org_id, bucket_id, series_id) VALUES (%d, %d, %d);\n"

	// A zero series ID marks the end of the iterator.
	for {
		elem, err := seriesItr.Next()
		switch {
		case err != nil:
			return err
		case elem.SeriesID.ID == 0:
			return nil
		}

		if _, err := fmt.Fprintf(e.w, insertFormat, orgID, bucketID, elem.SeriesID.ID); err != nil {
			return err
		}
	}
}
// exportTagKey walks every value of the tag key within the measurement name
// and exports each one through exportTagValue.
func (e *SQLIndexExporter) exportTagKey(idx *Index, name, key []byte) error {
	valueItr, err := idx.TagValueIterator(name, key)
	if err != nil {
		return err
	}
	if valueItr == nil {
		return nil
	}
	defer valueItr.Close()

	// A nil value marks the end of the iterator.
	for {
		tagValue, err := valueItr.Next()
		switch {
		case err != nil:
			return err
		case tagValue == nil:
			return nil
		}

		if err := e.exportTagValue(idx, name, key, tagValue); err != nil {
			return err
		}
	}
}
// exportTagValue writes one tag_value_series INSERT statement for each series
// ID the index reports for the given measurement name, tag key and tag value.
//
// The org and bucket IDs are decoded from the first 16 bytes of name. The
// single-byte keys 0x00 and 0xff are exported as "_m" and "_f" respectively
// (the special-case keys for measurement and field). Key and value are written
// as quoted SQL string literals.
func (e *SQLIndexExporter) exportTagValue(idx *Index, name, key, value []byte) error {
	orgID, bucketID := tsdb.DecodeNameSlice(name[:16])

	// The index lookup must use the raw key; only the exported text is rewritten.
	itr, err := idx.TagValueSeriesIDIterator(name, key, value)
	if err != nil {
		return err
	} else if itr == nil {
		return nil
	}
	defer itr.Close()

	// Replace special case keys for measurement & field. This and the quoting
	// below do not depend on the series ID, so they are done once up front
	// rather than once per row.
	exportKey := key
	if bytes.Equal(key, []byte{0}) {
		exportKey = []byte("_m")
	} else if bytes.Equal(key, []byte{0xff}) {
		exportKey = []byte("_f")
	}
	quotedKey := quoteSQL(string(exportKey))
	quotedValue := quoteSQL(string(value))

	for {
		elem, err := itr.Next()
		if err != nil {
			return err
		} else if elem.SeriesID.ID == 0 {
			// A zero series ID marks the end of the iterator.
			break
		}

		if _, err := fmt.Fprintf(e.w,
			"INSERT INTO tag_value_series (org_id, bucket_id, key, value, series_id) VALUES (%d, %d, %s, %s, %d);\n",
			orgID,
			bucketID,
			quotedKey,
			quotedValue,
			elem.SeriesID.ID,
		); err != nil {
			return err
		}
	}
	return nil
}
// initialize performs the exporter's one-time setup. On the first call it
// marks the exporter as initialized and, if ShowSchema is set, writes the
// CREATE TABLE statements for the measurement_series and tag_value_series
// tables. Later calls do nothing.
//
// It returns any error encountered while writing the schema.
func (e *SQLIndexExporter) initialize() error {
	if e.initialized {
		return nil
	}
	e.initialized = true

	if !e.ShowSchema {
		return nil
	}

	// The [1:] drops the newline that follows the opening backquote.
	_, err := fmt.Fprintln(e.w, `
CREATE TABLE IF NOT EXISTS measurement_series (
org_id INTEGER NOT NULL,
bucket_id INTEGER NOT NULL,
series_id INTEGER NOT NULL
);
CREATE TABLE IF NOT EXISTS tag_value_series (
org_id INTEGER NOT NULL,
bucket_id INTEGER NOT NULL,
key TEXT NOT NULL,
value TEXT NOT NULL,
series_id INTEGER NOT NULL
);
`[1:])
	return err
}
func quoteSQL(s string) string {
return `'` + sqlReplacer.Replace(toValidUTF8(s)) + `'`
}
// sqlReplacer prepares text for use inside a single-quoted SQL string literal:
// every single quote is doubled and every NUL byte is removed.
var sqlReplacer = strings.NewReplacer(
	"'", "''",
	"\x00", "",
)
// toValidUTF8 returns s with every byte that is not part of a valid UTF-8
// sequence removed. Valid input is returned unchanged.
//
// A correctly encoded U+FFFD replacement character is valid UTF-8 and is
// preserved; only undecodable bytes are dropped. Undecodable bytes are
// recognised by DecodeRuneInString reporting RuneError with a width of 1,
// whereas an encoded U+FFFD is three bytes wide.
func toValidUTF8(s string) string {
	// Fast path: nothing to strip.
	if utf8.ValidString(s) {
		return s
	}

	var b strings.Builder
	b.Grow(len(s))
	for i := 0; i < len(s); {
		r, size := utf8.DecodeRuneInString(s[i:])
		if r == utf8.RuneError && size == 1 {
			// Invalid byte: skip it.
			i++
			continue
		}
		b.WriteString(s[i : i+size])
		i += size
	}
	return b.String()
}

View File

@ -0,0 +1,52 @@
package tsi1_test
import (
"bytes"
"os"
"testing"
"github.com/influxdata/influxdb/logger"
"github.com/influxdata/influxdb/models"
"github.com/influxdata/influxdb/tsdb"
"github.com/influxdata/influxdb/tsdb/tsi1"
)
// TestSQLIndexExporter_ExportIndex creates three series across two org/bucket
// pairs, exports the index with the schema disabled, and compares the SQL
// output against the expected statements.
func TestSQLIndexExporter_ExportIndex(t *testing.T) {
	idx := MustOpenIndex(1, tsi1.NewConfig())
	defer idx.Close()

	// Add series to index.
	series := []Series{
		{Name: tsdb.EncodeNameSlice(1, 2), Tags: models.NewTags(map[string]string{"region": "east", "status": "ok"})},
		{Name: tsdb.EncodeNameSlice(1, 2), Tags: models.NewTags(map[string]string{"region": "west"})},
		{Name: tsdb.EncodeNameSlice(3, 4), Tags: models.NewTags(map[string]string{"region": "east"})},
	}
	if err := idx.CreateSeriesSliceIfNotExists(series); err != nil {
		t.Fatal(err)
	}

	// Expected output.
	want := `
BEGIN TRANSACTION;
INSERT INTO measurement_series (org_id, bucket_id, series_id) VALUES (1, 2, 1);
INSERT INTO measurement_series (org_id, bucket_id, series_id) VALUES (1, 2, 5);
INSERT INTO tag_value_series (org_id, bucket_id, key, value, series_id) VALUES (1, 2, 'region', 'east', 1);
INSERT INTO tag_value_series (org_id, bucket_id, key, value, series_id) VALUES (1, 2, 'region', 'west', 5);
INSERT INTO tag_value_series (org_id, bucket_id, key, value, series_id) VALUES (1, 2, 'status', 'ok', 1);
INSERT INTO measurement_series (org_id, bucket_id, series_id) VALUES (3, 4, 2);
INSERT INTO tag_value_series (org_id, bucket_id, key, value, series_id) VALUES (3, 4, 'region', 'east', 2);
COMMIT;
`[1:]

	// Export the index to SQL, capturing the output in memory.
	var out bytes.Buffer
	exporter := tsi1.NewSQLIndexExporter(&out)
	exporter.ShowSchema = false
	exporter.Logger = logger.New(os.Stderr)

	if err := exporter.ExportIndex(idx.Index); err != nil {
		t.Fatal(err)
	}
	if err := exporter.Close(); err != nil {
		t.Fatal(err)
	}
	if got := out.String(); got != want {
		t.Fatalf("unexpected output:\ngot=%s\n--\nwant=%s", got, want)
	}
}