330 lines
8.0 KiB
Go
330 lines
8.0 KiB
Go
package influxql
|
|
|
|
import (
|
|
"bufio"
|
|
"bytes"
|
|
"fmt"
|
|
"io"
|
|
"os"
|
|
)
|
|
|
|
// Scanner represents a lexical scanner for InfluxQL.
|
|
type Scanner struct {
|
|
r *reader
|
|
}
|
|
|
|
// NewScanner returns a new instance of Scanner.
|
|
func NewScanner(r io.Reader) *Scanner {
|
|
return &Scanner{r: &reader{r: bufio.NewReader(r)}}
|
|
}
|
|
|
|
// Scan returns the next token from the underlying reader.
|
|
// Also returns the position and literal string text read for the token.
|
|
func (s *Scanner) Scan() (tok Token, pos Pos, lit string) {
|
|
// Read next code point.
|
|
ch0, pos := s.r.read()
|
|
|
|
// If we see whitespace then consume all contiguous whitespace.
|
|
// If we see a letter then consume as an ident or reserved word.
|
|
if isWhitespace(ch0) {
|
|
return s.scanWhitespace()
|
|
} else if isLetter(ch0) {
|
|
return s.scanIdent()
|
|
} else if isDigit(ch0) {
|
|
return s.scanNumber()
|
|
}
|
|
|
|
// Otherwise parse individual characters.
|
|
switch ch0 {
|
|
case eof:
|
|
return EOF, pos, ""
|
|
case '"', '\'':
|
|
return s.scanString()
|
|
case '.', '+', '-':
|
|
return s.scanNumber()
|
|
case '*':
|
|
return MUL, pos, ""
|
|
case '/':
|
|
return DIV, pos, ""
|
|
case '=':
|
|
if ch1, _ := s.r.read(); ch1 == '=' {
|
|
return EQ, pos, ""
|
|
}
|
|
s.r.unread()
|
|
return ILLEGAL, pos, string(ch0)
|
|
case '!':
|
|
if ch1, _ := s.r.read(); ch1 == '=' {
|
|
return NEQ, pos, ""
|
|
}
|
|
s.r.unread()
|
|
return ILLEGAL, pos, string(ch0)
|
|
case '>':
|
|
if ch1, _ := s.r.read(); ch1 == '=' {
|
|
return GTE, pos, ""
|
|
}
|
|
s.r.unread()
|
|
return GT, pos, ""
|
|
case '<':
|
|
if ch1, _ := s.r.read(); ch1 == '=' {
|
|
return LTE, pos, ""
|
|
}
|
|
s.r.unread()
|
|
return LT, pos, ""
|
|
case '(':
|
|
return LPAREN, pos, ""
|
|
case ')':
|
|
return RPAREN, pos, ""
|
|
case ',':
|
|
return COMMA, pos, ""
|
|
}
|
|
|
|
return ILLEGAL, pos, string(ch0)
|
|
}
|
|
|
|
// scanWhitespace consumes the current rune and all contiguous whitespace.
|
|
func (s *Scanner) scanWhitespace() (tok Token, pos Pos, lit string) {
|
|
// Create a buffer and read the current character into it.
|
|
var buf bytes.Buffer
|
|
ch, pos := s.r.curr()
|
|
_, _ = buf.WriteRune(ch)
|
|
|
|
// Read every subsequent whitespace character into the buffer.
|
|
// Non-whitespace characters and EOF will cause the loop to exit.
|
|
for {
|
|
ch, _ = s.r.read()
|
|
if ch == eof {
|
|
break
|
|
} else if !isWhitespace(ch) {
|
|
s.r.unread()
|
|
break
|
|
} else {
|
|
_, _ = buf.WriteRune(ch)
|
|
}
|
|
}
|
|
|
|
return WS, pos, buf.String()
|
|
}
|
|
|
|
// scanIdent consumes the current rune and all contiguous ident runes.
|
|
func (s *Scanner) scanIdent() (tok Token, pos Pos, lit string) {
|
|
// Create a buffer and read the current character into it.
|
|
var buf bytes.Buffer
|
|
ch, pos := s.r.curr()
|
|
_, _ = buf.WriteRune(ch)
|
|
|
|
// Read every subsequent ident character into the buffer.
|
|
// Non-ident characters and EOF will cause the loop to exit.
|
|
for {
|
|
ch, _ = s.r.read()
|
|
if ch == eof {
|
|
break
|
|
} else if !isLetter(ch) && !isDigit(ch) && ch != '_' {
|
|
s.r.unread()
|
|
break
|
|
} else {
|
|
_, _ = buf.WriteRune(ch)
|
|
}
|
|
}
|
|
|
|
// If the string matches a keyword then return that keyword.
|
|
if tok = Lookup(buf.String()); tok != IDENT {
|
|
return tok, pos, ""
|
|
}
|
|
|
|
// Otherwise return as a regular identifier.
|
|
return IDENT, pos, buf.String()
|
|
}
|
|
|
|
// scanString consumes a contiguous string of non-quote characters.
|
|
// Quote characters can be consumed if they're first escaped with a backslash.
|
|
func (s *Scanner) scanString() (tok Token, pos Pos, lit string) {
|
|
ending, pos := s.r.curr()
|
|
var buf bytes.Buffer
|
|
for {
|
|
ch0, pos0 := s.r.read()
|
|
if ch0 == ending {
|
|
return STRING, pos, buf.String()
|
|
} else if ch0 == eof || ch0 == '\n' {
|
|
return BADSTRING, pos, buf.String()
|
|
} else if ch0 == '\\' {
|
|
// If the next character is an escape then write the escaped char.
|
|
// If it's not a valid escape then return a BADESCAPE token.
|
|
ch1, _ := s.r.read()
|
|
if ch1 == 'n' {
|
|
_, _ = buf.WriteRune('\n')
|
|
} else if ch1 == '\\' {
|
|
_, _ = buf.WriteRune('\\')
|
|
} else {
|
|
return BADESCAPE, pos0, string(ch0) + string(ch1)
|
|
}
|
|
} else {
|
|
_, _ = buf.WriteRune(ch0)
|
|
}
|
|
}
|
|
}
|
|
|
|
// scanNumber consumes anything that looks like the start of a number.
|
|
// Numbers start with a digit, full stop, plus sign or minus sign.
|
|
// This function can return non-number tokens if a scan is a false positive.
|
|
// For example, a minus sign followed by a letter will just return a minus sign.
|
|
func (s *Scanner) scanNumber() (tok Token, pos Pos, lit string) {
|
|
var buf bytes.Buffer
|
|
|
|
// Check if the initial rune is a "+" or "-".
|
|
ch, pos := s.r.curr()
|
|
if ch == '+' || ch == '-' {
|
|
// Peek at the next two runes.
|
|
ch1, _ := s.r.read()
|
|
ch2, _ := s.r.read()
|
|
s.r.unread()
|
|
s.r.unread()
|
|
|
|
// This rune must be followed by a digit or a full stop and a digit.
|
|
if isDigit(ch1) || (ch1 == '.' && isDigit(ch2)) {
|
|
_, _ = buf.WriteRune(ch)
|
|
} else if ch == '+' {
|
|
return ADD, pos, ""
|
|
} else if ch == '-' {
|
|
return SUB, pos, ""
|
|
}
|
|
} else if ch == '.' {
|
|
// Peek and see if the next rune is a digit.
|
|
ch1, _ := s.r.read()
|
|
s.r.unread()
|
|
if !isDigit(ch1) {
|
|
return ILLEGAL, pos, "."
|
|
}
|
|
|
|
// Unread the full stop so we can read it later.
|
|
s.r.unread()
|
|
} else {
|
|
s.r.unread()
|
|
}
|
|
|
|
// Read as many digits as possible.
|
|
_, _ = buf.WriteString(s.scanDigits())
|
|
|
|
// If next code points are a full stop and digit then consume them.
|
|
if ch0, _ := s.r.read(); ch0 == '.' {
|
|
if ch1, _ := s.r.read(); isDigit(ch1) {
|
|
_, _ = buf.WriteRune(ch0)
|
|
_, _ = buf.WriteRune(ch1)
|
|
_, _ = buf.WriteString(s.scanDigits())
|
|
} else {
|
|
s.r.unread()
|
|
s.r.unread()
|
|
}
|
|
} else {
|
|
s.r.unread()
|
|
}
|
|
|
|
// If the next rune is a duration unit (u,µ,ms,s) then return a duration token
|
|
if ch0, _ := s.r.read(); ch0 == 'u' || ch0 == 'µ' || ch0 == 's' || ch0 == 'h' || ch0 == 'd' || ch0 == 'w' {
|
|
_, _ = buf.WriteRune(ch0)
|
|
return DURATION, pos, buf.String()
|
|
} else if ch0 == 'm' {
|
|
_, _ = buf.WriteRune(ch0)
|
|
if ch1, _ := s.r.read(); ch1 == 's' {
|
|
_, _ = buf.WriteRune(ch1)
|
|
} else {
|
|
s.r.unread()
|
|
}
|
|
return DURATION, pos, buf.String()
|
|
}
|
|
s.r.unread()
|
|
|
|
return NUMBER, pos, buf.String()
|
|
}
|
|
|
|
// scanDigits consume a contiguous series of digits.
|
|
func (s *Scanner) scanDigits() string {
|
|
var buf bytes.Buffer
|
|
for {
|
|
ch, _ := s.r.read()
|
|
if !isDigit(ch) {
|
|
s.r.unread()
|
|
break
|
|
}
|
|
_, _ = buf.WriteRune(ch)
|
|
}
|
|
return buf.String()
|
|
}
|
|
|
|
// isWhitespace reports whether ch is a space, a tab or a newline.
func isWhitespace(ch rune) bool {
	switch ch {
	case ' ', '\t', '\n':
		return true
	}
	return false
}
|
|
|
|
// isLetter reports whether ch is an ASCII letter, a-z or A-Z.
func isLetter(ch rune) bool {
	switch {
	case 'a' <= ch && ch <= 'z':
		return true
	case 'A' <= ch && ch <= 'Z':
		return true
	}
	return false
}
|
|
|
|
// isDigit reports whether ch is an ASCII decimal digit, 0-9.
func isDigit(ch rune) bool {
	if ch < '0' {
		return false
	}
	return ch <= '9'
}
|
|
|
|
// reader represents a buffered rune reader used by the scanner.
|
|
// It provides a fixed-length circular buffer that can be unread.
|
|
type reader struct {
|
|
r io.RuneScanner
|
|
i int // buffer index
|
|
n int // buffer char count
|
|
pos Pos // last read rune position
|
|
buf [3]struct {
|
|
ch rune
|
|
pos Pos
|
|
}
|
|
}
|
|
|
|
// read reads the next rune from the reader.
|
|
func (r *reader) read() (ch rune, pos Pos) {
|
|
// If we have unread characters then read them off the buffer first.
|
|
if r.n > 0 {
|
|
r.n--
|
|
return r.curr()
|
|
}
|
|
|
|
// Read next rune from underlying reader.
|
|
// Any error (including io.EOF) should return as EOF.
|
|
ch, _, err := r.r.ReadRune()
|
|
if err != nil {
|
|
ch = eof
|
|
} else if ch == '\r' {
|
|
if ch, _, err := r.r.ReadRune(); err != nil {
|
|
// nop
|
|
} else if ch != '\n' {
|
|
_ = r.r.UnreadRune()
|
|
}
|
|
ch = '\n'
|
|
}
|
|
|
|
// Save character and position to the buffer.
|
|
r.i = (r.i + 1) % len(r.buf)
|
|
buf := &r.buf[r.i]
|
|
buf.ch, buf.pos = ch, r.pos
|
|
|
|
// Update position.
|
|
if ch == '\n' {
|
|
r.pos.Line++
|
|
r.pos.Char = 0
|
|
} else {
|
|
r.pos.Char++
|
|
}
|
|
|
|
return r.curr()
|
|
}
|
|
|
|
// unread pushes the previously read rune back onto the buffer.
|
|
func (r *reader) unread() {
|
|
r.n++
|
|
}
|
|
|
|
// curr returns the last read character and position.
|
|
func (r *reader) curr() (ch rune, pos Pos) {
|
|
i := (r.i - r.n + len(r.buf)) % len(r.buf)
|
|
buf := &r.buf[i]
|
|
return buf.ch, buf.pos
|
|
}
|
|
|
|
// eof is the rune value the reader returns once no more input can be read.
// It is the NUL code point, so a NUL in the input looks the same as the end
// of input.
const eof rune = 0
|
|
|
|
// warn writes its arguments to standard error in fmt.Fprintln format:
// operands separated by spaces, followed by a newline.
func warn(v ...interface{}) {
	fmt.Fprintln(os.Stderr, v...)
}
|
|
// warnf formats v according to msg, as fmt.Fprintf does, and writes the
// result to standard error followed by a newline.
func warnf(msg string, v ...interface{}) {
	format := msg + "\n"
	fmt.Fprintf(os.Stderr, format, v...)
}
|