package influxql import ( "bufio" "bytes" "fmt" "io" "os" "strings" ) // Scanner represents a lexical scanner for InfluxQL. type Scanner struct { r *reader } // NewScanner returns a new instance of Scanner. func NewScanner(r io.Reader) *Scanner { return &Scanner{r: &reader{r: bufio.NewReader(r)}} } // Scan returns the next token and position from the underlying reader. // Also returns the literal text read for strings, numbers, and duration tokens // since these token types can have different literal representations. func (s *Scanner) Scan() (tok Token, pos Pos, lit string) { // Read next code point. ch0, pos := s.r.read() // If we see whitespace then consume all contiguous whitespace. // If we see a letter then consume as an ident or reserved word. if isWhitespace(ch0) { return s.scanWhitespace() } else if isLetter(ch0) { return s.scanIdent() } else if isDigit(ch0) { return s.scanNumber() } // Otherwise parse individual characters. switch ch0 { case eof: return EOF, pos, "" case '"', '\'': return s.scanString() case '.', '+', '-': return s.scanNumber() case '*': return MUL, pos, "" case '/': return DIV, pos, "" case '=': return EQ, pos, "" case '!': if ch1, _ := s.r.read(); ch1 == '=' { return NEQ, pos, "" } s.r.unread() return ILLEGAL, pos, string(ch0) case '>': if ch1, _ := s.r.read(); ch1 == '=' { return GTE, pos, "" } s.r.unread() return GT, pos, "" case '<': if ch1, _ := s.r.read(); ch1 == '=' { return LTE, pos, "" } s.r.unread() return LT, pos, "" case '(': return LPAREN, pos, "" case ')': return RPAREN, pos, "" case ',': return COMMA, pos, "" case ';': return SEMICOLON, pos, "" } return ILLEGAL, pos, string(ch0) } // scanWhitespace consumes the current rune and all contiguous whitespace. func (s *Scanner) scanWhitespace() (tok Token, pos Pos, lit string) { // Create a buffer and read the current character into it. var buf bytes.Buffer ch, pos := s.r.curr() _, _ = buf.WriteRune(ch) // Read every subsequent whitespace character into the buffer. // Non-whitespace characters and EOF will cause the loop to exit. for { ch, _ = s.r.read() if ch == eof { break } else if !isWhitespace(ch) { s.r.unread() break } else { _, _ = buf.WriteRune(ch) } } return WS, pos, buf.String() } // scanIdent consumes the current rune and all contiguous ident runes. func (s *Scanner) scanIdent() (tok Token, pos Pos, lit string) { // Create a buffer and read the current character into it. var buf bytes.Buffer ch, pos := s.r.curr() _, _ = buf.WriteRune(ch) // Read every subsequent ident character into the buffer. // Non-ident characters and EOF will cause the loop to exit. for { ch, _ = s.r.read() if ch == eof { break } else if !isLetter(ch) && !isDigit(ch) && ch != '_' && ch != '.' { s.r.unread() break } else { _, _ = buf.WriteRune(ch) } } // If the string matches a keyword then return that keyword. if tok = Lookup(buf.String()); tok != IDENT { return tok, pos, "" } // Otherwise return as a regular identifier. return IDENT, pos, buf.String() } // scanString consumes a contiguous string of non-quote characters. // Quote characters can be consumed if they're first escaped with a backslash. func (s *Scanner) scanString() (tok Token, pos Pos, lit string) { ending, pos := s.r.curr() var buf bytes.Buffer for { ch0, pos0 := s.r.read() if ch0 == ending { return STRING, pos, buf.String() } else if ch0 == eof || ch0 == '\n' { return BADSTRING, pos, buf.String() } else if ch0 == '\\' { // If the next character is an escape then write the escaped char. // If it's not a valid escape then return a BADESCAPE token. ch1, _ := s.r.read() if ch1 == 'n' { _, _ = buf.WriteRune('\n') } else if ch1 == '\\' { _, _ = buf.WriteRune('\\') } else if ch1 == '"' { _, _ = buf.WriteRune('"') } else { return BADESCAPE, pos0, string(ch0) + string(ch1) } } else { _, _ = buf.WriteRune(ch0) } } } // scanNumber consumes anything that looks like the start of a number. // Numbers start with a digit, full stop, plus sign or minus sign. // This function can return non-number tokens if a scan is a false positive. // For example, a minus sign followed by a letter will just return a minus sign. func (s *Scanner) scanNumber() (tok Token, pos Pos, lit string) { var buf bytes.Buffer // Check if the initial rune is a "+" or "-". ch, pos := s.r.curr() if ch == '+' || ch == '-' { // Peek at the next two runes. ch1, _ := s.r.read() ch2, _ := s.r.read() s.r.unread() s.r.unread() // This rune must be followed by a digit or a full stop and a digit. if isDigit(ch1) || (ch1 == '.' && isDigit(ch2)) { _, _ = buf.WriteRune(ch) } else if ch == '+' { return ADD, pos, "" } else if ch == '-' { return SUB, pos, "" } } else if ch == '.' { // Peek and see if the next rune is a digit. ch1, _ := s.r.read() s.r.unread() if !isDigit(ch1) { return ILLEGAL, pos, "." } // Unread the full stop so we can read it later. s.r.unread() } else { s.r.unread() } // Read as many digits as possible. _, _ = buf.WriteString(s.scanDigits()) // If next code points are a full stop and digit then consume them. if ch0, _ := s.r.read(); ch0 == '.' { if ch1, _ := s.r.read(); isDigit(ch1) { _, _ = buf.WriteRune(ch0) _, _ = buf.WriteRune(ch1) _, _ = buf.WriteString(s.scanDigits()) } else { s.r.unread() s.r.unread() } } else { s.r.unread() } // Attempt to read as a duration if it doesn't have a fractional part. if !strings.Contains(buf.String(), ".") { // If the next rune is a duration unit (u,µ,ms,s) then return a duration token if ch0, _ := s.r.read(); ch0 == 'u' || ch0 == 'µ' || ch0 == 's' || ch0 == 'h' || ch0 == 'd' || ch0 == 'w' { _, _ = buf.WriteRune(ch0) return DURATION, pos, buf.String() } else if ch0 == 'm' { _, _ = buf.WriteRune(ch0) if ch1, _ := s.r.read(); ch1 == 's' { _, _ = buf.WriteRune(ch1) } else { s.r.unread() } return DURATION, pos, buf.String() } s.r.unread() } return NUMBER, pos, buf.String() } // scanDigits consume a contiguous series of digits. func (s *Scanner) scanDigits() string { var buf bytes.Buffer for { ch, _ := s.r.read() if !isDigit(ch) { s.r.unread() break } _, _ = buf.WriteRune(ch) } return buf.String() } // isWhitespace returns true if the rune is a space, tab, or newline. func isWhitespace(ch rune) bool { return ch == ' ' || ch == '\t' || ch == '\n' } // isLetter returns true if the rune is a letter. func isLetter(ch rune) bool { return (ch >= 'a' && ch <= 'z') || (ch >= 'A' && ch <= 'Z') } // isDigit returns true if the rune is a digit. func isDigit(ch rune) bool { return (ch >= '0' && ch <= '9') } // bufScanner represents a wrapper for scanner to add a buffer. // It provides a fixed-length circular buffer that can be unread. type bufScanner struct { s *Scanner i int // buffer index n int // buffer size buf [3]struct { tok Token pos Pos lit string } } // newBufScanner returns a new buffered scanner for a reader. func newBufScanner(r io.Reader) *bufScanner { return &bufScanner{s: NewScanner(r)} } // Scan reads the next token from the scanner. func (s *bufScanner) Scan() (tok Token, pos Pos, lit string) { // If we have unread tokens then read them off the buffer first. if s.n > 0 { s.n-- return s.curr() } // Move buffer position forward and save the token. s.i = (s.i + 1) % len(s.buf) buf := &s.buf[s.i] buf.tok, buf.pos, buf.lit = s.s.Scan() return s.curr() } // Unscan pushes the previously token back onto the buffer. func (s *bufScanner) Unscan() { s.n++ } // curr returns the last read token. func (s *bufScanner) curr() (tok Token, pos Pos, lit string) { buf := &s.buf[(s.i-s.n+len(s.buf))%len(s.buf)] return buf.tok, buf.pos, buf.lit } // reader represents a buffered rune reader used by the scanner. // It provides a fixed-length circular buffer that can be unread. type reader struct { r io.RuneScanner i int // buffer index n int // buffer char count pos Pos // last read rune position buf [3]struct { ch rune pos Pos } } // read reads the next rune from the reader. func (r *reader) read() (ch rune, pos Pos) { // If we have unread characters then read them off the buffer first. if r.n > 0 { r.n-- return r.curr() } // Read next rune from underlying reader. // Any error (including io.EOF) should return as EOF. ch, _, err := r.r.ReadRune() if err != nil { ch = eof } else if ch == '\r' { if ch, _, err := r.r.ReadRune(); err != nil { // nop } else if ch != '\n' { _ = r.r.UnreadRune() } ch = '\n' } // Save character and position to the buffer. r.i = (r.i + 1) % len(r.buf) buf := &r.buf[r.i] buf.ch, buf.pos = ch, r.pos // Update position. if ch == '\n' { r.pos.Line++ r.pos.Char = 0 } else { r.pos.Char++ } return r.curr() } // unread pushes the previously read rune back onto the buffer. func (r *reader) unread() { r.n++ } // curr returns the last read character and position. func (r *reader) curr() (ch rune, pos Pos) { i := (r.i - r.n + len(r.buf)) % len(r.buf) buf := &r.buf[i] return buf.ch, buf.pos } // eof is a marker code point to signify that the reader can't read any more. const eof = rune(0) func warn(v ...interface{}) { fmt.Fprintln(os.Stderr, v...) } func warnf(msg string, v ...interface{}) { fmt.Fprintf(os.Stderr, msg+"\n", v...) }