Diffstat (limited to 'vendor/github.com/hashicorp/hil/scanner')
-rw-r--r--  vendor/github.com/hashicorp/hil/scanner/peeker.go            |  55
-rw-r--r--  vendor/github.com/hashicorp/hil/scanner/scanner.go           | 550
-rw-r--r--  vendor/github.com/hashicorp/hil/scanner/token.go             | 105
-rw-r--r--  vendor/github.com/hashicorp/hil/scanner/tokentype_string.go  |  51
4 files changed, 761 insertions, 0 deletions
diff --git a/vendor/github.com/hashicorp/hil/scanner/peeker.go b/vendor/github.com/hashicorp/hil/scanner/peeker.go new file mode 100644 index 00000000..4de37283 --- /dev/null +++ b/vendor/github.com/hashicorp/hil/scanner/peeker.go @@ -0,0 +1,55 @@ +package scanner + +// Peeker is a utility that wraps a token channel returned by Scan and +// provides an interface that allows a caller (e.g. the parser) to +// work with the token stream in a mode that allows one token of lookahead, +// and provides utilities for more convenient processing of the stream. +type Peeker struct { + ch <-chan *Token + peeked *Token +} + +func NewPeeker(ch <-chan *Token) *Peeker { + return &Peeker{ + ch: ch, + } +} + +// Peek returns the next token in the stream without consuming it. A +// subsequent call to Read will return the same token. +func (p *Peeker) Peek() *Token { + if p.peeked == nil { + p.peeked = <-p.ch + } + return p.peeked +} + +// Read consumes the next token in the stream and returns it. +func (p *Peeker) Read() *Token { + token := p.Peek() + + // As a special case, we will produce the EOF token forever once + // it is reached. + if token.Type != EOF { + p.peeked = nil + } + + return token +} + +// Close ensures that the token stream has been exhausted, to prevent +// the goroutine in the underlying scanner from leaking. +// +// It's not necessary to call this if the caller reads the token stream +// to EOF, since that implicitly closes the scanner. +func (p *Peeker) Close() { + for _ = range p.ch { + // discard + } + // Install a synthetic EOF token in 'peeked' in case someone + // erroneously calls Peek() or Read() after we've closed. + p.peeked = &Token{ + Type: EOF, + Content: "", + } +} diff --git a/vendor/github.com/hashicorp/hil/scanner/scanner.go b/vendor/github.com/hashicorp/hil/scanner/scanner.go new file mode 100644 index 00000000..bab86c67 --- /dev/null +++ b/vendor/github.com/hashicorp/hil/scanner/scanner.go @@ -0,0 +1,550 @@ +package scanner + +import ( + "unicode" + "unicode/utf8" + + "github.com/hashicorp/hil/ast" +) + +// Scan returns a channel that recieves Tokens from the given input string. +// +// The scanner's job is just to partition the string into meaningful parts. +// It doesn't do any transformation of the raw input string, so the caller +// must deal with any further interpretation required, such as parsing INTEGER +// tokens into real ints, or dealing with escape sequences in LITERAL or +// STRING tokens. +// +// Strings in the returned tokens are slices from the original string. +// +// startPos should be set to ast.InitPos unless the caller knows that +// this interpolation string is part of a larger file and knows the position +// of the first character in that larger file. +func Scan(s string, startPos ast.Pos) <-chan *Token { + ch := make(chan *Token) + go scan(s, ch, startPos) + return ch +} + +func scan(s string, ch chan<- *Token, pos ast.Pos) { + // 'remain' starts off as the whole string but we gradually + // slice of the front of it as we work our way through. + remain := s + + // nesting keeps track of how many ${ .. } sequences we are + // inside, so we can recognize the minor differences in syntax + // between outer string literals (LITERAL tokens) and quoted + // string literals (STRING tokens). + nesting := 0 + + // We're going to flip back and forth between parsing literals/strings + // and parsing interpolation sequences ${ .. } until we reach EOF or + // some INVALID token. 
+All: + for { + startPos := pos + // Literal string processing first, since the beginning of + // a string is always outside of an interpolation sequence. + literalVal, terminator := scanLiteral(remain, pos, nesting > 0) + + if len(literalVal) > 0 { + litType := LITERAL + if nesting > 0 { + litType = STRING + } + ch <- &Token{ + Type: litType, + Content: literalVal, + Pos: startPos, + } + remain = remain[len(literalVal):] + } + + ch <- terminator + remain = remain[len(terminator.Content):] + pos = terminator.Pos + // Safe to use len() here because none of the terminator tokens + // can contain UTF-8 sequences. + pos.Column = pos.Column + len(terminator.Content) + + switch terminator.Type { + case INVALID: + // Synthetic EOF after invalid token, since further scanning + // is likely to just produce more garbage. + ch <- &Token{ + Type: EOF, + Content: "", + Pos: pos, + } + break All + case EOF: + // All done! + break All + case BEGIN: + nesting++ + case CQUOTE: + // nothing special to do + default: + // Should never happen + panic("invalid string/literal terminator") + } + + // Now we do the processing of the insides of ${ .. } sequences. + // This loop terminates when we encounter either a closing } or + // an opening ", which will cause us to return to literal processing. + Interpolation: + for { + + token, size, newPos := scanInterpolationToken(remain, pos) + ch <- token + remain = remain[size:] + pos = newPos + + switch token.Type { + case INVALID: + // Synthetic EOF after invalid token, since further scanning + // is likely to just produce more garbage. + ch <- &Token{ + Type: EOF, + Content: "", + Pos: pos, + } + break All + case EOF: + // All done + // (though a syntax error that we'll catch in the parser) + break All + case END: + nesting-- + if nesting < 0 { + // Can happen if there are unbalanced ${ and } sequences + // in the input, which we'll catch in the parser. + nesting = 0 + } + break Interpolation + case OQUOTE: + // Beginning of nested quoted string + break Interpolation + } + } + } + + close(ch) +} + +// Returns the token found at the start of the given string, followed by +// the number of bytes that were consumed from the string and the adjusted +// source position. +// +// Note that the number of bytes consumed can be more than the length of +// the returned token contents if the string begins with whitespace, since +// it will be silently consumed before reading the token. +func scanInterpolationToken(s string, startPos ast.Pos) (*Token, int, ast.Pos) { + pos := startPos + size := 0 + + // Consume whitespace, if any + for len(s) > 0 && byteIsSpace(s[0]) { + if s[0] == '\n' { + pos.Column = 1 + pos.Line++ + } else { + pos.Column++ + } + size++ + s = s[1:] + } + + // Unexpected EOF during sequence + if len(s) == 0 { + return &Token{ + Type: EOF, + Content: "", + Pos: pos, + }, size, pos + } + + next := s[0] + var token *Token + + switch next { + case '(', ')', '[', ']', ',', '.', '+', '-', '*', '/', '%', '?', ':': + // Easy punctuation symbols that don't have any special meaning + // during scanning, and that stand for themselves in the + // TokenType enumeration. 
+ token = &Token{ + Type: TokenType(next), + Content: s[:1], + Pos: pos, + } + case '}': + token = &Token{ + Type: END, + Content: s[:1], + Pos: pos, + } + case '"': + token = &Token{ + Type: OQUOTE, + Content: s[:1], + Pos: pos, + } + case '!': + if len(s) >= 2 && s[:2] == "!=" { + token = &Token{ + Type: NOTEQUAL, + Content: s[:2], + Pos: pos, + } + } else { + token = &Token{ + Type: BANG, + Content: s[:1], + Pos: pos, + } + } + case '<': + if len(s) >= 2 && s[:2] == "<=" { + token = &Token{ + Type: LTE, + Content: s[:2], + Pos: pos, + } + } else { + token = &Token{ + Type: LT, + Content: s[:1], + Pos: pos, + } + } + case '>': + if len(s) >= 2 && s[:2] == ">=" { + token = &Token{ + Type: GTE, + Content: s[:2], + Pos: pos, + } + } else { + token = &Token{ + Type: GT, + Content: s[:1], + Pos: pos, + } + } + case '=': + if len(s) >= 2 && s[:2] == "==" { + token = &Token{ + Type: EQUAL, + Content: s[:2], + Pos: pos, + } + } else { + // A single equals is not a valid operator + token = &Token{ + Type: INVALID, + Content: s[:1], + Pos: pos, + } + } + case '&': + if len(s) >= 2 && s[:2] == "&&" { + token = &Token{ + Type: AND, + Content: s[:2], + Pos: pos, + } + } else { + token = &Token{ + Type: INVALID, + Content: s[:1], + Pos: pos, + } + } + case '|': + if len(s) >= 2 && s[:2] == "||" { + token = &Token{ + Type: OR, + Content: s[:2], + Pos: pos, + } + } else { + token = &Token{ + Type: INVALID, + Content: s[:1], + Pos: pos, + } + } + default: + if next >= '0' && next <= '9' { + num, numType := scanNumber(s) + token = &Token{ + Type: numType, + Content: num, + Pos: pos, + } + } else if stringStartsWithIdentifier(s) { + ident, runeLen := scanIdentifier(s) + tokenType := IDENTIFIER + if ident == "true" || ident == "false" { + tokenType = BOOL + } + token = &Token{ + Type: tokenType, + Content: ident, + Pos: pos, + } + // Skip usual token handling because it doesn't + // know how to deal with UTF-8 sequences. + pos.Column = pos.Column + runeLen + return token, size + len(ident), pos + } else { + _, byteLen := utf8.DecodeRuneInString(s) + token = &Token{ + Type: INVALID, + Content: s[:byteLen], + Pos: pos, + } + // Skip usual token handling because it doesn't + // know how to deal with UTF-8 sequences. + pos.Column = pos.Column + 1 + return token, size + byteLen, pos + } + } + + // Here we assume that the token content contains no UTF-8 sequences, + // because we dealt with UTF-8 characters as a special case where + // necessary above. + size = size + len(token.Content) + pos.Column = pos.Column + len(token.Content) + + return token, size, pos +} + +// Returns the (possibly-empty) prefix of the given string that represents +// a literal, followed by the token that marks the end of the literal. +func scanLiteral(s string, startPos ast.Pos, nested bool) (string, *Token) { + litLen := 0 + pos := startPos + var terminator *Token + for { + + if litLen >= len(s) { + if nested { + // We've ended in the middle of a quoted string, + // which means this token is actually invalid. + return "", &Token{ + Type: INVALID, + Content: s, + Pos: startPos, + } + } + terminator = &Token{ + Type: EOF, + Content: "", + Pos: pos, + } + break + } + + next := s[litLen] + + if next == '$' && len(s) > litLen+1 { + follow := s[litLen+1] + + if follow == '{' { + terminator = &Token{ + Type: BEGIN, + Content: s[litLen : litLen+2], + Pos: pos, + } + pos.Column = pos.Column + 2 + break + } else if follow == '$' { + // Double-$ escapes the special processing of $, + // so we will consume both characters here. 
+ pos.Column = pos.Column + 2 + litLen = litLen + 2 + continue + } + } + + // special handling that applies only to quoted strings + if nested { + if next == '"' { + terminator = &Token{ + Type: CQUOTE, + Content: s[litLen : litLen+1], + Pos: pos, + } + pos.Column = pos.Column + 1 + break + } + + // Escaped quote marks do not terminate the string. + // + // All we do here in the scanner is avoid terminating a string + // due to an escaped quote. The parser is responsible for the + // full handling of escape sequences, since it's able to produce + // better error messages than we can produce in here. + if next == '\\' && len(s) > litLen+1 { + follow := s[litLen+1] + + if follow == '"' { + // \" escapes the special processing of ", + // so we will consume both characters here. + pos.Column = pos.Column + 2 + litLen = litLen + 2 + continue + } + } + } + + if next == '\n' { + pos.Column = 1 + pos.Line++ + litLen++ + } else { + pos.Column++ + + // "Column" measures runes, so we need to actually consume + // a valid UTF-8 character here. + _, size := utf8.DecodeRuneInString(s[litLen:]) + litLen = litLen + size + } + + } + + return s[:litLen], terminator +} + +// scanNumber returns the extent of the prefix of the string that represents +// a valid number, along with what type of number it represents: INT or FLOAT. +// +// scanNumber does only basic character analysis: numbers consist of digits +// and periods, with at least one period signalling a FLOAT. It's the parser's +// responsibility to validate the form and range of the number, such as ensuring +// that a FLOAT actually contains only one period, etc. +func scanNumber(s string) (string, TokenType) { + period := -1 + byteLen := 0 + numType := INTEGER + for { + if byteLen >= len(s) { + break + } + + next := s[byteLen] + if next != '.' && (next < '0' || next > '9') { + // If our last value was a period, then we're not a float, + // we're just an integer that ends in a period. + if period == byteLen-1 { + byteLen-- + numType = INTEGER + } + + break + } + + if next == '.' { + // If we've already seen a period, break out + if period >= 0 { + break + } + + period = byteLen + numType = FLOAT + } + + byteLen++ + } + + return s[:byteLen], numType +} + +// scanIdentifier returns the extent of the prefix of the string that +// represents a valid identifier, along with the length of that prefix +// in runes. +// +// Identifiers may contain utf8-encoded non-Latin letters, which will +// cause the returned "rune length" to be shorter than the byte length +// of the returned string. +func scanIdentifier(s string) (string, int) { + byteLen := 0 + runeLen := 0 + for { + if byteLen >= len(s) { + break + } + + nextRune, size := utf8.DecodeRuneInString(s[byteLen:]) + if !(nextRune == '_' || + nextRune == '-' || + nextRune == '.' || + nextRune == '*' || + unicode.IsNumber(nextRune) || + unicode.IsLetter(nextRune) || + unicode.IsMark(nextRune)) { + break + } + + // If we reach a star, it must be between periods to be part + // of the same identifier. + if nextRune == '*' && s[byteLen-1] != '.' { + break + } + + // If our previous character was a star, then the current must + // be period. Otherwise, undo that and exit. + if byteLen > 0 && s[byteLen-1] == '*' && nextRune != '.' { + byteLen-- + if s[byteLen-1] == '.' 
{ + byteLen-- + } + + break + } + + byteLen = byteLen + size + runeLen = runeLen + 1 + } + + return s[:byteLen], runeLen +} + +// byteIsSpace implements a restrictive interpretation of spaces that includes +// only what's valid inside interpolation sequences: spaces, tabs, newlines. +func byteIsSpace(b byte) bool { + switch b { + case ' ', '\t', '\r', '\n': + return true + default: + return false + } +} + +// stringStartsWithIdentifier returns true if the given string begins with +// a character that is a legal start of an identifier: an underscore or +// any character that Unicode considers to be a letter. +func stringStartsWithIdentifier(s string) bool { + if len(s) == 0 { + return false + } + + first := s[0] + + // Easy ASCII cases first + if (first >= 'a' && first <= 'z') || (first >= 'A' && first <= 'Z') || first == '_' { + return true + } + + // If our first byte begins a UTF-8 sequence then the sequence might + // be a unicode letter. + if utf8.RuneStart(first) { + firstRune, _ := utf8.DecodeRuneInString(s) + if unicode.IsLetter(firstRune) { + return true + } + } + + return false +} diff --git a/vendor/github.com/hashicorp/hil/scanner/token.go b/vendor/github.com/hashicorp/hil/scanner/token.go new file mode 100644 index 00000000..b6c82ae9 --- /dev/null +++ b/vendor/github.com/hashicorp/hil/scanner/token.go @@ -0,0 +1,105 @@ +package scanner + +import ( + "fmt" + + "github.com/hashicorp/hil/ast" +) + +type Token struct { + Type TokenType + Content string + Pos ast.Pos +} + +//go:generate stringer -type=TokenType +type TokenType rune + +const ( + // Raw string data outside of ${ .. } sequences + LITERAL TokenType = 'o' + + // STRING is like a LITERAL but it's inside a quoted string + // within a ${ ... } sequence, and so it can contain backslash + // escaping. + STRING TokenType = 'S' + + // Other Literals + INTEGER TokenType = 'I' + FLOAT TokenType = 'F' + BOOL TokenType = 'B' + + BEGIN TokenType = '$' // actually "${" + END TokenType = '}' + OQUOTE TokenType = '“' // Opening quote of a nested quoted sequence + CQUOTE TokenType = '”' // Closing quote of a nested quoted sequence + OPAREN TokenType = '(' + CPAREN TokenType = ')' + OBRACKET TokenType = '[' + CBRACKET TokenType = ']' + COMMA TokenType = ',' + + IDENTIFIER TokenType = 'i' + + PERIOD TokenType = '.' + PLUS TokenType = '+' + MINUS TokenType = '-' + STAR TokenType = '*' + SLASH TokenType = '/' + PERCENT TokenType = '%' + + AND TokenType = '∧' + OR TokenType = '∨' + BANG TokenType = '!' + + EQUAL TokenType = '=' + NOTEQUAL TokenType = '≠' + GT TokenType = '>' + LT TokenType = '<' + GTE TokenType = '≥' + LTE TokenType = '≤' + + QUESTION TokenType = '?' + COLON TokenType = ':' + + EOF TokenType = '␄' + + // Produced for sequences that cannot be understood as valid tokens + // e.g. due to use of unrecognized punctuation. 
+ INVALID TokenType = '�' +) + +func (t *Token) String() string { + switch t.Type { + case EOF: + return "end of string" + case INVALID: + return fmt.Sprintf("invalid sequence %q", t.Content) + case INTEGER: + return fmt.Sprintf("integer %s", t.Content) + case FLOAT: + return fmt.Sprintf("float %s", t.Content) + case STRING: + return fmt.Sprintf("string %q", t.Content) + case LITERAL: + return fmt.Sprintf("literal %q", t.Content) + case OQUOTE: + return fmt.Sprintf("opening quote") + case CQUOTE: + return fmt.Sprintf("closing quote") + case AND: + return "&&" + case OR: + return "||" + case NOTEQUAL: + return "!=" + case GTE: + return ">=" + case LTE: + return "<=" + default: + // The remaining token types have content that + // speaks for itself. + return fmt.Sprintf("%q", t.Content) + } +} diff --git a/vendor/github.com/hashicorp/hil/scanner/tokentype_string.go b/vendor/github.com/hashicorp/hil/scanner/tokentype_string.go new file mode 100644 index 00000000..a602f5fd --- /dev/null +++ b/vendor/github.com/hashicorp/hil/scanner/tokentype_string.go @@ -0,0 +1,51 @@ +// Code generated by "stringer -type=TokenType"; DO NOT EDIT + +package scanner + +import "fmt" + +const _TokenType_name = "BANGBEGINPERCENTOPARENCPARENSTARPLUSCOMMAMINUSPERIODSLASHCOLONLTEQUALGTQUESTIONBOOLFLOATINTEGERSTRINGOBRACKETCBRACKETIDENTIFIERLITERALENDOQUOTECQUOTEANDORNOTEQUALLTEGTEEOFINVALID" + +var _TokenType_map = map[TokenType]string{ + 33: _TokenType_name[0:4], + 36: _TokenType_name[4:9], + 37: _TokenType_name[9:16], + 40: _TokenType_name[16:22], + 41: _TokenType_name[22:28], + 42: _TokenType_name[28:32], + 43: _TokenType_name[32:36], + 44: _TokenType_name[36:41], + 45: _TokenType_name[41:46], + 46: _TokenType_name[46:52], + 47: _TokenType_name[52:57], + 58: _TokenType_name[57:62], + 60: _TokenType_name[62:64], + 61: _TokenType_name[64:69], + 62: _TokenType_name[69:71], + 63: _TokenType_name[71:79], + 66: _TokenType_name[79:83], + 70: _TokenType_name[83:88], + 73: _TokenType_name[88:95], + 83: _TokenType_name[95:101], + 91: _TokenType_name[101:109], + 93: _TokenType_name[109:117], + 105: _TokenType_name[117:127], + 111: _TokenType_name[127:134], + 125: _TokenType_name[134:137], + 8220: _TokenType_name[137:143], + 8221: _TokenType_name[143:149], + 8743: _TokenType_name[149:152], + 8744: _TokenType_name[152:154], + 8800: _TokenType_name[154:162], + 8804: _TokenType_name[162:165], + 8805: _TokenType_name[165:168], + 9220: _TokenType_name[168:171], + 65533: _TokenType_name[171:178], +} + +func (i TokenType) String() string { + if str, ok := _TokenType_map[i]; ok { + return str + } + return fmt.Sprintf("TokenType(%d)", i) +} |
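The following usage sketch is not part of the vendored diff above. Assuming the API reads as shown in the added files (Scan, NewPeeker, Read, Close, and the Token/TokenType definitions), it illustrates how a caller such as HIL's parser might consume the token stream; the input string and the printing loop are invented for the example.

package main

import (
	"fmt"

	"github.com/hashicorp/hil/ast"
	"github.com/hashicorp/hil/scanner"
)

func main() {
	// Scan partitions the raw input into tokens delivered on a channel;
	// it does no interpretation, so escape sequences and number parsing
	// are left to the consumer, per the doc comment on Scan.
	ch := scanner.Scan(`foo ${var.name + 1}`, ast.InitPos)

	// Peeker wraps the channel and provides one token of lookahead.
	p := scanner.NewPeeker(ch)
	// Close drains any remaining tokens so the scanner goroutine cannot
	// leak if we stop reading early. Deferred here for illustration; the
	// doc comment notes it is unnecessary once EOF has been read.
	defer p.Close()

	for {
		tok := p.Read()
		if tok.Type == scanner.EOF {
			break
		}
		fmt.Printf("%s at line %d, column %d: %s\n",
			tok.Type, tok.Pos.Line, tok.Pos.Column, tok)
	}
}

For the input above, the loop would report a LITERAL for "foo ", BEGIN for "${", an IDENTIFIER for "var.name" (periods are permitted inside identifiers by scanIdentifier), PLUS, an INTEGER for "1", and END for "}", each with its source position.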