Diffstat (limited to 'vendor/github.com/hashicorp/hil/scanner')
-rw-r--r--  vendor/github.com/hashicorp/hil/scanner/peeker.go             55
-rw-r--r--  vendor/github.com/hashicorp/hil/scanner/scanner.go           550
-rw-r--r--  vendor/github.com/hashicorp/hil/scanner/token.go             105
-rw-r--r--  vendor/github.com/hashicorp/hil/scanner/tokentype_string.go   51
4 files changed, 761 insertions, 0 deletions
diff --git a/vendor/github.com/hashicorp/hil/scanner/peeker.go b/vendor/github.com/hashicorp/hil/scanner/peeker.go
new file mode 100644
index 00000000..4de37283
--- /dev/null
+++ b/vendor/github.com/hashicorp/hil/scanner/peeker.go
@@ -0,0 +1,55 @@
+package scanner
+
+// Peeker is a utility that wraps a token channel returned by Scan,
+// providing an interface that lets a caller (e.g. the parser) work
+// with the token stream with one token of lookahead, along with
+// utilities for more convenient processing of the stream.
+type Peeker struct {
+ ch <-chan *Token
+ peeked *Token
+}
+
+func NewPeeker(ch <-chan *Token) *Peeker {
+ return &Peeker{
+ ch: ch,
+ }
+}
+
+// Peek returns the next token in the stream without consuming it. A
+// subsequent call to Read will return the same token.
+func (p *Peeker) Peek() *Token {
+ if p.peeked == nil {
+ p.peeked = <-p.ch
+ }
+ return p.peeked
+}
+
+// Read consumes the next token in the stream and returns it.
+func (p *Peeker) Read() *Token {
+ token := p.Peek()
+
+ // As a special case, we will produce the EOF token forever once
+ // it is reached.
+ if token.Type != EOF {
+ p.peeked = nil
+ }
+
+ return token
+}
+
+// Close ensures that the token stream has been exhausted, to prevent
+// the goroutine in the underlying scanner from leaking.
+//
+// It's not necessary to call this if the caller reads the token stream
+// to EOF, since that implicitly closes the scanner.
+func (p *Peeker) Close() {
+ for range p.ch {
+ // discard
+ }
+ // Install a synthetic EOF token in 'peeked' in case someone
+ // erroneously calls Peek() or Read() after we've closed.
+ p.peeked = &Token{
+ Type: EOF,
+ Content: "",
+ }
+}
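
A minimal sketch of how a caller such as the parser might drive this API. This is a hypothetical usage example based only on the functions in the diff above; it is not part of the vendored package.

package main

import (
	"fmt"

	"github.com/hashicorp/hil/ast"
	"github.com/hashicorp/hil/scanner"
)

func main() {
	ch := scanner.Scan("hello ${name}", ast.InitPos)
	p := scanner.NewPeeker(ch)
	defer p.Close() // drain the stream so the scanner goroutine can exit

	// Peek is idempotent: both calls observe the same LITERAL token.
	fmt.Printf("%s %q\n", p.Peek().Type, p.Peek().Content) // LITERAL "hello "

	// Read consumes, so each call advances the stream.
	p.Read()                   // the LITERAL we peeked at
	fmt.Println(p.Read().Type) // BEGIN
	fmt.Println(p.Read().Type) // IDENTIFIER
	fmt.Println(p.Read().Type) // END

	// Once EOF is reached, Read keeps returning it forever.
	fmt.Println(p.Read().Type, p.Read().Type) // EOF EOF
}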
diff --git a/vendor/github.com/hashicorp/hil/scanner/scanner.go b/vendor/github.com/hashicorp/hil/scanner/scanner.go
new file mode 100644
index 00000000..bab86c67
--- /dev/null
+++ b/vendor/github.com/hashicorp/hil/scanner/scanner.go
@@ -0,0 +1,550 @@
+package scanner
+
+import (
+ "unicode"
+ "unicode/utf8"
+
+ "github.com/hashicorp/hil/ast"
+)
+
+// Scan returns a channel that receives Tokens from the given input string.
+//
+// The scanner's job is just to partition the string into meaningful parts.
+// It doesn't do any transformation of the raw input string, so the caller
+// must deal with any further interpretation required, such as parsing INTEGER
+// tokens into real ints, or dealing with escape sequences in LITERAL or
+// STRING tokens.
+//
+// Strings in the returned tokens are slices from the original string.
+//
+// startPos should be set to ast.InitPos unless the caller knows that
+// this interpolation string is part of a larger file and knows the position
+// of the first character in that larger file.
+func Scan(s string, startPos ast.Pos) <-chan *Token {
+ ch := make(chan *Token)
+ go scan(s, ch, startPos)
+ return ch
+}
+
+func scan(s string, ch chan<- *Token, pos ast.Pos) {
+ // 'remain' starts off as the whole string, but we gradually
+ // slice off the front of it as we work our way through.
+ remain := s
+
+ // nesting keeps track of how many ${ .. } sequences we are
+ // inside, so we can recognize the minor differences in syntax
+ // between outer string literals (LITERAL tokens) and quoted
+ // string literals (STRING tokens).
+ nesting := 0
+
+ // We're going to flip back and forth between parsing literals/strings
+ // and parsing interpolation sequences ${ .. } until we reach EOF or
+ // some INVALID token.
+All:
+ for {
+ startPos := pos
+ // Literal string processing first, since the beginning of
+ // a string is always outside of an interpolation sequence.
+ literalVal, terminator := scanLiteral(remain, pos, nesting > 0)
+
+ if len(literalVal) > 0 {
+ litType := LITERAL
+ if nesting > 0 {
+ litType = STRING
+ }
+ ch <- &Token{
+ Type: litType,
+ Content: literalVal,
+ Pos: startPos,
+ }
+ remain = remain[len(literalVal):]
+ }
+
+ ch <- terminator
+ remain = remain[len(terminator.Content):]
+ pos = terminator.Pos
+ // Safe to use len() here because none of the terminator tokens
+ // contain multi-byte UTF-8 sequences.
+ pos.Column = pos.Column + len(terminator.Content)
+
+ switch terminator.Type {
+ case INVALID:
+ // Synthetic EOF after invalid token, since further scanning
+ // is likely to just produce more garbage.
+ ch <- &Token{
+ Type: EOF,
+ Content: "",
+ Pos: pos,
+ }
+ break All
+ case EOF:
+ // All done!
+ break All
+ case BEGIN:
+ nesting++
+ case CQUOTE:
+ // nothing special to do
+ default:
+ // Should never happen
+ panic("invalid string/literal terminator")
+ }
+
+ // Now we do the processing of the insides of ${ .. } sequences.
+ // This loop terminates when we encounter either a closing } or
+ // an opening ", which will cause us to return to literal processing.
+ Interpolation:
+ for {
+
+ token, size, newPos := scanInterpolationToken(remain, pos)
+ ch <- token
+ remain = remain[size:]
+ pos = newPos
+
+ switch token.Type {
+ case INVALID:
+ // Synthetic EOF after invalid token, since further scanning
+ // is likely to just produce more garbage.
+ ch <- &Token{
+ Type: EOF,
+ Content: "",
+ Pos: pos,
+ }
+ break All
+ case EOF:
+ // All done
+ // (though a syntax error that we'll catch in the parser)
+ break All
+ case END:
+ nesting--
+ if nesting < 0 {
+ // Can happen if there are unbalanced ${ and } sequences
+ // in the input, which we'll catch in the parser.
+ nesting = 0
+ }
+ break Interpolation
+ case OQUOTE:
+ // Beginning of nested quoted string
+ break Interpolation
+ }
+ }
+ }
+
+ close(ch)
+}
+
+// Returns the token found at the start of the given string, followed by
+// the number of bytes that were consumed from the string and the adjusted
+// source position.
+//
+// Note that the number of bytes consumed can be more than the length of
+// the returned token contents if the string begins with whitespace, since
+// it will be silently consumed before reading the token.
+func scanInterpolationToken(s string, startPos ast.Pos) (*Token, int, ast.Pos) {
+ pos := startPos
+ size := 0
+
+ // Consume whitespace, if any
+ for len(s) > 0 && byteIsSpace(s[0]) {
+ if s[0] == '\n' {
+ pos.Column = 1
+ pos.Line++
+ } else {
+ pos.Column++
+ }
+ size++
+ s = s[1:]
+ }
+
+ // Unexpected EOF during sequence
+ if len(s) == 0 {
+ return &Token{
+ Type: EOF,
+ Content: "",
+ Pos: pos,
+ }, size, pos
+ }
+
+ next := s[0]
+ var token *Token
+
+ switch next {
+ case '(', ')', '[', ']', ',', '.', '+', '-', '*', '/', '%', '?', ':':
+ // Easy punctuation symbols that don't have any special meaning
+ // during scanning, and that stand for themselves in the
+ // TokenType enumeration.
+ token = &Token{
+ Type: TokenType(next),
+ Content: s[:1],
+ Pos: pos,
+ }
+ case '}':
+ token = &Token{
+ Type: END,
+ Content: s[:1],
+ Pos: pos,
+ }
+ case '"':
+ token = &Token{
+ Type: OQUOTE,
+ Content: s[:1],
+ Pos: pos,
+ }
+ case '!':
+ if len(s) >= 2 && s[:2] == "!=" {
+ token = &Token{
+ Type: NOTEQUAL,
+ Content: s[:2],
+ Pos: pos,
+ }
+ } else {
+ token = &Token{
+ Type: BANG,
+ Content: s[:1],
+ Pos: pos,
+ }
+ }
+ case '<':
+ if len(s) >= 2 && s[:2] == "<=" {
+ token = &Token{
+ Type: LTE,
+ Content: s[:2],
+ Pos: pos,
+ }
+ } else {
+ token = &Token{
+ Type: LT,
+ Content: s[:1],
+ Pos: pos,
+ }
+ }
+ case '>':
+ if len(s) >= 2 && s[:2] == ">=" {
+ token = &Token{
+ Type: GTE,
+ Content: s[:2],
+ Pos: pos,
+ }
+ } else {
+ token = &Token{
+ Type: GT,
+ Content: s[:1],
+ Pos: pos,
+ }
+ }
+ case '=':
+ if len(s) >= 2 && s[:2] == "==" {
+ token = &Token{
+ Type: EQUAL,
+ Content: s[:2],
+ Pos: pos,
+ }
+ } else {
+ // A single equals is not a valid operator
+ token = &Token{
+ Type: INVALID,
+ Content: s[:1],
+ Pos: pos,
+ }
+ }
+ case '&':
+ if len(s) >= 2 && s[:2] == "&&" {
+ token = &Token{
+ Type: AND,
+ Content: s[:2],
+ Pos: pos,
+ }
+ } else {
+ token = &Token{
+ Type: INVALID,
+ Content: s[:1],
+ Pos: pos,
+ }
+ }
+ case '|':
+ if len(s) >= 2 && s[:2] == "||" {
+ token = &Token{
+ Type: OR,
+ Content: s[:2],
+ Pos: pos,
+ }
+ } else {
+ token = &Token{
+ Type: INVALID,
+ Content: s[:1],
+ Pos: pos,
+ }
+ }
+ default:
+ if next >= '0' && next <= '9' {
+ num, numType := scanNumber(s)
+ token = &Token{
+ Type: numType,
+ Content: num,
+ Pos: pos,
+ }
+ } else if stringStartsWithIdentifier(s) {
+ ident, runeLen := scanIdentifier(s)
+ tokenType := IDENTIFIER
+ if ident == "true" || ident == "false" {
+ tokenType = BOOL
+ }
+ token = &Token{
+ Type: tokenType,
+ Content: ident,
+ Pos: pos,
+ }
+ // Skip usual token handling because it doesn't
+ // know how to deal with UTF-8 sequences.
+ pos.Column = pos.Column + runeLen
+ return token, size + len(ident), pos
+ } else {
+ _, byteLen := utf8.DecodeRuneInString(s)
+ token = &Token{
+ Type: INVALID,
+ Content: s[:byteLen],
+ Pos: pos,
+ }
+ // Skip usual token handling because it doesn't
+ // know how to deal with UTF-8 sequences.
+ pos.Column = pos.Column + 1
+ return token, size + byteLen, pos
+ }
+ }
+
+ // Here we assume that the token content contains no multi-byte UTF-8
+ // sequences, because we dealt with UTF-8 characters as a special case
+ // where necessary above.
+ size = size + len(token.Content)
+ pos.Column = pos.Column + len(token.Content)
+
+ return token, size, pos
+}
+
+// Returns the (possibly-empty) prefix of the given string that represents
+// a literal, followed by the token that marks the end of the literal.
+func scanLiteral(s string, startPos ast.Pos, nested bool) (string, *Token) {
+ litLen := 0
+ pos := startPos
+ var terminator *Token
+ for {
+
+ if litLen >= len(s) {
+ if nested {
+ // We've ended in the middle of a quoted string,
+ // which means this token is actually invalid.
+ return "", &Token{
+ Type: INVALID,
+ Content: s,
+ Pos: startPos,
+ }
+ }
+ terminator = &Token{
+ Type: EOF,
+ Content: "",
+ Pos: pos,
+ }
+ break
+ }
+
+ next := s[litLen]
+
+ if next == '$' && len(s) > litLen+1 {
+ follow := s[litLen+1]
+
+ if follow == '{' {
+ terminator = &Token{
+ Type: BEGIN,
+ Content: s[litLen : litLen+2],
+ Pos: pos,
+ }
+ pos.Column = pos.Column + 2
+ break
+ } else if follow == '$' {
+ // Double-$ escapes the special processing of $,
+ // so we will consume both characters here.
+ pos.Column = pos.Column + 2
+ litLen = litLen + 2
+ continue
+ }
+ }
+
+ // special handling that applies only to quoted strings
+ if nested {
+ if next == '"' {
+ terminator = &Token{
+ Type: CQUOTE,
+ Content: s[litLen : litLen+1],
+ Pos: pos,
+ }
+ pos.Column = pos.Column + 1
+ break
+ }
+
+ // Escaped quote marks do not terminate the string.
+ //
+ // All we do here in the scanner is avoid terminating a string
+ // due to an escaped quote. The parser is responsible for the
+ // full handling of escape sequences, since it's able to produce
+ // better error messages than we can produce in here.
+ if next == '\\' && len(s) > litLen+1 {
+ follow := s[litLen+1]
+
+ if follow == '"' {
+ // \" escapes the special processing of ",
+ // so we will consume both characters here.
+ pos.Column = pos.Column + 2
+ litLen = litLen + 2
+ continue
+ }
+ }
+ }
+
+ if next == '\n' {
+ pos.Column = 1
+ pos.Line++
+ litLen++
+ } else {
+ pos.Column++
+
+ // "Column" measures runes, so we need to actually consume
+ // a valid UTF-8 character here.
+ _, size := utf8.DecodeRuneInString(s[litLen:])
+ litLen = litLen + size
+ }
+
+ }
+
+ return s[:litLen], terminator
+}
+
+// scanNumber returns the extent of the prefix of the string that represents
+// a valid number, along with what type of number it represents: INT or FLOAT.
+//
+// scanNumber does only basic character analysis: numbers consist of digits
+// and periods, with at least one period signalling a FLOAT. It's the parser's
+// responsibility to validate the form and range of the number, such as ensuring
+// that a FLOAT actually contains only one period, etc.
+func scanNumber(s string) (string, TokenType) {
+ period := -1
+ byteLen := 0
+ numType := INTEGER
+ for {
+ if byteLen >= len(s) {
+ break
+ }
+
+ next := s[byteLen]
+ if next != '.' && (next < '0' || next > '9') {
+ // If our last value was a period, then we're not a float,
+ // we're just an integer that ends in a period.
+ if period == byteLen-1 {
+ byteLen--
+ numType = INTEGER
+ }
+
+ break
+ }
+
+ if next == '.' {
+ // If we've already seen a period, break out
+ if period >= 0 {
+ break
+ }
+
+ period = byteLen
+ numType = FLOAT
+ }
+
+ byteLen++
+ }
+
+ return s[:byteLen], numType
+}
+
+// scanIdentifier returns the extent of the prefix of the string that
+// represents a valid identifier, along with the length of that prefix
+// in runes.
+//
+// Identifiers may contain utf8-encoded non-Latin letters, which will
+// cause the returned "rune length" to be shorter than the byte length
+// of the returned string.
+func scanIdentifier(s string) (string, int) {
+ byteLen := 0
+ runeLen := 0
+ for {
+ if byteLen >= len(s) {
+ break
+ }
+
+ nextRune, size := utf8.DecodeRuneInString(s[byteLen:])
+ if !(nextRune == '_' ||
+ nextRune == '-' ||
+ nextRune == '.' ||
+ nextRune == '*' ||
+ unicode.IsNumber(nextRune) ||
+ unicode.IsLetter(nextRune) ||
+ unicode.IsMark(nextRune)) {
+ break
+ }
+
+ // If we reach a star, it must follow a period to be part of the
+ // same identifier (guarding byteLen avoids indexing before s).
+ if nextRune == '*' && (byteLen == 0 || s[byteLen-1] != '.') {
+ break
+ }
+
+ // If our previous character was a star, then the current must
+ // be period. Otherwise, undo that and exit.
+ if byteLen > 0 && s[byteLen-1] == '*' && nextRune != '.' {
+ byteLen--
+ if s[byteLen-1] == '.' {
+ byteLen--
+ }
+
+ break
+ }
+
+ byteLen = byteLen + size
+ runeLen = runeLen + 1
+ }
+
+ return s[:byteLen], runeLen
+}
+
+// byteIsSpace implements a restrictive interpretation of spaces valid inside
+// interpolation sequences: spaces, tabs, carriage returns, and newlines.
+func byteIsSpace(b byte) bool {
+ switch b {
+ case ' ', '\t', '\r', '\n':
+ return true
+ default:
+ return false
+ }
+}
+
+// stringStartsWithIdentifier returns true if the given string begins with
+// a character that is a legal start of an identifier: an underscore or
+// any character that Unicode considers to be a letter.
+func stringStartsWithIdentifier(s string) bool {
+ if len(s) == 0 {
+ return false
+ }
+
+ first := s[0]
+
+ // Easy ASCII cases first
+ if (first >= 'a' && first <= 'z') || (first >= 'A' && first <= 'Z') || first == '_' {
+ return true
+ }
+
+ // If our first byte begins a UTF-8 sequence then the sequence might
+ // be a unicode letter.
+ if utf8.RuneStart(first) {
+ firstRune, _ := utf8.DecodeRuneInString(s)
+ if unicode.IsLetter(firstRune) {
+ return true
+ }
+ }
+
+ return false
+}
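
To tie the pieces above together, here is a hypothetical driver that ranges over the channel until the scanner closes it, exercising double-$ escaping, a ${ ... } sequence, and a nested quoted string (which yields STRING rather than LITERAL tokens). The identifier upper is an arbitrary name; the scanner only tokenizes and does not resolve functions.

package main

import (
	"fmt"

	"github.com/hashicorp/hil/ast"
	"github.com/hashicorp/hil/scanner"
)

func main() {
	// $$ suppresses interpolation, so "$${1}" stays inside the LITERAL.
	input := `cost: $${1} is ${upper("world", 1.5)}`

	// Ranging until the channel closes guarantees the scan goroutine exits.
	for tok := range scanner.Scan(input, ast.InitPos) {
		fmt.Printf("%-10s %q (line %d, col %d)\n",
			tok.Type, tok.Content, tok.Pos.Line, tok.Pos.Column)
	}
	// Token types seen, in order: LITERAL, BEGIN, IDENTIFIER, OPAREN,
	// OQUOTE, STRING, CQUOTE, COMMA, FLOAT, CPAREN, END, EOF.
}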
diff --git a/vendor/github.com/hashicorp/hil/scanner/token.go b/vendor/github.com/hashicorp/hil/scanner/token.go
new file mode 100644
index 00000000..b6c82ae9
--- /dev/null
+++ b/vendor/github.com/hashicorp/hil/scanner/token.go
@@ -0,0 +1,105 @@
+package scanner
+
+import (
+ "fmt"
+
+ "github.com/hashicorp/hil/ast"
+)
+
+type Token struct {
+ Type TokenType
+ Content string
+ Pos ast.Pos
+}
+
+//go:generate stringer -type=TokenType
+type TokenType rune
+
+const (
+ // Raw string data outside of ${ .. } sequences
+ LITERAL TokenType = 'o'
+
+ // STRING is like a LITERAL but it's inside a quoted string
+ // within a ${ ... } sequence, and so it can contain backslash
+ // escaping.
+ STRING TokenType = 'S'
+
+ // Other Literals
+ INTEGER TokenType = 'I'
+ FLOAT TokenType = 'F'
+ BOOL TokenType = 'B'
+
+ BEGIN TokenType = '$' // actually "${"
+ END TokenType = '}'
+ OQUOTE TokenType = '“' // Opening quote of a nested quoted sequence
+ CQUOTE TokenType = '”' // Closing quote of a nested quoted sequence
+ OPAREN TokenType = '('
+ CPAREN TokenType = ')'
+ OBRACKET TokenType = '['
+ CBRACKET TokenType = ']'
+ COMMA TokenType = ','
+
+ IDENTIFIER TokenType = 'i'
+
+ PERIOD TokenType = '.'
+ PLUS TokenType = '+'
+ MINUS TokenType = '-'
+ STAR TokenType = '*'
+ SLASH TokenType = '/'
+ PERCENT TokenType = '%'
+
+ AND TokenType = '∧'
+ OR TokenType = '∨'
+ BANG TokenType = '!'
+
+ EQUAL TokenType = '='
+ NOTEQUAL TokenType = '≠'
+ GT TokenType = '>'
+ LT TokenType = '<'
+ GTE TokenType = '≥'
+ LTE TokenType = '≤'
+
+ QUESTION TokenType = '?'
+ COLON TokenType = ':'
+
+ EOF TokenType = '␄'
+
+ // Produced for sequences that cannot be understood as valid tokens
+ // e.g. due to use of unrecognized punctuation.
+ INVALID TokenType = '�'
+)
+
+func (t *Token) String() string {
+ switch t.Type {
+ case EOF:
+ return "end of string"
+ case INVALID:
+ return fmt.Sprintf("invalid sequence %q", t.Content)
+ case INTEGER:
+ return fmt.Sprintf("integer %s", t.Content)
+ case FLOAT:
+ return fmt.Sprintf("float %s", t.Content)
+ case STRING:
+ return fmt.Sprintf("string %q", t.Content)
+ case LITERAL:
+ return fmt.Sprintf("literal %q", t.Content)
+ case OQUOTE:
+ return "opening quote"
+ case CQUOTE:
+ return "closing quote"
+ case AND:
+ return "&&"
+ case OR:
+ return "||"
+ case NOTEQUAL:
+ return "!="
+ case GTE:
+ return ">="
+ case LTE:
+ return "<="
+ default:
+ // The remaining token types have content that
+ // speaks for itself.
+ return fmt.Sprintf("%q", t.Content)
+ }
+}
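
Since TokenType is a rune, single-character tokens are their own codepoint, while multi-character operators borrow a visually similar Unicode character. A small hypothetical illustration:

package main

import (
	"fmt"

	"github.com/hashicorp/hil/scanner"
)

func main() {
	// Punctuation tokens stand for themselves in the enumeration...
	fmt.Println(scanner.PLUS == scanner.TokenType('+')) // true

	// ...while two-character operators like != map to a lookalike rune.
	fmt.Println(scanner.NOTEQUAL == scanner.TokenType('≠')) // true

	// Token.String is aimed at error messages, so it describes the
	// token rather than just echoing its content.
	tok := &scanner.Token{Type: scanner.INTEGER, Content: "42"}
	fmt.Println(tok) // integer 42
}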
diff --git a/vendor/github.com/hashicorp/hil/scanner/tokentype_string.go b/vendor/github.com/hashicorp/hil/scanner/tokentype_string.go
new file mode 100644
index 00000000..a602f5fd
--- /dev/null
+++ b/vendor/github.com/hashicorp/hil/scanner/tokentype_string.go
@@ -0,0 +1,51 @@
+// Code generated by "stringer -type=TokenType"; DO NOT EDIT
+
+package scanner
+
+import "fmt"
+
+const _TokenType_name = "BANGBEGINPERCENTOPARENCPARENSTARPLUSCOMMAMINUSPERIODSLASHCOLONLTEQUALGTQUESTIONBOOLFLOATINTEGERSTRINGOBRACKETCBRACKETIDENTIFIERLITERALENDOQUOTECQUOTEANDORNOTEQUALLTEGTEEOFINVALID"
+
+var _TokenType_map = map[TokenType]string{
+ 33: _TokenType_name[0:4],
+ 36: _TokenType_name[4:9],
+ 37: _TokenType_name[9:16],
+ 40: _TokenType_name[16:22],
+ 41: _TokenType_name[22:28],
+ 42: _TokenType_name[28:32],
+ 43: _TokenType_name[32:36],
+ 44: _TokenType_name[36:41],
+ 45: _TokenType_name[41:46],
+ 46: _TokenType_name[46:52],
+ 47: _TokenType_name[52:57],
+ 58: _TokenType_name[57:62],
+ 60: _TokenType_name[62:64],
+ 61: _TokenType_name[64:69],
+ 62: _TokenType_name[69:71],
+ 63: _TokenType_name[71:79],
+ 66: _TokenType_name[79:83],
+ 70: _TokenType_name[83:88],
+ 73: _TokenType_name[88:95],
+ 83: _TokenType_name[95:101],
+ 91: _TokenType_name[101:109],
+ 93: _TokenType_name[109:117],
+ 105: _TokenType_name[117:127],
+ 111: _TokenType_name[127:134],
+ 125: _TokenType_name[134:137],
+ 8220: _TokenType_name[137:143],
+ 8221: _TokenType_name[143:149],
+ 8743: _TokenType_name[149:152],
+ 8744: _TokenType_name[152:154],
+ 8800: _TokenType_name[154:162],
+ 8804: _TokenType_name[162:165],
+ 8805: _TokenType_name[165:168],
+ 9220: _TokenType_name[168:171],
+ 65533: _TokenType_name[171:178],
+}
+
+func (i TokenType) String() string {
+ if str, ok := _TokenType_map[i]; ok {
+ return str
+ }
+ return fmt.Sprintf("TokenType(%d)", i)
+}