diff --git a/conf/lex.go b/conf/lex.go index 8a7ce5f7..f47fb386 100644 --- a/conf/lex.go +++ b/conf/lex.go @@ -16,6 +16,7 @@ package conf import ( + "encoding/hex" "fmt" "strings" "unicode" @@ -82,6 +83,10 @@ type lexer struct { // nested arrays. The last state on the stack is used after a value has // been lexed. Similarly for comments. stack []stateFn + + // Used for processing escapable substrings in double-quoted and raw strings + stringParts []string + stringStateFn stateFn } type item struct { @@ -103,11 +108,12 @@ func (lx *lexer) nextItem() item { func lex(input string) *lexer { lx := &lexer{ - input: input, - state: lexTop, - line: 1, - items: make(chan item, 10), - stack: make([]stateFn, 0, 10), + input: input, + state: lexTop, + line: 1, + items: make(chan item, 10), + stack: make([]stateFn, 0, 10), + stringParts: []string{}, } return lx } @@ -127,10 +133,37 @@ func (lx *lexer) pop() stateFn { } func (lx *lexer) emit(typ itemType) { - lx.items <- item{typ, lx.input[lx.start:lx.pos], lx.line} + lx.items <- item{typ, strings.Join(lx.stringParts, "") + lx.input[lx.start:lx.pos], lx.line} lx.start = lx.pos } +func (lx *lexer) emitString() { + var finalString string + if len(lx.stringParts) > 0 { + finalString = strings.Join(lx.stringParts, "") + lx.input[lx.start:lx.pos] + lx.stringParts = []string{} + } else { + finalString = lx.input[lx.start:lx.pos] + } + lx.items <- item{itemString, finalString, lx.line} + lx.start = lx.pos +} + +func (lx *lexer) addCurrentStringPart(offset int) { + lx.stringParts = append(lx.stringParts, lx.input[lx.start:lx.pos-offset]) + lx.start = lx.pos +} + +func (lx *lexer) addStringPart(s string) stateFn { + lx.stringParts = append(lx.stringParts, s) + lx.start = lx.pos + return lx.stringStateFn +} + +func (lx *lexer) hasEscapedParts() bool { + return len(lx.stringParts) > 0 +} + func (lx *lexer) next() (r rune) { if lx.pos >= len(lx.input) { lx.width = 0 @@ -453,6 +486,7 @@ func lexValue(lx *lexer) stateFn { return lexQuotedString case r == dqStringStart: lx.ignore() // ignore the " or ' + lx.stringStateFn = lexDubQuotedString return lexDubQuotedString case r == '-': return lexNegNumberStart @@ -468,6 +502,7 @@ func lexValue(lx *lexer) stateFn { return lx.errorf("Expected value but found new line") } lx.backup() + lx.stringStateFn = lexString return lexString } @@ -721,9 +756,12 @@ func lexQuotedString(lx *lexer) stateFn { func lexDubQuotedString(lx *lexer) stateFn { r := lx.next() switch { + case r == '\\': + lx.addCurrentStringPart(1) + return lexStringEscape case r == dqStringEnd: lx.backup() - lx.emit(itemString) + lx.emitString() lx.next() lx.ignore() return lx.pop() @@ -736,6 +774,7 @@ func lexString(lx *lexer) stateFn { r := lx.next() switch { case r == '\\': + lx.addCurrentStringPart(1) return lexStringEscape // Termination of non-quoted strings case isNL(r) || r == eof || r == optValTerm || @@ -743,17 +782,19 @@ func lexString(lx *lexer) stateFn { isWhitespace(r): lx.backup() - if lx.isBool() { + if lx.hasEscapedParts() { + lx.emitString() + } else if lx.isBool() { lx.emit(itemBool) } else if lx.isVariable() { lx.emit(itemVariable) } else { - lx.emit(itemString) + lx.emitString() } return lx.pop() case r == sqStringEnd: lx.backup() - lx.emit(itemString) + lx.emitString() lx.next() lx.ignore() return lx.pop() @@ -803,15 +844,15 @@ func lexStringEscape(lx *lexer) stateFn { case 'x': return lexStringBinary case 't': - fallthrough + return lx.addStringPart("\t") case 'n': - fallthrough + return lx.addStringPart("\n") case 'r': - fallthrough + return lx.addStringPart("\r") case '"': - fallthrough + return lx.addStringPart("\"") case '\\': - return lexString + return lx.addStringPart("\\") } return lx.errorf("Invalid escape character '%v'. Only the following "+ "escape characters are allowed: \\xXX, \\t, \\n, \\r, \\\", \\\\.", r) @@ -821,17 +862,20 @@ func lexStringEscape(lx *lexer) stateFn { // that the '\x' has already been consumed. func lexStringBinary(lx *lexer) stateFn { r := lx.next() - if !isHexadecimal(r) { - return lx.errorf("Expected two hexadecimal digits after '\\x', but "+ - "got '%v' instead.", r) + if isNL(r) { + return lx.errorf("Expected two hexadecimal digits after '\\x', but hit end of line") } - r = lx.next() - if !isHexadecimal(r) { - return lx.errorf("Expected two hexadecimal digits after '\\x', but "+ - "got '%v' instead.", r) + if isNL(r) { + return lx.errorf("Expected two hexadecimal digits after '\\x', but hit end of line") } - return lexString + offset := lx.pos - 2 + byteString, err := hex.DecodeString(lx.input[offset:lx.pos]) + if err != nil { + return lx.errorf("Expected two hexadecimal digits after '\\x', but got '%s'", lx.input[offset:lx.pos]) + } + lx.addStringPart(string(byteString)) + return lx.stringStateFn } // lexNumberOrDateStart consumes either a (positive) integer, a float, a datetime, or IP. @@ -1028,12 +1072,6 @@ func isNL(r rune) bool { return r == '\n' || r == '\r' } -func isHexadecimal(r rune) bool { - return (r >= '0' && r <= '9') || - (r >= 'a' && r <= 'f') || - (r >= 'A' && r <= 'F') -} - func (itype itemType) String() string { switch itype { case itemError: diff --git a/conf/lex_test.go b/conf/lex_test.go index 86655576..99313b89 100644 --- a/conf/lex_test.go +++ b/conf/lex_test.go @@ -65,10 +65,20 @@ func TestComplexStringValues(t *testing.T) { func TestBinaryString(t *testing.T) { expectedItems := []item{ {itemKey, "foo", 1}, - {itemString, "\\x22", 1}, + {itemString, "e", 1}, {itemEOF, "", 1}, } - lx := lex("foo = \\x22") + lx := lex("foo = \\x65") + expect(t, lx, expectedItems) +} + +func TestBinaryStringLatin1(t *testing.T) { + expectedItems := []item{ + {itemKey, "foo", 1}, + {itemString, "\xe9", 1}, + {itemEOF, "", 1}, + } + lx := lex("foo = \\xe9") expect(t, lx, expectedItems) } @@ -197,6 +207,46 @@ func TestSimpleKeyFloatValues(t *testing.T) { expect(t, lx, expectedItems) } +func TestBadBinaryStringEndingAfterZeroHexChars(t *testing.T) { + expectedItems := []item{ + {itemKey, "foo", 1}, + {itemError, "Expected two hexadecimal digits after '\\x', but hit end of line", 2}, + {itemEOF, "", 1}, + } + lx := lex("foo = xyz\\x\n") + expect(t, lx, expectedItems) +} + +func TestBadBinaryStringEndingAfterOneHexChar(t *testing.T) { + expectedItems := []item{ + {itemKey, "foo", 1}, + {itemError, "Expected two hexadecimal digits after '\\x', but hit end of line", 2}, + {itemEOF, "", 1}, + } + lx := lex("foo = xyz\\xF\n") + expect(t, lx, expectedItems) +} + +func TestBadBinaryStringWithZeroHexChars(t *testing.T) { + expectedItems := []item{ + {itemKey, "foo", 1}, + {itemError, "Expected two hexadecimal digits after '\\x', but got ']\"'", 1}, + {itemEOF, "", 1}, + } + lx := lex(`foo = "[\x]"`) + expect(t, lx, expectedItems) +} + +func TestBadBinaryStringWithOneHexChar(t *testing.T) { + expectedItems := []item{ + {itemKey, "foo", 1}, + {itemError, "Expected two hexadecimal digits after '\\x', but got 'e]'", 1}, + {itemEOF, "", 1}, + } + lx := lex(`foo = "[\xe]"`) + expect(t, lx, expectedItems) +} + func TestBadFloatValues(t *testing.T) { expectedItems := []item{ {itemKey, "foo", 1}, @@ -269,7 +319,7 @@ func TestRawString(t *testing.T) { lx := lex("foo = bar") expect(t, lx, expectedItems) - lx = lex(`foo = bar' `) + lx = lex(`foo = bar' `) //'single-quote for emacs TODO: Remove me expect(t, lx, expectedItems) } @@ -523,21 +573,131 @@ bs = \\ func TestEscapedString(t *testing.T) { expectedItems := []item{ {itemKey, "foo", 2}, - {itemString, `\t`, 2}, + {itemString, "\t", 2}, {itemKey, "bar", 3}, - {itemString, `\r`, 3}, + {itemString, "\r", 3}, {itemKey, "baz", 4}, - {itemString, `\n`, 4}, + {itemString, "\n", 4}, {itemKey, "q", 5}, - {itemString, `\"`, 5}, + {itemString, "\"", 5}, {itemKey, "bs", 6}, - {itemString, `\\`, 6}, + {itemString, "\\", 6}, {itemEOF, "", 6}, } lx := lex(escString) expect(t, lx, expectedItems) } +func TestCompoundStringES(t *testing.T) { + expectedItems := []item{ + {itemKey, "foo", 1}, + {itemString, "\\end", 1}, + {itemEOF, "", 2}, + } + lx := lex(`foo = "\\end"`) + expect(t, lx, expectedItems) +} + +func TestCompoundStringSE(t *testing.T) { + expectedItems := []item{ + {itemKey, "foo", 1}, + {itemString, "start\\", 1}, + {itemEOF, "", 2}, + } + lx := lex(`foo = "start\\"`) + expect(t, lx, expectedItems) +} + +func TestCompoundStringEE(t *testing.T) { + expectedItems := []item{ + {itemKey, "foo", 1}, + {itemString, "Eq", 1}, + {itemEOF, "", 2}, + } + lx := lex(`foo = \x45\x71`) + expect(t, lx, expectedItems) +} + +func TestCompoundStringSEE(t *testing.T) { + expectedItems := []item{ + {itemKey, "foo", 1}, + {itemString, "startEq", 1}, + {itemEOF, "", 2}, + } + lx := lex(`foo = start\x45\x71`) + expect(t, lx, expectedItems) +} + +func TestCompoundStringSES(t *testing.T) { + expectedItems := []item{ + {itemKey, "foo", 1}, + {itemString, "start|end", 1}, + {itemEOF, "", 2}, + } + lx := lex(`foo = start\x7Cend`) + expect(t, lx, expectedItems) +} + +func TestCompoundStringEES(t *testing.T) { + expectedItems := []item{ + {itemKey, "foo", 1}, + {itemString, "<>end", 1}, + {itemEOF, "", 2}, + } + lx := lex(`foo = \x3c\x3eend`) + expect(t, lx, expectedItems) +} + +func TestCompoundStringESE(t *testing.T) { + expectedItems := []item{ + {itemKey, "foo", 1}, + {itemString, "", 1}, + {itemEOF, "", 2}, + } + lx := lex(`foo = \x3cmiddle\x3E`) + expect(t, lx, expectedItems) +} + +func TestNonBool(t *testing.T) { + expectedItems := []item{ + {itemKey, "foo", 1}, + {itemString, "\\true", 1}, + {itemEOF, "", 2}, + } + lx := lex(`foo = \\true`) + expect(t, lx, expectedItems) +} + +func TestNonVariable(t *testing.T) { + expectedItems := []item{ + {itemKey, "foo", 1}, + {itemString, "\\$var", 1}, + {itemEOF, "", 2}, + } + lx := lex(`foo = \\$var`) + expect(t, lx, expectedItems) +} + +func TestEmptyStringDQ(t *testing.T) { + expectedItems := []item{ + {itemKey, "foo", 1}, + {itemString, "", 1}, + {itemEOF, "", 2}, + } + lx := lex(`foo = ""`) + expect(t, lx, expectedItems) +} + +func TestEmptyStringSQ(t *testing.T) { + expectedItems := []item{ + {itemKey, "foo", 1}, + {itemString, "", 1}, + {itemEOF, "", 2}, + } + lx := lex(`foo = ''`) + expect(t, lx, expectedItems) +} + var nestedWhitespaceMap = ` foo { host {