Issue 477: Improve handling of escaped substrings

This involves evaluating and interpolating \-escaped sequences
in double-quoted and undelimited strings.

Added tests for various edge conditions, and made sure that bare
strings like '\true' are not treated as a bool.
This commit is contained in:
Eric Promislow
2017-04-27 13:24:09 -07:00
parent a94b5e0f4c
commit 122b697e41
2 changed files with 235 additions and 37 deletions

View File

@@ -16,6 +16,7 @@
package conf
import (
"encoding/hex"
"fmt"
"strings"
"unicode"
@@ -82,6 +83,10 @@ type lexer struct {
// nested arrays. The last state on the stack is used after a value has
// been lexed. Similarly for comments.
stack []stateFn
// Used for processing escapable substrings in double-quoted and raw strings
stringParts []string
stringStateFn stateFn
}
type item struct {
@@ -103,11 +108,12 @@ func (lx *lexer) nextItem() item {
// lex builds and returns a lexer over input. The lexer starts in the lexTop
// state on line 1, with an items channel buffered to 10 entries and a state
// stack pre-sized to hold 10 entries.
// NOTE(review): the struct literal below lists input/state/line/items/stack
// twice. The second, re-aligned set (which also initialises stringParts to an
// empty slice) looks like the current form and the first set like the lines it
// replaced — confirm against the real file.
func lex(input string) *lexer {
lx := &lexer{
input: input,
state: lexTop,
line: 1,
items: make(chan item, 10),
stack: make([]stateFn, 0, 10),
input: input,
state: lexTop,
line: 1,
items: make(chan item, 10),
stack: make([]stateFn, 0, 10),
// Starts empty; string parts are appended while escapes are processed.
stringParts: []string{},
}
return lx
}
@@ -127,10 +133,37 @@ func (lx *lexer) pop() stateFn {
}
// emit sends an item of type typ on lx.items and then advances lx.start to
// lx.pos so that the next item begins where this one ended.
// NOTE(review): two send statements are listed below. The first uses only the
// raw input slice; the second prefixes the concatenation of lx.stringParts.
// They look like the before/after forms of a single line — confirm that only
// the second remains in the real file. Unlike emitString, neither form clears
// lx.stringParts after sending.
func (lx *lexer) emit(typ itemType) {
lx.items <- item{typ, lx.input[lx.start:lx.pos], lx.line}
lx.items <- item{typ, strings.Join(lx.stringParts, "") + lx.input[lx.start:lx.pos], lx.line}
lx.start = lx.pos
}
// emitString sends an itemString on lx.items. Its value is the pending input
// slice lx.input[lx.start:lx.pos], prefixed by the concatenation of any parts
// collected in lx.stringParts. When parts were collected they are discarded
// afterwards. lx.start is then moved up to lx.pos.
func (lx *lexer) emitString() {
	value := lx.input[lx.start:lx.pos]
	if len(lx.stringParts) > 0 {
		// Earlier segments and decoded escapes come first, the unescaped
		// tail last.
		value = strings.Join(lx.stringParts, "") + value
		lx.stringParts = []string{}
	}
	lx.items <- item{itemString, value, lx.line}
	lx.start = lx.pos
}
// addCurrentStringPart appends the pending input, minus its last offset
// bytes, to lx.stringParts and restarts the pending region at lx.pos. The
// string lexers call it with offset 1 right after reading a backslash, so the
// backslash itself is left out of the saved part.
func (lx *lexer) addCurrentStringPart(offset int) {
	end := lx.pos - offset
	segment := lx.input[lx.start:end]
	lx.stringParts = append(lx.stringParts, segment)
	lx.start = lx.pos
}
// addStringPart appends s to lx.stringParts, drops the pending input by
// moving lx.start up to lx.pos, and returns lx.stringStateFn so the caller can
// hand control back to the string state that was active.
func (lx *lexer) addStringPart(s string) stateFn {
	lx.start = lx.pos
	lx.stringParts = append(lx.stringParts, s)
	resume := lx.stringStateFn
	return resume
}
// hasEscapedParts reports whether any string parts have been collected in
// lx.stringParts for the value currently being lexed.
func (lx *lexer) hasEscapedParts() bool {
	if len(lx.stringParts) == 0 {
		return false
	}
	return true
}
func (lx *lexer) next() (r rune) {
if lx.pos >= len(lx.input) {
lx.width = 0
@@ -453,6 +486,7 @@ func lexValue(lx *lexer) stateFn {
return lexQuotedString
case r == dqStringStart:
lx.ignore() // ignore the " or '
lx.stringStateFn = lexDubQuotedString
return lexDubQuotedString
case r == '-':
return lexNegNumberStart
@@ -468,6 +502,7 @@ func lexValue(lx *lexer) stateFn {
return lx.errorf("Expected value but found new line")
}
lx.backup()
lx.stringStateFn = lexString
return lexString
}
@@ -721,9 +756,12 @@ func lexQuotedString(lx *lexer) stateFn {
func lexDubQuotedString(lx *lexer) stateFn {
r := lx.next()
switch {
case r == '\\':
lx.addCurrentStringPart(1)
return lexStringEscape
case r == dqStringEnd:
lx.backup()
lx.emit(itemString)
lx.emitString()
lx.next()
lx.ignore()
return lx.pop()
@@ -736,6 +774,7 @@ func lexString(lx *lexer) stateFn {
r := lx.next()
switch {
case r == '\\':
lx.addCurrentStringPart(1)
return lexStringEscape
// Termination of non-quoted strings
case isNL(r) || r == eof || r == optValTerm ||
@@ -743,17 +782,19 @@ func lexString(lx *lexer) stateFn {
isWhitespace(r):
lx.backup()
if lx.isBool() {
if lx.hasEscapedParts() {
lx.emitString()
} else if lx.isBool() {
lx.emit(itemBool)
} else if lx.isVariable() {
lx.emit(itemVariable)
} else {
lx.emit(itemString)
lx.emitString()
}
return lx.pop()
case r == sqStringEnd:
lx.backup()
lx.emit(itemString)
lx.emitString()
lx.next()
lx.ignore()
return lx.pop()
@@ -803,15 +844,15 @@ func lexStringEscape(lx *lexer) stateFn {
case 'x':
return lexStringBinary
case 't':
fallthrough
return lx.addStringPart("\t")
case 'n':
fallthrough
return lx.addStringPart("\n")
case 'r':
fallthrough
return lx.addStringPart("\r")
case '"':
fallthrough
return lx.addStringPart("\"")
case '\\':
return lexString
return lx.addStringPart("\\")
}
return lx.errorf("Invalid escape character '%v'. Only the following "+
"escape characters are allowed: \\xXX, \\t, \\n, \\r, \\\", \\\\.", r)
@@ -821,17 +862,20 @@ func lexStringEscape(lx *lexer) stateFn {
// that the '\x' has already been consumed.
func lexStringBinary(lx *lexer) stateFn {
r := lx.next()
if !isHexadecimal(r) {
return lx.errorf("Expected two hexadecimal digits after '\\x', but "+
"got '%v' instead.", r)
if isNL(r) {
return lx.errorf("Expected two hexadecimal digits after '\\x', but hit end of line")
}
r = lx.next()
if !isHexadecimal(r) {
return lx.errorf("Expected two hexadecimal digits after '\\x', but "+
"got '%v' instead.", r)
if isNL(r) {
return lx.errorf("Expected two hexadecimal digits after '\\x', but hit end of line")
}
return lexString
offset := lx.pos - 2
byteString, err := hex.DecodeString(lx.input[offset:lx.pos])
if err != nil {
return lx.errorf("Expected two hexadecimal digits after '\\x', but got '%s'", lx.input[offset:lx.pos])
}
lx.addStringPart(string(byteString))
return lx.stringStateFn
}
// lexNumberOrDateStart consumes either a (positive) integer, a float, a datetime, or IP.
@@ -1028,12 +1072,6 @@ func isNL(r rune) bool {
return r == '\n' || r == '\r'
}
// isHexadecimal reports whether r is an ASCII hexadecimal digit: '0'-'9',
// 'a'-'f' or 'A'-'F'.
func isHexadecimal(r rune) bool {
	switch {
	case '0' <= r && r <= '9':
		return true
	case 'a' <= r && r <= 'f':
		return true
	case 'A' <= r && r <= 'F':
		return true
	}
	return false
}
func (itype itemType) String() string {
switch itype {
case itemError:

View File

@@ -65,10 +65,20 @@ func TestComplexStringValues(t *testing.T) {
// TestBinaryString lexes a bare value made of a single \xXX escape and
// compares the emitted items with expectedItems.
// NOTE(review): two itemString expectations and two lex inputs are listed
// below. The first pair keeps the escape text `\x22` verbatim; the second pair
// expects `\x65` to be decoded to "e". They look like the before/after forms
// of the same lines — confirm that only the second pair remains in the real
// file.
func TestBinaryString(t *testing.T) {
expectedItems := []item{
{itemKey, "foo", 1},
{itemString, "\\x22", 1},
{itemString, "e", 1},
{itemEOF, "", 1},
}
lx := lex("foo = \\x22")
lx := lex("foo = \\x65")
expect(t, lx, expectedItems)
}
// TestBinaryStringLatin1 checks that a bare \xe9 escape is lexed as a string
// holding the single byte 0xE9.
func TestBinaryStringLatin1(t *testing.T) {
	const input = "foo = \\xe9"
	want := []item{
		{itemKey, "foo", 1},
		{itemString, "\xe9", 1},
		{itemEOF, "", 1},
	}
	expect(t, lex(input), want)
}
@@ -197,6 +207,46 @@ func TestSimpleKeyFloatValues(t *testing.T) {
expect(t, lx, expectedItems)
}
// TestBadBinaryStringEndingAfterZeroHexChars checks that a bare string whose
// \x escape is followed immediately by a newline yields the "hit end of line"
// error item. The error is expected on line 2.
func TestBadBinaryStringEndingAfterZeroHexChars(t *testing.T) {
	const input = "foo = xyz\\x\n"
	want := []item{
		{itemKey, "foo", 1},
		{itemError, "Expected two hexadecimal digits after '\\x', but hit end of line", 2},
		{itemEOF, "", 1},
	}
	expect(t, lex(input), want)
}
// TestBadBinaryStringEndingAfterOneHexChar checks that a bare string whose \x
// escape has only one digit before the newline yields the "hit end of line"
// error item. The error is expected on line 2.
func TestBadBinaryStringEndingAfterOneHexChar(t *testing.T) {
	const input = "foo = xyz\\xF\n"
	want := []item{
		{itemKey, "foo", 1},
		{itemError, "Expected two hexadecimal digits after '\\x', but hit end of line", 2},
		{itemEOF, "", 1},
	}
	expect(t, lex(input), want)
}
// TestBadBinaryStringWithZeroHexChars checks that, inside a double-quoted
// string, a \x escape followed by two non-hex characters yields an error item
// that quotes those two characters.
func TestBadBinaryStringWithZeroHexChars(t *testing.T) {
	const input = `foo = "[\x]"`
	want := []item{
		{itemKey, "foo", 1},
		{itemError, "Expected two hexadecimal digits after '\\x', but got ']\"'", 1},
		{itemEOF, "", 1},
	}
	expect(t, lex(input), want)
}
// TestBadBinaryStringWithOneHexChar checks that, inside a double-quoted
// string, a \x escape followed by one hex digit and one non-hex character
// yields an error item that quotes both characters.
func TestBadBinaryStringWithOneHexChar(t *testing.T) {
	const input = `foo = "[\xe]"`
	want := []item{
		{itemKey, "foo", 1},
		{itemError, "Expected two hexadecimal digits after '\\x', but got 'e]'", 1},
		{itemEOF, "", 1},
	}
	expect(t, lex(input), want)
}
func TestBadFloatValues(t *testing.T) {
expectedItems := []item{
{itemKey, "foo", 1},
@@ -269,7 +319,7 @@ func TestRawString(t *testing.T) {
lx := lex("foo = bar")
expect(t, lx, expectedItems)
lx = lex(`foo = bar' `)
lx = lex(`foo = bar' `) //'single-quote for emacs TODO: Remove me
expect(t, lx, expectedItems)
}
@@ -523,21 +573,131 @@ bs = \\
// TestEscapedString lexes escString (declared outside this function) and
// compares the emitted items with expectedItems, one key/value pair per
// escape: tab, carriage return, newline, double quote and backslash.
// NOTE(review): every key below is followed by two itemString expectations.
// The first is a raw literal holding the two-character escape text verbatim;
// the second is an interpreted literal holding the decoded character. They
// look like the before/after forms of the same lines — confirm that only the
// decoded form remains in the real file.
func TestEscapedString(t *testing.T) {
expectedItems := []item{
{itemKey, "foo", 2},
{itemString, `\t`, 2},
{itemString, "\t", 2},
{itemKey, "bar", 3},
{itemString, `\r`, 3},
{itemString, "\r", 3},
{itemKey, "baz", 4},
{itemString, `\n`, 4},
{itemString, "\n", 4},
{itemKey, "q", 5},
{itemString, `\"`, 5},
{itemString, "\"", 5},
{itemKey, "bs", 6},
{itemString, `\\`, 6},
{itemString, "\\", 6},
{itemEOF, "", 6},
}
lx := lex(escString)
expect(t, lx, expectedItems)
}
// TestCompoundStringES covers a double-quoted value that is an escape followed
// by plain text: `\\end` is expected to lex to a backslash plus "end".
// NOTE(review): EOF is listed on line 2 although the input has no newline;
// TestBinaryStringLatin1 uses line 1 for the same shape — confirm.
func TestCompoundStringES(t *testing.T) {
	const input = `foo = "\\end"`
	want := []item{
		{itemKey, "foo", 1},
		{itemString, "\\end", 1},
		{itemEOF, "", 2},
	}
	expect(t, lex(input), want)
}
// TestCompoundStringSE covers a double-quoted value that is plain text
// followed by an escape: `start\\` is expected to lex to "start" plus a
// backslash.
// NOTE(review): EOF is listed on line 2 although the input has no newline;
// TestBinaryStringLatin1 uses line 1 for the same shape — confirm.
func TestCompoundStringSE(t *testing.T) {
	const input = `foo = "start\\"`
	want := []item{
		{itemKey, "foo", 1},
		{itemString, "start\\", 1},
		{itemEOF, "", 2},
	}
	expect(t, lex(input), want)
}
// TestCompoundStringEE covers a bare value made of two consecutive \x escapes:
// `\x45\x71` is expected to lex to "Eq".
// NOTE(review): EOF is listed on line 2 although the input has no newline;
// TestBinaryStringLatin1 uses line 1 for the same shape — confirm.
func TestCompoundStringEE(t *testing.T) {
	const input = `foo = \x45\x71`
	want := []item{
		{itemKey, "foo", 1},
		{itemString, "Eq", 1},
		{itemEOF, "", 2},
	}
	expect(t, lex(input), want)
}
// TestCompoundStringSEE covers a bare value made of plain text followed by two
// \x escapes: `start\x45\x71` is expected to lex to "startEq".
// NOTE(review): EOF is listed on line 2 although the input has no newline;
// TestBinaryStringLatin1 uses line 1 for the same shape — confirm.
func TestCompoundStringSEE(t *testing.T) {
	const input = `foo = start\x45\x71`
	want := []item{
		{itemKey, "foo", 1},
		{itemString, "startEq", 1},
		{itemEOF, "", 2},
	}
	expect(t, lex(input), want)
}
// TestCompoundStringSES covers a bare value with an escape between two runs of
// plain text: `start\x7Cend` is expected to lex to "start|end".
// NOTE(review): EOF is listed on line 2 although the input has no newline;
// TestBinaryStringLatin1 uses line 1 for the same shape — confirm.
func TestCompoundStringSES(t *testing.T) {
	const input = `foo = start\x7Cend`
	want := []item{
		{itemKey, "foo", 1},
		{itemString, "start|end", 1},
		{itemEOF, "", 2},
	}
	expect(t, lex(input), want)
}
// TestCompoundStringEES covers a bare value made of two \x escapes followed by
// plain text: `\x3c\x3eend` is expected to lex to "<>end".
// NOTE(review): EOF is listed on line 2 although the input has no newline;
// TestBinaryStringLatin1 uses line 1 for the same shape — confirm.
func TestCompoundStringEES(t *testing.T) {
	const input = `foo = \x3c\x3eend`
	want := []item{
		{itemKey, "foo", 1},
		{itemString, "<>end", 1},
		{itemEOF, "", 2},
	}
	expect(t, lex(input), want)
}
// TestCompoundStringESE covers a bare value with plain text between two \x
// escapes, using both lower- and upper-case hex digits: `\x3cmiddle\x3E` is
// expected to lex to "<middle>".
// NOTE(review): EOF is listed on line 2 although the input has no newline;
// TestBinaryStringLatin1 uses line 1 for the same shape — confirm.
func TestCompoundStringESE(t *testing.T) {
	const input = `foo = \x3cmiddle\x3E`
	want := []item{
		{itemKey, "foo", 1},
		{itemString, "<middle>", 1},
		{itemEOF, "", 2},
	}
	expect(t, lex(input), want)
}
// TestNonBool checks that the bare value `\\true` is emitted as an itemString
// holding a backslash followed by "true", not as an itemBool.
// NOTE(review): EOF is listed on line 2 although the input has no newline;
// TestBinaryStringLatin1 uses line 1 for the same shape — confirm.
func TestNonBool(t *testing.T) {
	const input = `foo = \\true`
	want := []item{
		{itemKey, "foo", 1},
		{itemString, "\\true", 1},
		{itemEOF, "", 2},
	}
	expect(t, lex(input), want)
}
// TestNonVariable checks that the bare value `\\$var` is emitted as an
// itemString holding a backslash followed by "$var", not as an itemVariable.
// NOTE(review): EOF is listed on line 2 although the input has no newline;
// TestBinaryStringLatin1 uses line 1 for the same shape — confirm.
func TestNonVariable(t *testing.T) {
	const input = `foo = \\$var`
	want := []item{
		{itemKey, "foo", 1},
		{itemString, "\\$var", 1},
		{itemEOF, "", 2},
	}
	expect(t, lex(input), want)
}
// TestEmptyStringDQ checks that an empty double-quoted value is emitted as an
// itemString with an empty value.
// NOTE(review): EOF is listed on line 2 although the input has no newline;
// TestBinaryStringLatin1 uses line 1 for the same shape — confirm.
func TestEmptyStringDQ(t *testing.T) {
	const input = `foo = ""`
	want := []item{
		{itemKey, "foo", 1},
		{itemString, "", 1},
		{itemEOF, "", 2},
	}
	expect(t, lex(input), want)
}
// TestEmptyStringSQ checks that an empty single-quoted value is emitted as an
// itemString with an empty value.
// NOTE(review): EOF is listed on line 2 although the input has no newline;
// TestBinaryStringLatin1 uses line 1 for the same shape — confirm.
func TestEmptyStringSQ(t *testing.T) {
	const input = `foo = ''`
	want := []item{
		{itemKey, "foo", 1},
		{itemString, "", 1},
		{itemEOF, "", 2},
	}
	expect(t, lex(input), want)
}
var nestedWhitespaceMap = `
foo {
host {