Added support for integer suffixes, e.g. 1k, 8mb

2026-04-02 03:38:42 -07:00 · 2016-11-20 17:39:04 -08:00
parent 5c69f82d84
commit 6f9e4d6512
4 changed files with 182 additions and 33 deletions
--- a/conf/lex.go
+++ b/conf/lex.go
@@ -17,6 +17,7 @@ package conf

 import (
 	"fmt"
+	"unicode"
 	"unicode/utf8"
 )

@@ -182,7 +183,7 @@ func (lx *lexer) errorf(format string, values ...interface{}) stateFn {
 // lexTop consumes elements at the top level of data structure.
 func lexTop(lx *lexer) stateFn {
 	r := lx.next()
-	if isWhitespace(r) || isNL(r) {
+	if unicode.IsSpace(r) {
 		return lexSkip(lx, lexTop)
 	}

@@ -248,7 +249,7 @@ func lexKeyStart(lx *lexer) stateFn {
 	switch {
 	case isKeySeparator(r):
 		return lx.errorf("Unexpected key separator '%v'", r)
-	case isWhitespace(r) || isNL(r):
+	case unicode.IsSpace(r):
 		lx.next()
 		return lexSkip(lx, lexKeyStart)
 	case r == dqStringStart:
@@ -291,7 +292,7 @@ func lexQuotedKey(lx *lexer) stateFn {
 // is not whitespace) has already been consumed.
 func lexKey(lx *lexer) stateFn {
 	r := lx.peek()
-	if isWhitespace(r) || isNL(r) || isKeySeparator(r) || r == eof {
+	if unicode.IsSpace(r) || isKeySeparator(r) || r == eof {
 		lx.emit(itemKey)
 		return lexKeyEnd
 	}
@@ -305,7 +306,7 @@ func lexKey(lx *lexer) stateFn {
 func lexKeyEnd(lx *lexer) stateFn {
 	r := lx.next()
 	switch {
-	case isWhitespace(r) || isNL(r):
+	case unicode.IsSpace(r):
 		return lexSkip(lx, lexKeyEnd)
 	case isKeySeparator(r):
 		return lexSkip(lx, lexValue)
@@ -345,11 +346,11 @@ func lexValue(lx *lexer) stateFn {
 		lx.ignore() // ignore the " or '
 		return lexDubQuotedString
 	case r == '-':
-		return lexNumberStart
+		return lexNegNumberStart
 	case r == blockStart:
 		lx.ignore()
 		return lexBlock
-	case isDigit(r):
+	case unicode.IsDigit(r):
 		lx.backup() // avoid an extra state and use the same as above
 		return lexNumberOrDateOrIPStart
 	case r == '.': // special error case, be kind to users
@@ -366,7 +367,7 @@ func lexValue(lx *lexer) stateFn {
 func lexArrayValue(lx *lexer) stateFn {
 	r := lx.next()
 	switch {
-	case isWhitespace(r) || isNL(r):
+	case unicode.IsSpace(r):
 		return lexSkip(lx, lexArrayValue)
 	case r == commentHashStart:
 		lx.push(lexArrayValue)
@@ -433,7 +434,7 @@ func lexMapKeyStart(lx *lexer) stateFn {
 	switch {
 	case isKeySeparator(r):
 		return lx.errorf("Unexpected key separator '%v'.", r)
-	case isWhitespace(r) || isNL(r):
+	case unicode.IsSpace(r):
 		lx.next()
 		return lexSkip(lx, lexMapKeyStart)
 	case r == mapEnd:
@@ -491,7 +492,7 @@ func lexMapDubQuotedKey(lx *lexer) stateFn {
 // is not whitespace) has already been consumed.
 func lexMapKey(lx *lexer) stateFn {
 	r := lx.peek()
-	if isWhitespace(r) || isNL(r) || isKeySeparator(r) {
+	if unicode.IsSpace(r) || isKeySeparator(r) {
 		lx.emit(itemKey)
 		return lexMapKeyEnd
 	}
@@ -505,7 +506,7 @@ func lexMapKey(lx *lexer) stateFn {
 func lexMapKeyEnd(lx *lexer) stateFn {
 	r := lx.next()
 	switch {
-	case isWhitespace(r) || isNL(r):
+	case unicode.IsSpace(r):
 		return lexSkip(lx, lexMapKeyEnd)
 	case isKeySeparator(r):
 		return lexSkip(lx, lexMapValue)
@@ -521,7 +522,7 @@ func lexMapKeyEnd(lx *lexer) stateFn {
 func lexMapValue(lx *lexer) stateFn {
 	r := lx.next()
 	switch {
-	case isWhitespace(r) || isNL(r):
+	case unicode.IsSpace(r):
 		return lexSkip(lx, lexMapValue)
 	case r == mapValTerm:
 		return lx.errorf("Unexpected map value terminator %q.", mapValTerm)
@@ -722,7 +723,7 @@ func lexStringBinary(lx *lexer) stateFn {
 // It assumes that NO negative sign has been consumed, that is triggered above.
 func lexNumberOrDateOrIPStart(lx *lexer) stateFn {
 	r := lx.next()
-	if !isDigit(r) {
+	if !unicode.IsDigit(r) {
 		if r == '.' {
 			return lx.errorf("Floats must start with a digit, not '.'.")
 		}
@@ -740,10 +741,12 @@ func lexNumberOrDateOrIP(lx *lexer) stateFn {
 			return lx.errorf("All ISO8601 dates must be in full Zulu form.")
 		}
 		return lexDateAfterYear
-	case isDigit(r):
+	case unicode.IsDigit(r):
 		return lexNumberOrDateOrIP
 	case r == '.':
-		return lexFloatStart
+		return lexFloatStart // Assume float at first, but could be IP
+	case isNumberSuffix(r):
+		return lexConvenientNumber
 	}

 	lx.backup()
@@ -751,6 +754,18 @@ func lexNumberOrDateOrIP(lx *lexer) stateFn {
 	return lx.pop()
 }

+// lexConvenientNumber finishes an integer that carries a size suffix, e.g. 1k or 1Mb; the states that hand off here have already read the k/m/g letter.
+func lexConvenientNumber(lx *lexer) stateFn {
+	r := lx.next()
+	switch {
+	case r == 'b' || r == 'B': // trailing byte marker, as in 8mb; re-enters this state, so repeated b's are consumed too
+		return lexConvenientNumber
+	}
+	lx.backup()
+	lx.emit(itemInteger) // the suffixed text is emitted as an integer item
+	return lx.pop()
+}
+
 // lexDateAfterYear consumes a full Zulu Datetime in ISO8601 format.
 // It assumes that "YYYY-" has already been consumed.
 func lexDateAfterYear(lx *lexer) stateFn {
@@ -765,7 +780,7 @@ func lexDateAfterYear(lx *lexer) stateFn {
 	for _, f := range formats {
 		r := lx.next()
 		if f == '0' {
-			if !isDigit(r) {
+			if !unicode.IsDigit(r) {
 				return lx.errorf("Expected digit in ISO8601 datetime, "+
 					"but found '%v' instead.", r)
 			}
@@ -778,29 +793,31 @@ func lexDateAfterYear(lx *lexer) stateFn {
 	return lx.pop()
 }

-// lexNumberStart consumes either an integer or a float. It assumes that a
+// lexNegNumberStart consumes either an integer or a float. It assumes that a
 // negative sign has already been read, but that *no* digits have been consumed.
-// lexNumberStart will move to the appropriate integer or float states.
-func lexNumberStart(lx *lexer) stateFn {
+// lexNegNumberStart will move to the appropriate integer or float states.
+func lexNegNumberStart(lx *lexer) stateFn {
 	// we MUST see a digit. Even floats have to start with a digit.
 	r := lx.next()
-	if !isDigit(r) {
+	if !unicode.IsDigit(r) {
 		if r == '.' {
 			return lx.errorf("Floats must start with a digit, not '.'.")
 		}
 		return lx.errorf("Expected a digit but got '%v'.", r)
 	}
-	return lexNumber
+	return lexNegNumber
 }

-// lexNumber consumes an integer or a float after seeing the first digit.
-func lexNumber(lx *lexer) stateFn {
+// lexNegNumber consumes a negative integer or a float after seeing the first digit.
+func lexNegNumber(lx *lexer) stateFn {
 	r := lx.next()
 	switch {
-	case isDigit(r):
-		return lexNumber
+	case unicode.IsDigit(r):
+		return lexNegNumber
 	case r == '.':
 		return lexFloatStart
+	case isNumberSuffix(r):
+		return lexConvenientNumber
 	}
 	lx.backup()
 	lx.emit(itemInteger)
@@ -811,7 +828,7 @@ func lexNumber(lx *lexer) stateFn {
 // Namely, at least one digit is required.
 func lexFloatStart(lx *lexer) stateFn {
 	r := lx.next()
-	if !isDigit(r) {
+	if !unicode.IsDigit(r) {
 		return lx.errorf("Floats must have a digit after the '.', but got "+
 			"'%v' instead.", r)
 	}
@@ -822,7 +839,7 @@ func lexFloatStart(lx *lexer) stateFn {
 // Assumes that one digit has been consumed after a '.' already.
 func lexFloat(lx *lexer) stateFn {
 	r := lx.next()
-	if isDigit(r) {
+	if unicode.IsDigit(r) {
 		return lexFloat
 	}

@@ -839,7 +856,7 @@ func lexFloat(lx *lexer) stateFn {
 // lexIPAddr consumes IP addrs, like 127.0.0.1:4222
 func lexIPAddr(lx *lexer) stateFn {
 	r := lx.next()
-	if isDigit(r) || r == '.' || r == ':' {
+	if unicode.IsDigit(r) || r == '.' || r == ':' {
 		return lexIPAddr
 	}
 	lx.backup()
@@ -876,6 +893,11 @@ func lexSkip(lx *lexer, nextState stateFn) stateFn {
 	}
 }

+// isNumberSuffix reports whether r is one of the integer suffix letters k, m or g, in either case.
+func isNumberSuffix(r rune) bool {
+	return r == 'k' || r == 'K' || r == 'm' || r == 'M' || r == 'g' || r == 'G'
+}
+
 // Tests for both key separators
 func isKeySeparator(r rune) bool {
 	return r == keySepEqual || r == keySepColon
@@ -891,10 +913,6 @@ func isNL(r rune) bool {
 	return r == '\n' || r == '\r'
 }

-func isDigit(r rune) bool {
-	return r >= '0' && r <= '9'
-}
-
 func isHexadecimal(r rune) bool {
 	return (r >= '0' && r <= '9') ||
 		(r >= 'a' && r <= 'f') ||