diff --git a/pkg/yqlib/data_tree_navigator.go b/pkg/yqlib/data_tree_navigator.go
new file mode 100644
index 0000000..203a45e
--- /dev/null
+++ b/pkg/yqlib/data_tree_navigator.go
@@ -0,0 +1,23 @@
+package yqlib

+// import yaml "gopkg.in/yaml.v3"

+// type NodeLeafContext struct {
+// 	Node      *yaml.Node
+// 	Head      interface{}
+// 	PathStack []interface{}
+// }

+// func newNodeLeafContext(node *yaml.Node, head interface{}, pathStack []interface{}) NodeLeafContext {
+// 	newPathStack := make([]interface{}, len(pathStack))
+// 	copy(newPathStack, pathStack)
+// 	return NodeLeafContext{
+// 		Node:      node,
+// 		Head:      head,
+// 		PathStack: newPathStack,
+// 	}
+// }

+// type DataTreeNavigator interface {
+// 	Traverse(value *NodeLeafContext)
+// }
diff --git a/pkg/yqlib/data_tree_navigator_test.go b/pkg/yqlib/data_tree_navigator_test.go
new file mode 100644
index 0000000..88c44e9
--- /dev/null
+++ b/pkg/yqlib/data_tree_navigator_test.go
@@ -0,0 +1 @@
+package yqlib
diff --git a/pkg/yqlib/path_tokeniser.go b/pkg/yqlib/path_tokeniser.go
index 009cdf6..ab2da6c 100644
--- a/pkg/yqlib/path_tokeniser.go
+++ b/pkg/yqlib/path_tokeniser.go
@@ -8,27 +8,35 @@ import (
 	"github.com/timtadh/lexmachine/machines"
 )
 
-var Literals []string       // The tokens representing literal strings
-var Keywords []string       // The keyword tokens
-var Tokens []string         // All of the tokens (including literals and keywords)
-var TokenIds map[string]int // A map from the token names to their int ids
+var Literals []string        // The tokens representing literal strings
+var ClosingLiterals []string // The tokens representing closing literal strings
+var Keywords []string        // The keyword tokens
+var Tokens []string          // All of the tokens (including literals and keywords)
+var TokenIds map[string]int  // A map from the token names to their int ids
 
 func initTokens() {
-	Literals = []string{
+	Literals = []string{ // these need a traverse operator in front
 		"(",
-		")",
 		"[+]",
 		"[*]",
 		"**",
 	}
+	ClosingLiterals = []string{ // these need a traverse operator after
+		")",
+	}
 	Tokens = []string{
+		"BEGIN_SUB_EXPRESSION",
+		"END_SUB_EXPRESSION",
 		"OR_OPERATOR",
 		"AND_OPERATOR",
 		"EQUALS_OPERATOR",
+		"EQUALS_SELF_OPERATOR",
+		"TRAVERSE_OPERATOR",
 		"PATH_KEY",    // apples
-		"ARRAY_INDEX", // 1234
+		"ARRAY_INDEX", // 123
 	}
 	Tokens = append(Tokens, Literals...)
+	Tokens = append(Tokens, ClosingLiterals...)
 	TokenIds = make(map[string]int)
 	for i, tok := range Tokens {
 		TokenIds[tok] = i
@@ -78,15 +86,20 @@ func initLexer() (*lex.Lexer, error) {
 		r := "\\" + strings.Join(strings.Split(lit, ""), "\\")
 		lexer.Add([]byte(r), token(lit))
 	}
+	for _, lit := range ClosingLiterals {
+		r := "\\" + strings.Join(strings.Split(lit, ""), "\\")
+		lexer.Add([]byte(r), token(lit))
+	}
 	lexer.Add([]byte(`([Oo][Rr])`), token("OR_OPERATOR"))
 	lexer.Add([]byte(`([Aa][Nn][Dd])`), token("AND_OPERATOR"))
-	lexer.Add([]byte(`(==)`), token("EQUALS_OPERATOR"))
+	lexer.Add([]byte(`\.\s*==\s*`), token("EQUALS_SELF_OPERATOR"))
+	lexer.Add([]byte(`\s*==\s*`), token("EQUALS_OPERATOR"))
 	lexer.Add([]byte(`\[-?[0-9]+\]`), numberToken("ARRAY_INDEX", true))
 	lexer.Add([]byte(`-?[0-9]+`), numberToken("ARRAY_INDEX", false))
 	lexer.Add([]byte("( |\t|\n|\r)+"), skip)
 	lexer.Add([]byte(`"[^ "]+"`), wrappedToken("PATH_KEY"))
 	lexer.Add([]byte(`[^ \.\[\(\)=]+`), token("PATH_KEY"))
-	lexer.Add([]byte(`\.`), skip)
+	lexer.Add([]byte(`\.`), token("TRAVERSE_OPERATOR"))
 	err := lexer.Compile()
 	if err != nil {
 		return nil, err
@@ -129,6 +142,22 @@ func (p *pathTokeniser) Tokenise(path string) ([]*lex.Token, error) {
 			return nil, err
 		}
 	}
+	var postProcessedTokens []*lex.Token = make([]*lex.Token, 0)
 
-	return tokens, nil
+	for index, token := range tokens {
+		for _, literalTokenDef := range append(Literals, "ARRAY_INDEX") {
+			if index > 0 && token.Type == TokenIds[literalTokenDef] && tokens[index-1].Type != TokenIds["TRAVERSE_OPERATOR"] {
+				postProcessedTokens = append(postProcessedTokens, &lex.Token{Type: TokenIds["TRAVERSE_OPERATOR"], Value: "."})
+			}
+		}
+
+		postProcessedTokens = append(postProcessedTokens, token)
+
+		for _, literalTokenDef := range append(ClosingLiterals, "ARRAY_INDEX") {
+			if index != len(tokens)-1 && token.Type == TokenIds[literalTokenDef] && tokens[index+1].Type != TokenIds["TRAVERSE_OPERATOR"] {
+				postProcessedTokens = append(postProcessedTokens, &lex.Token{Type: TokenIds["TRAVERSE_OPERATOR"], Value: "."})
+			}
+		}
+	}
+
+	return postProcessedTokens, nil
 }
diff --git a/pkg/yqlib/path_tokeniser_test.go b/pkg/yqlib/path_tokeniser_test.go
index 75b0060..d5b2481 100644
--- a/pkg/yqlib/path_tokeniser_test.go
+++ b/pkg/yqlib/path_tokeniser_test.go
@@ -11,29 +11,42 @@ var tokeniserTests = []struct {
 	path           string
 	expectedTokens []interface{}
 }{ // TODO: Ensure ALL documented examples have tests! sheesh
-	{"apples.BANANAS", append(make([]interface{}, 0), "apples", "BANANAS")},
-	{"appl*.BANA*", append(make([]interface{}, 0), "appl*", "BANA*")},
-	{"a.b.**", append(make([]interface{}, 0), "a", "b", "**")},
-	{"a.\"=\".frog", append(make([]interface{}, 0), "a", "=", "frog")},
-	{"a.b.*", append(make([]interface{}, 0), "a", "b", "*")},
-	{"a.b.thin*", append(make([]interface{}, 0), "a", "b", "thin*")},
-	{"a.b[0]", append(make([]interface{}, 0), "a", "b", int64(0))},
-	{"a.b[*]", append(make([]interface{}, 0), "a", "b", "[*]")},
-	{"a.b[-12]", append(make([]interface{}, 0), "a", "b", int64(-12))},
-	{"a.b.0", append(make([]interface{}, 0), "a", "b", int64(0))},
-	{"a.b.d[+]", append(make([]interface{}, 0), "a", "b", "d", "[+]")},
+	{"apples.BANANAS", append(make([]interface{}, 0), "apples", ".", "BANANAS")},
+	{"appl*.BANA*", append(make([]interface{}, 0), "appl*", ".", "BANA*")},
+	{"a.b.**", append(make([]interface{}, 0), "a", ".", "b", ".", "**")},
+	{"a.\"=\".frog", append(make([]interface{}, 0), "a", ".", "=", ".", "frog")},
+	{"a.b.*", append(make([]interface{}, 0), "a", ".", "b", ".", "*")},
+	{"a.b.thin*", append(make([]interface{}, 0), "a", ".", "b", ".", "thin*")},
+	{"a.b[0]", append(make([]interface{}, 0), "a", ".", "b", ".", int64(0))},
+	{"a.b.[0]", append(make([]interface{}, 0), "a", ".", "b", ".", int64(0))},
+	{"a.b[*]", append(make([]interface{}, 0), "a", ".", "b", ".", "[*]")},
+	{"a.b.[*]", append(make([]interface{}, 0), "a", ".", "b", ".", "[*]")},
+	{"a.b[+]", append(make([]interface{}, 0), "a", ".", "b", ".", "[+]")},
+	{"a.b.[+]", append(make([]interface{}, 0), "a", ".", "b", ".", "[+]")},
+	{"a.b[-12]", append(make([]interface{}, 0), "a", ".", "b", ".", int64(-12))},
+	{"a.b.0", append(make([]interface{}, 0), "a", ".", "b", ".", int64(0))},
+	{"a.b.-12", append(make([]interface{}, 0), "a", ".", "b", ".", int64(-12))},
 	{"a", append(make([]interface{}, 0), "a")},
-	{"\"a.b\".c", append(make([]interface{}, 0), "a.b", "c")},
-	{`b."foo.bar"`, append(make([]interface{}, 0), "b", "foo.bar")},
-	{"animals(.==cat)", append(make([]interface{}, 0), "animals", "(", "==", "cat", ")")}, // TODO validate this dot is not a join?
-	{"animals(.==c*)", append(make([]interface{}, 0), "animals", "(", "==", "c*", ")")},   // TODO validate this dot is not a join?
-	{"[1].a.d", append(make([]interface{}, 0), int64(1), "a", "d")},
-	{"a[0].c", append(make([]interface{}, 0), "a", int64(0), "c")},
+	{"\"a.b\".c", append(make([]interface{}, 0), "a.b", ".", "c")},
+	{`b."foo.bar"`, append(make([]interface{}, 0), "b", ".", "foo.bar")},
+	{"animals(.==cat)", append(make([]interface{}, 0), "animals", ".", "(", ".==", "cat", ")")},
+	{"animals.(.==cat)", append(make([]interface{}, 0), "animals", ".", "(", ".==", "cat", ")")},
+	{"animals(. == cat)", append(make([]interface{}, 0), "animals", ".", "(", ". == ", "cat", ")")},
+	{"animals(.==c*)", append(make([]interface{}, 0), "animals", ".", "(", ".==", "c*", ")")},
+	{"animals(a.b==c*)", append(make([]interface{}, 0), "animals", ".", "(", "a", ".", "b", "==", "c*", ")")},
+	{"animals.(a.b==c*)", append(make([]interface{}, 0), "animals", ".", "(", "a", ".", "b", "==", "c*", ")")},
+	{"(a.b==c*).animals", append(make([]interface{}, 0), "(", "a", ".", "b", "==", "c*", ")", ".", "animals")},
+	{"(a.b==c*)animals", append(make([]interface{}, 0), "(", "a", ".", "b", "==", "c*", ")", ".", "animals")},
+	{"[1].a.d", append(make([]interface{}, 0), int64(1), ".", "a", ".", "d")},
+	{"[1]a.d", append(make([]interface{}, 0), int64(1), ".", "a", ".", "d")},
+	{"a[0]c", append(make([]interface{}, 0), "a", ".", int64(0), ".", "c")},
+	{"a.[0].c", append(make([]interface{}, 0), "a", ".", int64(0), ".", "c")},
 	{"[0]", append(make([]interface{}, 0), int64(0))},
-	{"a.cool(s.d.f==cool)", append(make([]interface{}, 0), "a", "cool", "(", "s", "d", "f", "==", "cool", ")")},
-	{"a.cool(s.d.f==cool OR t.b.h==frog).caterpillar", append(make([]interface{}, 0), "a", "cool", "(", "s", "d", "f", "==", "cool", "OR", "t", "b", "h", "==", "frog", ")", "caterpillar")},
-	{"a.cool(s.d.f==cool and t.b.h==frog)*", append(make([]interface{}, 0), "a", "cool", "(", "s", "d", "f", "==", "cool", "and", "t", "b", "h", "==", "frog", ")", "*")},
-	{"a.cool(s.d.f==cool and t.b.h==frog).th*", append(make([]interface{}, 0), "a", "cool", "(", "s", "d", "f", "==", "cool", "and", "t", "b", "h", "==", "frog", ")", "th*")},
+	{"0", append(make([]interface{}, 0), int64(0))},
+	{"a.cool(s.d.f == cool)", append(make([]interface{}, 0), "a", ".", "cool", ".", "(", "s", ".", "d", ".", "f", " == ", "cool", ")")},
+	{"a.cool.(s.d.f==cool OR t.b.h==frog).caterpillar", append(make([]interface{}, 0), "a", ".", "cool", ".", "(", "s", ".", "d", ".", "f", "==", "cool", "OR", "t", ".", "b", ".", "h", "==", "frog", ")", ".", "caterpillar")},
+	{"a.cool(s.d.f==cool and t.b.h==frog)*", append(make([]interface{}, 0), "a", ".", "cool", ".", "(", "s", ".", "d", ".", "f", "==", "cool", "and", "t", ".", "b", ".", "h", "==", "frog", ")", ".", "*")},
+	{"a.cool(s.d.f==cool and t.b.h==frog).th*", append(make([]interface{}, 0), "a", ".", "cool", ".", "(", "s", ".", "d", ".", "f", "==", "cool", "and", "t", ".", "b", ".", "h", "==", "frog", ")", ".", "th*")},
 }
 
 var tokeniser = NewPathTokeniser()