diff --git a/pkg/yqlib/doc/Unique.md b/pkg/yqlib/doc/Unique.md new file mode 100644 index 0000000..06fabd3 --- /dev/null +++ b/pkg/yqlib/doc/Unique.md @@ -0,0 +1,82 @@ +This is used to filter out duplicated items in an array. + +## Unique array of scalars (string/numbers) +Given a sample.yml file of: +```yaml +- 1 +- 2 +- 3 +- 2 +``` +then +```bash +yq eval 'unique' sample.yml +``` +will output +```yaml +- 1 +- 2 +- 3 +``` + +## Unique nulls +Unique works on the node value, so it considers different representations of nulls to be different + +Given a sample.yml file of: +```yaml +- ~ +- null +- ~ +- null +``` +then +```bash +yq eval 'unique' sample.yml +``` +will output +```yaml +- ~ +- null +``` + +## Unique all nulls +Run against the node tag to unique all the nulls + +Given a sample.yml file of: +```yaml +- ~ +- null +- ~ +- null +``` +then +```bash +yq eval 'unique_by(tag)' sample.yml +``` +will output +```yaml +- ~ +``` + +## Unique array object fields +Given a sample.yml file of: +```yaml +- name: harry + pet: cat +- name: billy + pet: dog +- name: harry + pet: dog +``` +then +```bash +yq eval 'unique_by(.name)' sample.yml +``` +will output +```yaml +- name: harry + pet: cat +- name: billy + pet: dog +``` + diff --git a/pkg/yqlib/doc/headers/Unique.md b/pkg/yqlib/doc/headers/Unique.md new file mode 100644 index 0000000..50999ce --- /dev/null +++ b/pkg/yqlib/doc/headers/Unique.md @@ -0,0 +1 @@ +This is used to filter out duplicated items in an array. 
diff --git a/pkg/yqlib/expression_tokeniser.go b/pkg/yqlib/expression_tokeniser.go
index ad0dd2c..9f00ed2 100644
--- a/pkg/yqlib/expression_tokeniser.go
+++ b/pkg/yqlib/expression_tokeniser.go
@@ -259,6 +259,8 @@ func initLexer() (*lex.Lexer, error) {
 	lexer.Add([]byte(`sortKeys`), opToken(sortKeysOpType))
 	lexer.Add([]byte(`select`), opToken(selectOpType))
 	lexer.Add([]byte(`has`), opToken(hasOpType))
+	lexer.Add([]byte(`unique`), opToken(uniqueOpType))
+	lexer.Add([]byte(`unique_by`), opToken(uniqueByOpType))
 	lexer.Add([]byte(`explode`), opToken(explodeOpType))
 	lexer.Add([]byte(`or`), opToken(orOpType))
 	lexer.Add([]byte(`and`), opToken(andOpType))
diff --git a/pkg/yqlib/lib.go b/pkg/yqlib/lib.go
index bf88329..a9ece18 100644
--- a/pkg/yqlib/lib.go
+++ b/pkg/yqlib/lib.go
@@ -99,6 +99,8 @@ var recursiveDescentOpType = &operationType{Type: "RECURSIVE_DESCENT", NumArgs:
 
 var selectOpType = &operationType{Type: "SELECT", NumArgs: 1, Precedence: 50, Handler: selectOperator}
 var hasOpType = &operationType{Type: "HAS", NumArgs: 1, Precedence: 50, Handler: hasOperator}
+var uniqueOpType = &operationType{Type: "UNIQUE", NumArgs: 0, Precedence: 50, Handler: unique}
+var uniqueByOpType = &operationType{Type: "UNIQUE_BY", NumArgs: 1, Precedence: 50, Handler: uniqueBy}
 var deleteChildOpType = &operationType{Type: "DELETE", NumArgs: 1, Precedence: 40, Handler: deleteChildOperator}
 
 type Operation struct {
diff --git a/pkg/yqlib/operator_unique.go b/pkg/yqlib/operator_unique.go
new file mode 100644
index 0000000..5de638a
--- /dev/null
+++ b/pkg/yqlib/operator_unique.go
@@ -0,0 +1,77 @@
+package yqlib
+
+import (
+	"container/list"
+	"fmt"
+
+	"github.com/elliotchance/orderedmap"
+	yaml "gopkg.in/yaml.v3"
+)
+
+// unique removes duplicate entries from every matched array, comparing
+// entries by their own node value. It is uniqueBy with a self-reference as
+// the key expression; the incoming expressionNode is not consulted.
+func unique(d *dataTreeNavigator, context Context, expressionNode *ExpressionNode) (Context, error) {
+	selfExpression := &ExpressionNode{Operation: &Operation{OperationType: selfReferenceOpType}}
+	uniqueByExpression := &ExpressionNode{Operation: &Operation{OperationType: uniqueByOpType}, Rhs: selfExpression}
+	return uniqueBy(d, context, uniqueByExpression)
+}
+
+// uniqueBy removes duplicate entries from every matched array. For each entry
+// the expression in expressionNode.Rhs is evaluated against that entry and the
+// Value of the first resulting node is used as the de-duplication key; the
+// first entry seen for a key is kept and original order is preserved.
+//
+// Entries for which the key expression matches nothing are grouped under the
+// key "null" instead of dereferencing a nil list element.
+//
+// It returns an error if a matched node is not a sequence, or if evaluating
+// the key expression fails.
+//
+// NOTE(review): only Node.Value is compared, so keys that are maps or
+// sequences presumably all share the empty-string key - confirm whether
+// non-scalar keys need to be supported.
+func uniqueBy(d *dataTreeNavigator, context Context, expressionNode *ExpressionNode) (Context, error) {
+	log.Debugf("-- uniqueBy Operator")
+	var results = list.New()
+
+	for el := context.MatchingNodes.Front(); el != nil; el = el.Next() {
+		candidate := el.Value.(*CandidateNode)
+		candidateNode := unwrapDoc(candidate.Node)
+
+		if candidateNode.Kind != yaml.SequenceNode {
+			return Context{}, fmt.Errorf("Only arrays are supported for unique")
+		}
+
+		// An ordered map keeps the first occurrence of each key in input order.
+		var newMatches = orderedmap.NewOrderedMap()
+		for _, node := range candidateNode.Content {
+			child := &CandidateNode{Node: node}
+			rhs, err := d.GetMatchingNodes(context.SingleChildContext(child), expressionNode.Rhs)
+			if err != nil {
+				return Context{}, err
+			}
+
+			// The key expression may match nothing for this entry (for
+			// example a select that filters it out), in which case Front()
+			// is nil and must not be dereferenced.
+			keyValue := "null"
+			if first := rhs.MatchingNodes.Front(); first != nil {
+				keyValue = first.Value.(*CandidateNode).Node.Value
+			}
+
+			if _, exists := newMatches.Get(keyValue); !exists {
+				newMatches.Set(keyValue, child.Node)
+			}
+		}
+
+		resultNode := &yaml.Node{Kind: yaml.SequenceNode, Tag: "!!seq"}
+		for el := newMatches.Front(); el != nil; el = el.Next() {
+			resultNode.Content = append(resultNode.Content, el.Value.(*yaml.Node))
+		}
+
+		results.PushBack(candidate.CreateChild(nil, resultNode))
+	}
+
+	return context.ChildContext(results), nil
+}
diff --git a/pkg/yqlib/operator_unique_by_test.go b/pkg/yqlib/operator_unique_by_test.go
new file mode 100644
index 0000000..0f0eb4a
--- /dev/null
+++ b/pkg/yqlib/operator_unique_by_test.go
@@ -0,0 +1,50 @@
+package yqlib
+
+import (
+	"testing"
+)
+
+var uniqueOperatorScenarios = []expressionScenario{
+	{
+		description: "Unique array of scalars (string/numbers)",
+		document:    `[1,2,3,2]`,
+		expression:  `unique`,
+		expected: []string{
+			"D0, P[], (!!seq)::- 1\n- 2\n- 3\n",
+		},
+	},
+	{
+		description:    "Unique nulls",
+		subdescription: "Unique works on the node value, so it considers different representations of nulls to be different",
+		document:       `[~,null, ~, null]`,
+		expression:     `unique`,
+		expected: []string{
+			"D0, P[], (!!seq)::- ~\n- null\n",
+		},
+	},
+	{
+		description:    "Unique all nulls",
+		subdescription: "Run against the node tag to unique all the nulls",
+		document:       `[~,null, ~, null]`,
+		expression:     `unique_by(tag)`,
+		expected: []string{
+			"D0, P[], (!!seq)::- ~\n",
+		},
+	},
+	{
+		description: "Unique array object fields",
+		document:    `[{name: harry, pet: cat}, {name: billy, pet: dog}, {name: harry, pet: dog}]`,
+		expression:  `unique_by(.name)`,
+		expected: []string{
+			"D0, P[], (!!seq)::- {name: harry, pet: cat}\n- {name: billy, pet: dog}\n",
+		},
+	},
+
+}
+
+func TestUniqueOperatorScenarios(t *testing.T) {
+	for _, tt := range uniqueOperatorScenarios {
+		testScenario(t, &tt)
+	}
+	documentScenarios(t, "Unique", uniqueOperatorScenarios)
+}