From dcde4c69b02fbabe71ee597ed6ac089654bedacf Mon Sep 17 00:00:00 2001 From: John Kerl Date: Mon, 1 Jan 2024 18:59:09 -0500 Subject: [PATCH 1/3] mlr reorder with regex support for field-name selection --- pkg/transformers/reorder.go | 117 +++++++++++++++++++++++++++++++----- 1 file changed, 101 insertions(+), 16 deletions(-) diff --git a/pkg/transformers/reorder.go b/pkg/transformers/reorder.go index 216dd714da..3c0402b40c 100644 --- a/pkg/transformers/reorder.go +++ b/pkg/transformers/reorder.go @@ -4,6 +4,7 @@ import ( "container/list" "fmt" "os" + "regexp" "strings" "github.com/johnkerl/miller/pkg/cli" @@ -61,6 +62,7 @@ func transformerReorderParseCLI( argi++ var fieldNames []string = nil + doRegexes := false putAtEnd := false beforeFieldName := "" afterFieldName := "" @@ -82,6 +84,10 @@ func transformerReorderParseCLI( } else if opt == "-f" { fieldNames = cli.VerbGetStringArrayArgOrDie(verb, opt, args, &argi, argc) + } else if opt == "-r" { + fieldNames = cli.VerbGetStringArrayArgOrDie(verb, opt, args, &argi, argc) + doRegexes = true + } else if opt == "-b" { beforeFieldName = cli.VerbGetStringArgOrDie(verb, opt, args, &argi, argc) afterFieldName = "" @@ -115,6 +121,7 @@ func transformerReorderParseCLI( transformer, err := NewTransformerReorder( fieldNames, + doRegexes, putAtEnd, beforeFieldName, afterFieldName, @@ -132,6 +139,7 @@ type TransformerReorder struct { // input fieldNames []string fieldNamesSet map[string]bool + regexes []*regexp.Regexp beforeFieldName string afterFieldName string @@ -141,6 +149,7 @@ type TransformerReorder struct { func NewTransformerReorder( fieldNames []string, + doRegexes bool, putAtEnd bool, beforeFieldName string, afterFieldName string, @@ -164,6 +173,23 @@ func NewTransformerReorder( lib.ReverseStringList(tr.fieldNames) } + if doRegexes { + tr.regexes = make([]*regexp.Regexp, len(fieldNames)) + for i, regexString := range fieldNames { + // Handles "a.*b"i Miller case-insensitive-regex specification + regex, err := lib.CompileMillerRegex(regexString) + if err != nil { + fmt.Fprintf( + os.Stderr, + "%s %s: cannot compile regex [%s]\n", + "mlr", verbNameCut, regexString, + ) + os.Exit(1) + } + tr.regexes[i] = regex + } + } + return tr, nil } @@ -188,10 +214,39 @@ func (tr *TransformerReorder) reorderToStart( ) { if !inrecAndContext.EndOfStream { inrec := inrecAndContext.Record - for _, fieldName := range tr.fieldNames { - inrec.MoveToHead(fieldName) + + if tr.regexes == nil { + for _, fieldName := range tr.fieldNames { + inrec.MoveToHead(fieldName) + } + outputRecordsAndContexts.PushBack(inrecAndContext) + + } else { + outrec := mlrval.NewMlrmapAsRecord() + atEnds := list.New() + for pe := inrec.Head; pe != nil; pe = pe.Next { + found := false + for _, regex := range tr.regexes { + if regex.MatchString(pe.Key) { + outrec.PutReference(pe.Key, pe.Value) + found = true + break + } + } + if !found { + atEnds.PushBack(pe) + } + } + + for atEnd := atEnds.Front(); atEnd != nil; atEnd = atEnd.Next() { + // Ownership transfer; no copy needed + pe := atEnd.Value.(*mlrval.MlrmapEntry) + outrec.PutReference(pe.Key, pe.Value) + } + + outrecAndContext := types.NewRecordAndContext(outrec, &inrecAndContext.Context) + outputRecordsAndContexts.PushBack(outrecAndContext) } - outputRecordsAndContexts.PushBack(inrecAndContext) } else { outputRecordsAndContexts.PushBack(inrecAndContext) // end-of-stream marker @@ -207,10 +262,38 @@ func (tr *TransformerReorder) reorderToEnd( ) { if !inrecAndContext.EndOfStream { inrec := inrecAndContext.Record - for _, fieldName := range tr.fieldNames { - inrec.MoveToTail(fieldName) + if tr.regexes == nil { + for _, fieldName := range tr.fieldNames { + inrec.MoveToTail(fieldName) + } + outputRecordsAndContexts.PushBack(inrecAndContext) + + } else { + outrec := mlrval.NewMlrmapAsRecord() + atEnds := list.New() + for pe := inrec.Head; pe != nil; pe = pe.Next { + found := false + for _, regex := range tr.regexes { + if regex.MatchString(pe.Key) { + atEnds.PushBack(pe) + found = true + break + } + } + if !found { + outrec.PutReference(pe.Key, pe.Value) + } + } + + for atEnd := atEnds.Front(); atEnd != nil; atEnd = atEnd.Next() { + // Ownership transfer; no copy needed + pe := atEnd.Value.(*mlrval.MlrmapEntry) + outrec.PutReference(pe.Key, pe.Value) + } + + outrecAndContext := types.NewRecordAndContext(outrec, &inrecAndContext.Context) + outputRecordsAndContexts.PushBack(outrecAndContext) } - outputRecordsAndContexts.PushBack(inrecAndContext) } else { outputRecordsAndContexts.PushBack(inrecAndContext) // end-of-stream marker } @@ -240,8 +323,12 @@ func (tr *TransformerReorder) reorderBefore( if pe.Key == tr.beforeFieldName { break } - if !tr.fieldNamesSet[pe.Key] { - outrec.PutReference(pe.Key, pe.Value) + if tr.regexes == nil { + if !tr.fieldNamesSet[pe.Key] { + outrec.PutReference(pe.Key, pe.Value) + } + } else { + // XXX TO DO } } @@ -258,14 +345,15 @@ func (tr *TransformerReorder) reorderBefore( } for ; pe != nil; pe = pe.Next { - if pe.Key != tr.beforeFieldName && !tr.fieldNamesSet[pe.Key] { - outrec.PutReference(pe.Key, pe.Value) + if tr.regexes == nil { + if pe.Key != tr.beforeFieldName && !tr.fieldNamesSet[pe.Key] { + outrec.PutReference(pe.Key, pe.Value) + } + } else { + // XXX TO DO } } - for _, fieldName := range tr.fieldNames { - inrec.MoveToHead(fieldName) - } outputRecordsAndContexts.PushBack(types.NewRecordAndContext(outrec, &inrecAndContext.Context)) } else { @@ -320,9 +408,6 @@ func (tr *TransformerReorder) reorderAfter( } } - for _, fieldName := range tr.fieldNames { - inrec.MoveToHead(fieldName) - } outputRecordsAndContexts.PushBack(types.NewRecordAndContext(outrec, &inrecAndContext.Context)) } else { From b49bf70bc741539e83697a6424569d2dc7e6c139 Mon Sep 17 00:00:00 2001 From: John Kerl Date: Sun, 21 Jan 2024 14:39:13 -0500 Subject: [PATCH 2/3] neaten --- pkg/transformers/aaa_record_transformer.go | 6 + pkg/transformers/reorder.go | 359 +++++++++++---------- 2 files changed, 186 insertions(+), 179 deletions(-) diff --git a/pkg/transformers/aaa_record_transformer.go b/pkg/transformers/aaa_record_transformer.go index 1f9bae7dd6..1be4fc9176 100644 --- a/pkg/transformers/aaa_record_transformer.go +++ b/pkg/transformers/aaa_record_transformer.go @@ -27,6 +27,12 @@ type RecordTransformerFunc func( outputDownstreamDoneChannel chan<- bool, ) +// Used within some verbs +type RecordTransformerHelperFunc func( + inrecAndContext *types.RecordAndContext, + outputRecordsAndContexts *list.List, // list of *types.RecordAndContext +) + type TransformerUsageFunc func( ostream *os.File, ) diff --git a/pkg/transformers/reorder.go b/pkg/transformers/reorder.go index 3c0402b40c..4c4f7e2728 100644 --- a/pkg/transformers/reorder.go +++ b/pkg/transformers/reorder.go @@ -63,9 +63,8 @@ func transformerReorderParseCLI( var fieldNames []string = nil doRegexes := false - putAtEnd := false - beforeFieldName := "" - afterFieldName := "" + putAfter := false + centerFieldName := "" for argi < argc /* variable increment: 1 or 2 depending on flag */ { opt := args[argi] @@ -83,25 +82,23 @@ func transformerReorderParseCLI( } else if opt == "-f" { fieldNames = cli.VerbGetStringArrayArgOrDie(verb, opt, args, &argi, argc) + doRegexes = false } else if opt == "-r" { fieldNames = cli.VerbGetStringArrayArgOrDie(verb, opt, args, &argi, argc) doRegexes = true } else if opt == "-b" { - beforeFieldName = cli.VerbGetStringArgOrDie(verb, opt, args, &argi, argc) - afterFieldName = "" - putAtEnd = false + centerFieldName = cli.VerbGetStringArgOrDie(verb, opt, args, &argi, argc) + putAfter = false } else if opt == "-a" { - afterFieldName = cli.VerbGetStringArgOrDie(verb, opt, args, &argi, argc) - beforeFieldName = "" - putAtEnd = false + centerFieldName = cli.VerbGetStringArgOrDie(verb, opt, args, &argi, argc) + putAfter = true } else if opt == "-e" { - putAtEnd = true - beforeFieldName = "" - afterFieldName = "" + putAfter = true + centerFieldName = "" } else { transformerReorderUsage(os.Stderr) @@ -122,9 +119,8 @@ func transformerReorderParseCLI( transformer, err := NewTransformerReorder( fieldNames, doRegexes, - putAtEnd, - beforeFieldName, - afterFieldName, + putAfter, + centerFieldName, ) if err != nil { fmt.Fprintln(os.Stderr, err) @@ -140,37 +136,48 @@ type TransformerReorder struct { fieldNames []string fieldNamesSet map[string]bool regexes []*regexp.Regexp - beforeFieldName string - afterFieldName string + centerFieldName string + putAfter bool // state - recordTransformerFunc RecordTransformerFunc + recordTransformerFunc RecordTransformerHelperFunc } func NewTransformerReorder( fieldNames []string, doRegexes bool, - putAtEnd bool, - beforeFieldName string, - afterFieldName string, + putAfter bool, + centerFieldName string, ) (*TransformerReorder, error) { tr := &TransformerReorder{ fieldNames: fieldNames, fieldNamesSet: lib.StringListToSet(fieldNames), - beforeFieldName: beforeFieldName, - afterFieldName: afterFieldName, + centerFieldName: centerFieldName, + putAfter: putAfter, } - if putAtEnd { - tr.recordTransformerFunc = tr.reorderToEnd - } else if beforeFieldName != "" { - tr.recordTransformerFunc = tr.reorderBefore - } else if afterFieldName != "" { - tr.recordTransformerFunc = tr.reorderAfter + if centerFieldName == "" { + if putAfter { + if doRegexes { + tr.recordTransformerFunc = tr.reorderToEndWithRegex + } else { + tr.recordTransformerFunc = tr.reorderToEndNoRegex + } + } else { + if doRegexes { + tr.recordTransformerFunc = tr.reorderToStartWithRegex + } else { + tr.recordTransformerFunc = tr.reorderToStartNoRegex + lib.ReverseStringList(tr.fieldNames) + } + } } else { - tr.recordTransformerFunc = tr.reorderToStart - lib.ReverseStringList(tr.fieldNames) + if doRegexes { + tr.recordTransformerFunc = tr.reorderBeforeOrAfterWithRegex + } else { + tr.recordTransformerFunc = tr.reorderBeforeOrAfterNoRegex + } } if doRegexes { @@ -193,8 +200,6 @@ func NewTransformerReorder( return tr, nil } -// ---------------------------------------------------------------- - func (tr *TransformerReorder) Transform( inrecAndContext *types.RecordAndContext, outputRecordsAndContexts *list.List, // list of *types.RecordAndContext @@ -202,215 +207,211 @@ func (tr *TransformerReorder) Transform( outputDownstreamDoneChannel chan<- bool, ) { HandleDefaultDownstreamDone(inputDownstreamDoneChannel, outputDownstreamDoneChannel) - tr.recordTransformerFunc(inrecAndContext, outputRecordsAndContexts, inputDownstreamDoneChannel, outputDownstreamDoneChannel) + if !inrecAndContext.EndOfStream { + tr.recordTransformerFunc( + inrecAndContext, + outputRecordsAndContexts, + ) + } else { + outputRecordsAndContexts.PushBack(inrecAndContext) // end-of-stream marker + } } -// ---------------------------------------------------------------- -func (tr *TransformerReorder) reorderToStart( +func (tr *TransformerReorder) reorderToStartNoRegex( inrecAndContext *types.RecordAndContext, outputRecordsAndContexts *list.List, // list of *types.RecordAndContext - inputDownstreamDoneChannel <-chan bool, - outputDownstreamDoneChannel chan<- bool, ) { - if !inrecAndContext.EndOfStream { - inrec := inrecAndContext.Record - - if tr.regexes == nil { - for _, fieldName := range tr.fieldNames { - inrec.MoveToHead(fieldName) - } - outputRecordsAndContexts.PushBack(inrecAndContext) - - } else { - outrec := mlrval.NewMlrmapAsRecord() - atEnds := list.New() - for pe := inrec.Head; pe != nil; pe = pe.Next { - found := false - for _, regex := range tr.regexes { - if regex.MatchString(pe.Key) { - outrec.PutReference(pe.Key, pe.Value) - found = true - break - } - } - if !found { - atEnds.PushBack(pe) - } - } + inrec := inrecAndContext.Record + for _, fieldName := range tr.fieldNames { + inrec.MoveToHead(fieldName) + } + outputRecordsAndContexts.PushBack(inrecAndContext) +} - for atEnd := atEnds.Front(); atEnd != nil; atEnd = atEnd.Next() { - // Ownership transfer; no copy needed - pe := atEnd.Value.(*mlrval.MlrmapEntry) +func (tr *TransformerReorder) reorderToStartWithRegex( + inrecAndContext *types.RecordAndContext, + outputRecordsAndContexts *list.List, // list of *types.RecordAndContext +) { + inrec := inrecAndContext.Record + + outrec := mlrval.NewMlrmapAsRecord() + atEnds := list.New() + for pe := inrec.Head; pe != nil; pe = pe.Next { + found := false + for _, regex := range tr.regexes { + if regex.MatchString(pe.Key) { outrec.PutReference(pe.Key, pe.Value) + found = true + break } - - outrecAndContext := types.NewRecordAndContext(outrec, &inrecAndContext.Context) - outputRecordsAndContexts.PushBack(outrecAndContext) } + if !found { + atEnds.PushBack(pe) + } + } - } else { - outputRecordsAndContexts.PushBack(inrecAndContext) // end-of-stream marker + for atEnd := atEnds.Front(); atEnd != nil; atEnd = atEnd.Next() { + // Ownership transfer; no copy needed + pe := atEnd.Value.(*mlrval.MlrmapEntry) + outrec.PutReference(pe.Key, pe.Value) } + + outrecAndContext := types.NewRecordAndContext(outrec, &inrecAndContext.Context) + outputRecordsAndContexts.PushBack(outrecAndContext) } -// ---------------------------------------------------------------- -func (tr *TransformerReorder) reorderToEnd( +func (tr *TransformerReorder) reorderToEndNoRegex( inrecAndContext *types.RecordAndContext, outputRecordsAndContexts *list.List, // list of *types.RecordAndContext - inputDownstreamDoneChannel <-chan bool, - outputDownstreamDoneChannel chan<- bool, ) { - if !inrecAndContext.EndOfStream { - inrec := inrecAndContext.Record - if tr.regexes == nil { - for _, fieldName := range tr.fieldNames { - inrec.MoveToTail(fieldName) - } - outputRecordsAndContexts.PushBack(inrecAndContext) + inrec := inrecAndContext.Record + for _, fieldName := range tr.fieldNames { + inrec.MoveToTail(fieldName) + } + outputRecordsAndContexts.PushBack(inrecAndContext) - } else { - outrec := mlrval.NewMlrmapAsRecord() - atEnds := list.New() - for pe := inrec.Head; pe != nil; pe = pe.Next { - found := false - for _, regex := range tr.regexes { - if regex.MatchString(pe.Key) { - atEnds.PushBack(pe) - found = true - break - } - } - if !found { - outrec.PutReference(pe.Key, pe.Value) - } - } +} - for atEnd := atEnds.Front(); atEnd != nil; atEnd = atEnd.Next() { - // Ownership transfer; no copy needed - pe := atEnd.Value.(*mlrval.MlrmapEntry) - outrec.PutReference(pe.Key, pe.Value) +func (tr *TransformerReorder) reorderToEndWithRegex( + inrecAndContext *types.RecordAndContext, + outputRecordsAndContexts *list.List, // list of *types.RecordAndContext +) { + inrec := inrecAndContext.Record + outrec := mlrval.NewMlrmapAsRecord() + atEnds := list.New() + for pe := inrec.Head; pe != nil; pe = pe.Next { + found := false + for _, regex := range tr.regexes { + if regex.MatchString(pe.Key) { + atEnds.PushBack(pe) + found = true + break } - - outrecAndContext := types.NewRecordAndContext(outrec, &inrecAndContext.Context) - outputRecordsAndContexts.PushBack(outrecAndContext) } - } else { - outputRecordsAndContexts.PushBack(inrecAndContext) // end-of-stream marker + if !found { + outrec.PutReference(pe.Key, pe.Value) + } + } + + for atEnd := atEnds.Front(); atEnd != nil; atEnd = atEnd.Next() { + // Ownership transfer; no copy needed + pe := atEnd.Value.(*mlrval.MlrmapEntry) + outrec.PutReference(pe.Key, pe.Value) } + + outrecAndContext := types.NewRecordAndContext(outrec, &inrecAndContext.Context) + outputRecordsAndContexts.PushBack(outrecAndContext) } -// ---------------------------------------------------------------- -func (tr *TransformerReorder) reorderBefore( +func (tr *TransformerReorder) reorderBeforeOrAfterNoRegex( inrecAndContext *types.RecordAndContext, outputRecordsAndContexts *list.List, // list of *types.RecordAndContext - inputDownstreamDoneChannel <-chan bool, - outputDownstreamDoneChannel chan<- bool, ) { - if !inrecAndContext.EndOfStream { - inrec := inrecAndContext.Record - if inrec.Get(tr.beforeFieldName) == nil { - outputRecordsAndContexts.PushBack(inrecAndContext) - return - } + inrec := inrecAndContext.Record + if inrec.Get(tr.centerFieldName) == nil { + outputRecordsAndContexts.PushBack(inrecAndContext) + return + } - outrec := mlrval.NewMlrmapAsRecord() - pe := inrec.Head + outrec := mlrval.NewMlrmapAsRecord() + pe := inrec.Head - // * inrec will be GC'ed - // * We will use outrec.PutReference not output.PutCopy since inrec will be GC'ed + // We use outrec.PutReference not output.PutCopy since inrec will be GC'ed - for ; pe != nil; pe = pe.Next { - if pe.Key == tr.beforeFieldName { - break - } - if tr.regexes == nil { - if !tr.fieldNamesSet[pe.Key] { - outrec.PutReference(pe.Key, pe.Value) - } - } else { - // XXX TO DO - } + for ; pe != nil; pe = pe.Next { + if pe.Key == tr.centerFieldName { + break } + if !tr.fieldNamesSet[pe.Key] { + outrec.PutReference(pe.Key, pe.Value) + } + } + if !tr.putAfter { for _, fieldName := range tr.fieldNames { value := inrec.Get(fieldName) if value != nil { outrec.PutReference(fieldName, value) } } + } - value := inrec.Get(tr.beforeFieldName) - if value != nil { - outrec.PutReference(tr.beforeFieldName, value) - } + value := inrec.Get(tr.centerFieldName) + if value != nil { + outrec.PutReference(tr.centerFieldName, value) + } - for ; pe != nil; pe = pe.Next { - if tr.regexes == nil { - if pe.Key != tr.beforeFieldName && !tr.fieldNamesSet[pe.Key] { - outrec.PutReference(pe.Key, pe.Value) - } - } else { - // XXX TO DO + if tr.putAfter { + for _, fieldName := range tr.fieldNames { + value := inrec.Get(fieldName) + if value != nil { + outrec.PutReference(fieldName, value) } } + } - outputRecordsAndContexts.PushBack(types.NewRecordAndContext(outrec, &inrecAndContext.Context)) - - } else { - outputRecordsAndContexts.PushBack(inrecAndContext) // end-of-stream marker + for ; pe != nil; pe = pe.Next { + if pe.Key != tr.centerFieldName && !tr.fieldNamesSet[pe.Key] { + outrec.PutReference(pe.Key, pe.Value) + } } + + outputRecordsAndContexts.PushBack(types.NewRecordAndContext(outrec, &inrecAndContext.Context)) + } -// ---------------------------------------------------------------- -func (tr *TransformerReorder) reorderAfter( +func (tr *TransformerReorder) reorderBeforeOrAfterWithRegex( inrecAndContext *types.RecordAndContext, outputRecordsAndContexts *list.List, // list of *types.RecordAndContext - inputDownstreamDoneChannel <-chan bool, - outputDownstreamDoneChannel chan<- bool, ) { - if !inrecAndContext.EndOfStream { - inrec := inrecAndContext.Record - if inrec.Get(tr.afterFieldName) == nil { - outputRecordsAndContexts.PushBack(inrecAndContext) - return - } + inrec := inrecAndContext.Record + if inrec.Get(tr.centerFieldName) == nil { + outputRecordsAndContexts.PushBack(inrecAndContext) + return + } - outrec := mlrval.NewMlrmapAsRecord() - pe := inrec.Head + outrec := mlrval.NewMlrmapAsRecord() + pe := inrec.Head - // * inrec will be GC'ed - // * We will use outrec.PutReference not output.PutCopy since inrec will be GC'ed + // We use outrec.PutReference not output.PutCopy since inrec will be GC'ed - for ; pe != nil; pe = pe.Next { - if pe.Key == tr.afterFieldName { - break - } - if !tr.fieldNamesSet[pe.Key] { - outrec.PutReference(pe.Key, pe.Value) - } + for ; pe != nil; pe = pe.Next { + if pe.Key == tr.centerFieldName { + break } - - value := inrec.Get(tr.afterFieldName) - if value != nil { - outrec.PutReference(tr.afterFieldName, value) + if !tr.fieldNamesSet[pe.Key] { + outrec.PutReference(pe.Key, pe.Value) } + } + if !tr.putAfter { for _, fieldName := range tr.fieldNames { value := inrec.Get(fieldName) if value != nil { outrec.PutReference(fieldName, value) } } + } - for ; pe != nil; pe = pe.Next { - if pe.Key != tr.afterFieldName && !tr.fieldNamesSet[pe.Key] { - outrec.PutReference(pe.Key, pe.Value) + value := inrec.Get(tr.centerFieldName) + if value != nil { + outrec.PutReference(tr.centerFieldName, value) + } + + if tr.putAfter { + for _, fieldName := range tr.fieldNames { + value := inrec.Get(fieldName) + if value != nil { + outrec.PutReference(fieldName, value) } } + } - outputRecordsAndContexts.PushBack(types.NewRecordAndContext(outrec, &inrecAndContext.Context)) - - } else { - outputRecordsAndContexts.PushBack(inrecAndContext) // end-of-stream marker + for ; pe != nil; pe = pe.Next { + if pe.Key != tr.centerFieldName && !tr.fieldNamesSet[pe.Key] { + outrec.PutReference(pe.Key, pe.Value) + } } + + outputRecordsAndContexts.PushBack(types.NewRecordAndContext(outrec, &inrecAndContext.Context)) } From b2e62c854b3e86090974ef6aa88a4ffedaf4eed5 Mon Sep 17 00:00:00 2001 From: John Kerl Date: Sun, 21 Jan 2024 15:11:52 -0500 Subject: [PATCH 3/3] -r -b/-a; unit-test cases --- pkg/transformers/reorder.go | 55 ++++++++------------- test/cases/verb-reorder/regex-after/cmd | 1 + test/cases/verb-reorder/regex-after/experr | 0 test/cases/verb-reorder/regex-after/expout | 10 ++++ test/cases/verb-reorder/regex-before/cmd | 1 + test/cases/verb-reorder/regex-before/experr | 0 test/cases/verb-reorder/regex-before/expout | 10 ++++ test/cases/verb-reorder/regex-end/cmd | 1 + test/cases/verb-reorder/regex-end/experr | 0 test/cases/verb-reorder/regex-end/expout | 10 ++++ test/cases/verb-reorder/regex-start/cmd | 1 + test/cases/verb-reorder/regex-start/experr | 0 test/cases/verb-reorder/regex-start/expout | 10 ++++ test/input/reorder-regex.nidx | 1 + 14 files changed, 66 insertions(+), 34 deletions(-) create mode 100644 test/cases/verb-reorder/regex-after/cmd create mode 100644 test/cases/verb-reorder/regex-after/experr create mode 100644 test/cases/verb-reorder/regex-after/expout create mode 100644 test/cases/verb-reorder/regex-before/cmd create mode 100644 test/cases/verb-reorder/regex-before/experr create mode 100644 test/cases/verb-reorder/regex-before/expout create mode 100644 test/cases/verb-reorder/regex-end/cmd create mode 100644 test/cases/verb-reorder/regex-end/experr create mode 100644 test/cases/verb-reorder/regex-end/expout create mode 100644 test/cases/verb-reorder/regex-start/cmd create mode 100644 test/cases/verb-reorder/regex-start/experr create mode 100644 test/cases/verb-reorder/regex-start/expout create mode 100644 test/input/reorder-regex.nidx diff --git a/pkg/transformers/reorder.go b/pkg/transformers/reorder.go index 4c4f7e2728..141b55c19a 100644 --- a/pkg/transformers/reorder.go +++ b/pkg/transformers/reorder.go @@ -370,45 +370,32 @@ func (tr *TransformerReorder) reorderBeforeOrAfterWithRegex( return } - outrec := mlrval.NewMlrmapAsRecord() - pe := inrec.Head - - // We use outrec.PutReference not output.PutCopy since inrec will be GC'ed - - for ; pe != nil; pe = pe.Next { - if pe.Key == tr.centerFieldName { - break - } - if !tr.fieldNamesSet[pe.Key] { - outrec.PutReference(pe.Key, pe.Value) - } - } - - if !tr.putAfter { - for _, fieldName := range tr.fieldNames { - value := inrec.Get(fieldName) - if value != nil { - outrec.PutReference(fieldName, value) + matchingFieldNamesSet := lib.NewOrderedMap() + for pe := inrec.Head; pe != nil; pe = pe.Next { + for _, regex := range tr.regexes { + if regex.MatchString(pe.Key) { + if pe.Key != tr.centerFieldName { + matchingFieldNamesSet.Put(pe.Key, pe.Value) + break + } } } } - value := inrec.Get(tr.centerFieldName) - if value != nil { - outrec.PutReference(tr.centerFieldName, value) - } - - if tr.putAfter { - for _, fieldName := range tr.fieldNames { - value := inrec.Get(fieldName) - if value != nil { - outrec.PutReference(fieldName, value) + // We use outrec.PutReference not output.PutCopy since inrec will be GC'ed + outrec := mlrval.NewMlrmapAsRecord() + for pe := inrec.Head; pe != nil; pe = pe.Next { + if pe.Key == tr.centerFieldName { + if tr.putAfter { + outrec.PutReference(pe.Key, pe.Value) } - } - } - - for ; pe != nil; pe = pe.Next { - if pe.Key != tr.centerFieldName && !tr.fieldNamesSet[pe.Key] { + for pf := matchingFieldNamesSet.Head; pf != nil; pf = pf.Next { + outrec.PutReference(pf.Key, pf.Value.(*mlrval.Mlrval)) + } + if !tr.putAfter { + outrec.PutReference(pe.Key, pe.Value) + } + } else if !matchingFieldNamesSet.Has(pe.Key) { outrec.PutReference(pe.Key, pe.Value) } } diff --git a/test/cases/verb-reorder/regex-after/cmd b/test/cases/verb-reorder/regex-after/cmd new file mode 100644 index 0000000000..59a79f7f6d --- /dev/null +++ b/test/cases/verb-reorder/regex-after/cmd @@ -0,0 +1 @@ +mlr --n2x reorder -r 3,9,8 -a 6 test/input/reorder-regex.nidx diff --git a/test/cases/verb-reorder/regex-after/experr b/test/cases/verb-reorder/regex-after/experr new file mode 100644 index 0000000000..e69de29bb2 diff --git a/test/cases/verb-reorder/regex-after/expout b/test/cases/verb-reorder/regex-after/expout new file mode 100644 index 0000000000..62cb82ad5c --- /dev/null +++ b/test/cases/verb-reorder/regex-after/expout @@ -0,0 +1,10 @@ +1 a +2 b +4 d +5 e +6 f +3 c +8 h +9 i +7 g +10 j diff --git a/test/cases/verb-reorder/regex-before/cmd b/test/cases/verb-reorder/regex-before/cmd new file mode 100644 index 0000000000..f207567a86 --- /dev/null +++ b/test/cases/verb-reorder/regex-before/cmd @@ -0,0 +1 @@ +mlr --n2x reorder -r 3,9,8 -b 6 test/input/reorder-regex.nidx diff --git a/test/cases/verb-reorder/regex-before/experr b/test/cases/verb-reorder/regex-before/experr new file mode 100644 index 0000000000..e69de29bb2 diff --git a/test/cases/verb-reorder/regex-before/expout b/test/cases/verb-reorder/regex-before/expout new file mode 100644 index 0000000000..ef4d4f1666 --- /dev/null +++ b/test/cases/verb-reorder/regex-before/expout @@ -0,0 +1,10 @@ +1 a +2 b +4 d +5 e +3 c +8 h +9 i +6 f +7 g +10 j diff --git a/test/cases/verb-reorder/regex-end/cmd b/test/cases/verb-reorder/regex-end/cmd new file mode 100644 index 0000000000..8c3e21c81b --- /dev/null +++ b/test/cases/verb-reorder/regex-end/cmd @@ -0,0 +1 @@ +mlr --n2x reorder -r 3,9,8 -e test/input/reorder-regex.nidx diff --git a/test/cases/verb-reorder/regex-end/experr b/test/cases/verb-reorder/regex-end/experr new file mode 100644 index 0000000000..e69de29bb2 diff --git a/test/cases/verb-reorder/regex-end/expout b/test/cases/verb-reorder/regex-end/expout new file mode 100644 index 0000000000..7a7424aa94 --- /dev/null +++ b/test/cases/verb-reorder/regex-end/expout @@ -0,0 +1,10 @@ +1 a +2 b +4 d +5 e +6 f +7 g +10 j +3 c +8 h +9 i diff --git a/test/cases/verb-reorder/regex-start/cmd b/test/cases/verb-reorder/regex-start/cmd new file mode 100644 index 0000000000..2020a1393e --- /dev/null +++ b/test/cases/verb-reorder/regex-start/cmd @@ -0,0 +1 @@ +mlr --n2x reorder -r 3,9,8 test/input/reorder-regex.nidx diff --git a/test/cases/verb-reorder/regex-start/experr b/test/cases/verb-reorder/regex-start/experr new file mode 100644 index 0000000000..e69de29bb2 diff --git a/test/cases/verb-reorder/regex-start/expout b/test/cases/verb-reorder/regex-start/expout new file mode 100644 index 0000000000..ee16332d97 --- /dev/null +++ b/test/cases/verb-reorder/regex-start/expout @@ -0,0 +1,10 @@ +3 c +8 h +9 i +1 a +2 b +4 d +5 e +6 f +7 g +10 j diff --git a/test/input/reorder-regex.nidx b/test/input/reorder-regex.nidx new file mode 100644 index 0000000000..6a76ef8fa8 --- /dev/null +++ b/test/input/reorder-regex.nidx @@ -0,0 +1 @@ +a b c d e f g h i j