diff --git a/tests/golden/pr2308.json b/tests/golden/pr2308.json new file mode 100644 index 000000000..21ea18d16 --- /dev/null +++ b/tests/golden/pr2308.json @@ -0,0 +1,4 @@ +[ +{"Total": "Total", "count": 2, "A_keymax": [2.0]}, +{"Total": "Selected", "count": 0} +] diff --git a/tests/pr2308.vdj b/tests/pr2308.vdj new file mode 100644 index 000000000..bbc22f65b --- /dev/null +++ b/tests/pr2308.vdj @@ -0,0 +1,5 @@ +#!vd -p +{"sheet": null, "col": null, "row": null, "longname": "open-file", "input": "tests/small.json", "keystrokes": "o", "comment": null} +{"sheet": "small", "col": "id", "row": "", "longname": "key-col", "input": "", "keystrokes": "!", "comment": "toggle current column as a key column"} +{"sheet": "small", "col": "A", "row": "", "longname": "aggregate-col", "input": "keymax", "keystrokes": "+", "comment": "Add aggregator to current column"} +{"sheet": "small", "col": "", "row": "", "longname": "freq-summary", "input": "", "keystrokes": "zShift+F", "comment": "open one-line summary for all rows and selected rows"} diff --git a/tests/small.json b/tests/small.json new file mode 100644 index 000000000..a8c6e78a1 --- /dev/null +++ b/tests/small.json @@ -0,0 +1,4 @@ +[ +{"id": 1.0, "A": 1}, +{"id": 2.0, "A": 1} +] diff --git a/visidata/aggregators.py b/visidata/aggregators.py index ef12805ff..508a6d525 100644 --- a/visidata/aggregators.py +++ b/visidata/aggregators.py @@ -5,7 +5,7 @@ import statistics from visidata import Progress, Sheet, Column, ColumnsSheet, VisiData -from visidata import vd, anytype, vlen, asyncthread, wrapply, AttrDict +from visidata import vd, anytype, vlen, asyncthread, wrapply, AttrDict, date vd.help_aggregators = '''# Choose Aggregators Start typing an aggregator name or description. @@ -51,7 +51,11 @@ def getValues(self, rows): def aggregators_get(col): 'A space-separated names of aggregators on this column.' 
- return list(vd.aggregators[k] for k in (col.aggstr or '').split()) + aggs = [] + for k in (col.aggstr or '').split(): + agg = vd.aggregators[k] + aggs += agg if isinstance(agg, list) else [agg] + return aggs def aggregators_set(col, aggs): if isinstance(aggs, str): @@ -94,7 +98,7 @@ def _funcRows(col, rows): # wrap builtins so they can have a .type except Exception as e: if len(vals) == 0: return None - return e + raise e vd.aggregators[name] = _defaggr(name, type, _funcRows, funcValues=funcValues, helpstr=helpstr) # accepts a srccol + list of rows @@ -135,7 +139,9 @@ def _percentile(N, percent, key=lambda x:x): @functools.lru_cache(100) def percentile(pct, helpstr=''): - return _defaggr('p%s'%pct, None, lambda col,rows,pct=pct: _percentile(sorted(col.getValues(rows)), pct/100), helpstr=helpstr) + return _defaggr('p%s'%pct, None, + lambda col,rows,pct=pct: _percentile(sorted(col.getValues(rows)), pct/100, + key=(lambda d: d.timestamp()) if col.type is date else lambda x:x), helpstr=helpstr) def quantiles(q, helpstr): return [percentile(round(100*i/q), helpstr) for i in range(1, q)] @@ -149,7 +155,7 @@ def quantiles(q, helpstr): vd.aggregator('sum', vsum, 'sum of values') vd.aggregator('distinct', set, 'distinct values', type=vlen) vd.aggregator('count', lambda values: sum(1 for v in values), 'number of values', type=int) -vd.aggregator('list', list, 'list of values') +vd.aggregator('list', list, 'list of values', type=anytype) vd.aggregator('stdev', statistics.stdev, 'standard deviation of values', type=float) vd.aggregators['q3'] = quantiles(3, 'tertiles (33/66th pctile)') @@ -162,8 +168,24 @@ def quantiles(q, helpstr): for pct in (10, 20, 25, 30, 33, 40, 50, 60, 67, 70, 75, 80, 90, 95, 99): vd.aggregators[f'p{pct}'] = percentile(pct, f'{pct}th percentile') -# returns keys of the row with the max value -vd.aggregators['keymax'] = _defaggr('keymax', anytype, lambda col, rows: col.sheet.rowkey(max(col.getValueRows(rows))[1]), helpstr='key of the maximum value') 
+def keyfunc(aggr_func): + '''Return the key of the row that results from applying *aggr_func* to *rows*. + Return None if *rows* is an empty list. + *aggr_func* takes a list of (value, row) tuples, one for each row in the column, + excluding rows where the column holds null and error values. + *aggr_func* must also take the parameters *default* and *key*, as max() does: + https://docs.python.org/3/library/functions.html#max''' + def key_aggr_func(col, rows): + if not col.sheet.keyCols: + vd.error('key aggregator function requires one or more key columns') + return None + # convert dicts to lists because functions like max() can't compare dicts + sortkey = lambda t: (t[0], sorted(t[1].items())) if isinstance(t[1], dict) else t + row = aggr_func(col.getValueRows(rows), default=(None, None), key=sortkey)[1] + return col.sheet.rowkey(row) if row is not None else None + return key_aggr_func +vd.aggregators['keymax'] = _defaggr('keymax', anytype, keyfunc(max), helpstr='key of the maximum value') +vd.aggregators['keymin'] = _defaggr('keymin', anytype, keyfunc(min), helpstr='key of the minimum value') ColumnsSheet.columns += [ @@ -175,7 +197,7 @@ def quantiles(q, helpstr): @Sheet.api def addAggregators(sheet, cols, aggrnames): - 'Add each aggregator in list of *aggrnames* to each of *cols*.' + 'Add each aggregator in list of *aggrnames* to each of *cols*. Ignores names that are not valid.' for aggrname in aggrnames: aggrs = vd.aggregators.get(aggrname) aggrs = aggrs if isinstance(aggrs, list) else [aggrs] @@ -194,14 +216,19 @@ def aggname(col, agg): @Column.api @asyncthread -def memo_aggregate(col, agg, rows): +def memo_aggregate(col, agg_choices, rows): 'Show aggregated value in status, and add to memory.' 
- aggval = agg(col, rows) - typedval = wrapply(agg.type or col.type, aggval) - dispval = col.format(typedval) - k = col.name+'_'+agg.name - vd.status(f'{k}={dispval}') - vd.memory[k] = typedval + for agg_choice in agg_choices: + agg = vd.aggregators.get(agg_choice) + if not agg: continue + aggs = agg if isinstance(agg, list) else [agg] + for agg in aggs: + aggval = agg(col, rows) + typedval = wrapply(agg.type or col.type, aggval) + dispval = col.format(typedval) + k = col.name+'_'+agg.name + vd.status(f'{k}={dispval}') + vd.memory[k] = typedval @VisiData.property @@ -215,6 +242,7 @@ def aggregator_choices(vd): @VisiData.api def chooseAggregators(vd): + '''Return a list of aggregator name strings chosen or entered by the user. User-entered names may be invalid.''' prompt = 'choose aggregators: ' def _fmt_aggr_summary(match, row, trigger_key): formatted_aggrname = match.formatted.get('key', row.key) if match else row.key @@ -235,12 +263,15 @@ def _fmt_aggr_summary(match, row, trigger_key): multiple=True) aggrs = r.split() + valid_choices = vd.aggregators.keys() for aggr in aggrs: vd.usedInputs[aggr] += 1 + if aggr not in valid_choices: + vd.warning(f'aggregator does not exist: {aggr}') return aggrs Sheet.addCommand('+', 'aggregate-col', 'addAggregators([cursorCol], chooseAggregators())', 'Add aggregator to current column') -Sheet.addCommand('z+', 'memo-aggregate', 'for agg in chooseAggregators(): cursorCol.memo_aggregate(aggregators[agg], selectedRows or rows)', 'memo result of aggregator over values in selected rows for current column') +Sheet.addCommand('z+', 'memo-aggregate', 'cursorCol.memo_aggregate(chooseAggregators(), selectedRows or rows)', 'memo result of aggregator over values in selected rows for current column') ColumnsSheet.addCommand('g+', 'aggregate-cols', 'addAggregators(selectedRows or source[0].nonKeyVisibleCols, chooseAggregators())', 'add aggregators to selected source columns') vd.addMenuItems(''' diff --git a/visidata/column.py 
b/visidata/column.py index 7bf448cf6..77db1039c 100644 --- a/visidata/column.py +++ b/visidata/column.py @@ -381,7 +381,7 @@ def getCell(self, row): notecolor='color_warning') else: return DisplayWrapper(typedval.val, text=str(typedval.val), - error='unknown', + error=['unknown'], note=options.note_type_exc, notecolor='color_warning') diff --git a/visidata/freqtbl.py b/visidata/freqtbl.py index 60423f0d6..3ef8deaef 100644 --- a/visidata/freqtbl.py +++ b/visidata/freqtbl.py @@ -140,7 +140,7 @@ def rows(self): FreqTableSheet.addCommand('', 'open-preview', 'vd.push(FreqTablePreviewSheet(sheet.name, "preview", source=sheet, columns=source.columns), pane=2); vd.options.disp_splitwin_pct=50', 'open split preview of source rows at cursor') Sheet.addCommand('F', 'freq-col', 'vd.push(makeFreqTable(sheet, cursorCol))', 'open Frequency Table grouped on current column, with aggregations of other columns') -Sheet.addCommand('gF', 'freq-keys', 'vd.push(makeFreqTable(sheet, *keyCols))', 'open Frequency Table grouped by all key columns on source sheet, with aggregations of other columns') +Sheet.addCommand('gF', 'freq-keys', 'vd.push(makeFreqTable(sheet, *keyCols)) if keyCols else vd.fail("there are no key columns to group by")', 'open Frequency Table grouped by all key columns on source sheet, with aggregations of other columns') Sheet.addCommand('zF', 'freq-summary', 'vd.push(makeFreqTableSheetSummary(sheet, Column("Total", sheet=sheet, getter=lambda col, row: "Total")))', 'open one-line summary for all rows and selected rows') ColumnsSheet.addCommand(ENTER, 'freq-row', 'vd.push(makeFreqTable(source[0], cursorRow))', 'open a Frequency Table sheet grouped on column referenced in current row') diff --git a/visidata/pivot.py b/visidata/pivot.py index 773a94444..10c2377be 100644 --- a/visidata/pivot.py +++ b/visidata/pivot.py @@ -291,8 +291,11 @@ def afterLoad(self): @PivotSheet.api def addcol_aggr(sheet, col): hasattr(col, 'origCol') or vd.fail('not an aggregation column') - for 
agg in vd.chooseAggregators(): - sheet.addColumnAtCursor(makeAggrColumn(col.origCol, vd.aggregators[agg])) + for agg_choice in vd.chooseAggregators(): + agg_or_list = vd.aggregators.get(agg_choice) or [] + aggs = agg_or_list if isinstance(agg_or_list, list) else [agg_or_list] + for agg in aggs: + sheet.addColumnAtCursor(makeAggrColumn(col.origCol, agg)) Sheet.addCommand('W', 'pivot', 'vd.push(makePivot(sheet, keyCols, [cursorCol]))', 'open Pivot Table: group rows by key column and summarize current column') diff --git a/visidata/tests/test_commands.py b/visidata/tests/test_commands.py index 6fab70a8e..ac869b530 100644 --- a/visidata/tests/test_commands.py +++ b/visidata/tests/test_commands.py @@ -93,7 +93,7 @@ def isTestableCommand(longname, cmdlist): 'expand-cols-depth': '0', 'save-cmdlog': 'test_commands.vdj', 'aggregate-col': 'mean', - 'memo-aggregate': 'mean', + 'memo-aggregate': 'count', 'addcol-shell': '', 'theme-input': 'light', 'add-rows': '1',