diff --git a/docs/src/data-diving-examples.md b/docs/src/data-diving-examples.md index 39738f193d..100716ec26 100644 --- a/docs/src/data-diving-examples.md +++ b/docs/src/data-diving-examples.md @@ -160,11 +160,11 @@ CITRUS COUNTY 1332.9 79974.9 483785.1 stats2 -a corr,linreg-ols,r2 -f tiv_2011,tiv_2012
-tiv_2011_tiv_2012_corr  0.9730497632351692
-tiv_2011_tiv_2012_ols_m 0.9835583980337723
-tiv_2011_tiv_2012_ols_b 433854.6428968317
+tiv_2011_tiv_2012_corr  0.9730497632351701
+tiv_2011_tiv_2012_ols_m 0.9835583980337732
+tiv_2011_tiv_2012_ols_b 433854.6428968301
 tiv_2011_tiv_2012_ols_n 36634
-tiv_2011_tiv_2012_r2    0.9468258417320189
+tiv_2011_tiv_2012_r2    0.9468258417320204
 
@@ -322,7 +322,7 @@ Look at bivariate stats by color and shape. In particular, `u,v` pairwise correl
 
           u_v_corr              w_x_corr
-0.1334180491027861 -0.011319841199866178
+0.1334180491027861 -0.011319841199852926
 
@@ -332,22 +332,22 @@ Look at bivariate stats by color and shape. In particular, `u,v` pairwise correl
 
  color    shape              u_v_corr               w_x_corr
-   red   circle    0.9807984401887236   -0.01856553658708754
-orange   square   0.17685855992752927   -0.07104431573806054
- green   circle   0.05764419437577255    0.01179572988801509
-   red   square   0.05574477124893523 -0.0006801456507510942
-yellow triangle   0.04457273771962798   0.024604310103081825
-yellow   square   0.04379172927296089   -0.04462197201631237
-purple   circle   0.03587354936895086     0.1341133954140899
-  blue   square   0.03241153095761164  -0.053507648119643196
-  blue triangle  0.015356427073158766 -0.0006089997461435399
-orange   circle  0.010518953877704048   -0.16279397329279383
-   red triangle   0.00809782571528034   0.012486621357942596
-purple triangle  0.005155190909099334  -0.045057909256220656
-purple   square -0.025680276963377404    0.05769429647930396
- green   square   -0.0257760734502851  -0.003265173252087127
-orange triangle -0.030456661186085785    -0.1318699981926352
-yellow   circle  -0.06477331572781474    0.07369449819706045
-  blue   circle  -0.10234761901929677  -0.030528539069837757
- green triangle  -0.10901825107358765   -0.04848782060162929
+   red   circle    0.9807984401887242  -0.018565536587084836
+orange   square   0.17685855992752933   -0.07104431573805543
+ green   circle   0.05764419437577257   0.011795729888018455
+   red   square    0.0557447712489348 -0.0006801456507506415
+yellow triangle    0.0445727377196281   0.024604310103079844
+yellow   square    0.0437917292729612  -0.044621972016306265
+purple   circle   0.03587354936895115    0.13411339541407613
+  blue   square   0.03241153095761152   -0.05350764811965621
+  blue triangle  0.015356427073158612 -0.0006089997461408209
+orange   circle  0.010518953877704181    -0.1627939732927932
+   red triangle   0.00809782571528054    0.01248662135795501
+purple triangle  0.005155190909099739   -0.04505790925621933
+purple   square  -0.02568027696337717   0.057694296479293694
+ green   square -0.025776073450284875 -0.0032651732520739014
+orange triangle -0.030456661186085584   -0.13186999819263814
+yellow   circle  -0.06477331572781515     0.0736944981970553
+  blue   circle   -0.1023476190192966  -0.030528539069839333
+ green triangle  -0.10901825107358747   -0.04848782060162855
 
diff --git a/docs/src/dkvp-examples.md b/docs/src/dkvp-examples.md index 2f3e3b5108..da29db4c33 100644 --- a/docs/src/dkvp-examples.md +++ b/docs/src/dkvp-examples.md @@ -251,6 +251,7 @@ a=eks,b=pan,i=2,y=0.522151,ab=ekspan,iy=2.522151,ta=String,tb=String,ti=Integer, a=wye,b=wye,i=3,y=0.338318,ab=wyewye,iy=3.338318,ta=String,tb=String,ti=Integer,ty=Float,tab=String,tiy=Float a=eks,b=wye,i=4,y=0.134188,ab=ekswye,iy=4.134188,ta=String,tb=String,ti=Integer,ty=Float,tab=String,tiy=Float a=wye,b=pan,i=5,y=0.863624,ab=wyepan,iy=5.863624,ta=String,tb=String,ti=Integer,ty=Float,tab=String,tiy=Float +/System/Library/Frameworks/Ruby.framework/Versions/2.6/usr/lib/ruby/2.6.0/universal-darwin22/rbconfig.rb:21: warning: Insecure world writable dir /usr/local/bin in PATH, mode 040777 Run as-is, then pipe to Miller for pretty-printing: @@ -265,4 +266,5 @@ eks pan 2 0.522151 ekspan 2.522151 String String Integer Float String Float wye wye 3 0.338318 wyewye 3.338318 String String Integer Float String Float eks wye 4 0.134188 ekswye 4.134188 String String Integer Float String Float wye pan 5 0.863624 wyepan 5.863624 String String Integer Float String Float +/System/Library/Frameworks/Ruby.framework/Versions/2.6/usr/lib/ruby/2.6.0/universal-darwin22/rbconfig.rb:21: warning: Insecure world writable dir /usr/local/bin in PATH, mode 040777 diff --git a/docs/src/manpage.md b/docs/src/manpage.md index f0da5aea96..5ab08d2552 100644 --- a/docs/src/manpage.md +++ b/docs/src/manpage.md @@ -203,32 +203,34 @@ MILLER(1) MILLER(1) unsparsify 1mFUNCTION LIST0m - abs acos acosh any append apply arrayify asin asinh asserting_absent + abs acos acosh antimode any append apply arrayify asin asinh asserting_absent asserting_array asserting_bool asserting_boolean asserting_empty asserting_empty_map asserting_error asserting_float asserting_int asserting_map asserting_nonempty_map asserting_not_array asserting_not_empty asserting_not_map asserting_not_null asserting_null asserting_numeric 
asserting_present asserting_string atan atan2 atanh bitcount boolean capitalize cbrt ceil clean_whitespace collapse_whitespace concat cos cosh - depth dhms2fsec dhms2sec erf erfc every exec exp expm1 flatten float floor - fmtifnum fmtnum fold format fsec2dhms fsec2hms get_keys get_values - gmt2localtime gmt2nsec gmt2sec gssub gsub haskey hexfmt hms2fsec hms2sec - hostname index int invqnorm is_absent is_array is_bool is_boolean is_empty - is_empty_map is_error is_float is_int is_map is_nan is_nonempty_map + count depth dhms2fsec dhms2sec distinct_count erf erfc every exec exp expm1 + flatten float floor fmtifnum fmtnum fold format fsec2dhms fsec2hms get_keys + get_values gmt2localtime gmt2nsec gmt2sec gssub gsub haskey hexfmt hms2fsec + hms2sec hostname index int invqnorm is_absent is_array is_bool is_boolean + is_empty is_empty_map is_error is_float is_int is_map is_nan is_nonempty_map is_not_array is_not_empty is_not_map is_not_null is_null is_numeric is_present - is_string joink joinkv joinv json_parse json_stringify latin1_to_utf8 + is_string joink joinkv joinv json_parse json_stringify kurtosis latin1_to_utf8 leafcount leftpad length localtime2gmt localtime2nsec localtime2sec log log10 - log1p logifit lstrip madd mapdiff mapexcept mapselect mapsum max md5 mexp min - mmul msub nsec2gmt nsec2gmtdate nsec2localdate nsec2localtime os pow qnorm + log1p logifit lstrip madd mapdiff mapexcept mapselect mapsum max maxlen md5 + mean meaneb median mexp min minlen mmul mode msub nsec2gmt nsec2gmtdate + nsec2localdate nsec2localtime null_count os percentile percentiles pow qnorm reduce regextract regextract_or_else rightpad round roundm rstrip sec2dhms sec2gmt sec2gmtdate sec2hms sec2localdate sec2localtime select sgn sha1 sha256 - sha512 sin sinh sort splita splitax splitkv splitkvx splitnv splitnvx sqrt - ssub strfntime strfntime_local strftime strftime_local string strip strlen - strpntime strpntime_local strptime strptime_local sub substr substr0 substr1 - sysntime 
system systime systimeint tan tanh tolower toupper truncate typeof - unflatten unformat unformatx upntime uptime urand urand32 urandelement - urandint urandrange utf8_to_latin1 version ! != !=~ % & && * ** + - . .* .+ .- - ./ / // < << <= <=> == =~ > >= >> >>> ?: ?? ??? ^ ^^ | || ~ + sha512 sin sinh skewness sort sort_collection splita splitax splitkv splitkvx + splitnv splitnvx sqrt ssub stddev strfntime strfntime_local strftime + strftime_local string strip strlen strpntime strpntime_local strptime + strptime_local sub substr substr0 substr1 sum sum2 sum3 sum4 sysntime system + systime systimeint tan tanh tolower toupper truncate typeof unflatten unformat + unformatx upntime uptime urand urand32 urandelement urandint urandrange + utf8_to_latin1 variance version ! != !=~ % & && * ** + - . .* .+ .- ./ / // < + << <= <=> == =~ > >= >> >>> ?: ?? ??? ^ ^^ | || ~ 1mCOMMENTS-IN-DATA FLAGS0m Miller lets you put comments in your data, such as @@ -2185,6 +2187,12 @@ MILLER(1) MILLER(1) 1macosh0m (class=math #args=1) Inverse hyperbolic cosine. + 1mantimode0m + (class=stats #args=1) Returns the least frequently occurring value in an array or map. Returns error for non-array/non-map types. Values are stringified for comparison, so for example string "1" and integer 1 are not distinct. In cases of ties, first-found wins. + Examples: + antimode([3,3,4,4,4]) is 3 + antimode([3,3,4,4]) is 3 + 1many0m (class=higher-order-functions #args=2) Given a map or array as first argument and a function as second argument, yields a boolean true if the argument function returns true for any array/map element, false otherwise. For arrays, the function should take one argument, for array element; for maps, it should take two, for map-element key and value. In either case it should return a boolean. Examples: @@ -2309,6 +2317,12 @@ MILLER(1) MILLER(1) 1mcosh0m (class=math #args=1) Hyperbolic cosine. + 1mcount0m + (class=stats #args=1) Returns the length of an array or map. 
Returns error for non-array/non-map types. + Examples: + count([7,8,9]) is 3 + count({"a":7,"b":8,"c":9}) is 3 + 1mdepth0m (class=collections #args=1) Prints maximum depth of map/array. Scalars have depth 0. @@ -2318,6 +2332,13 @@ MILLER(1) MILLER(1) 1mdhms2sec0m (class=time #args=1) Recovers integer seconds as in dhms2sec("5d18h53m20s") = 500000 + 1mdistinct_count0m + (class=stats #args=1) Returns the number of distinct values in an array or map. Returns error for non-array/non-map types. Values are stringified for comparison, so for example string "1" and integer 1 are not distinct. + Examples: + distinct_count([7,8,9,7]) is 3 + distinct_count([1,"1"]) is 1 + distinct_count([1,1.0]) is 2 + 1merf0m (class=math #args=1) Error function. @@ -2542,6 +2563,11 @@ MILLER(1) MILLER(1) 1mjson_stringify0m (class=collections #args=1,2) Converts value to JSON-formatted string. Default output is single-line. With optional second boolean argument set to true, produces multiline output. + 1mkurtosis0m + (class=stats #args=1) Returns the sample kurtosis of values in an array or map. Returns "" AKA void for array/map of length less than two; returns error for non-array/non-map types. + Example: + kurtosis([4,5,9,10,11]) is -1.6703688 + 1mlatin1_to_utf80m (class=string #args=1) Tries to convert Latin-1-encoded string to UTF-8-encoded string. If argument is array or map, recurses into it. Examples: @@ -2610,20 +2636,53 @@ MILLER(1) MILLER(1) (class=collections #args=variadic) With 0 args, returns empty map. With >= 1 arg, returns a map with key-value pairs from all arguments. Rightmost collisions win, e.g. 'mapsum({1:2,3:4},{1:5})' is '{1:5,3:4}'. 1mmax0m - (class=math #args=variadic) Max of n numbers; null loses. + (class=math #args=variadic) Max of n numbers; null loses. The min and max functions also recurse into arrays and maps, so they can be used to get min/max stats on array/map values. 
+ + 1mmaxlen0m + (class=stats #args=1) Returns the maximum string length of values in an array or map. Returns "" AKA void for array/map of length less than two; returns error for non-array/non-map types. + Example: + maxlen(["año", "alto"]) is 4 1mmd50m (class=hashing #args=1) MD5 hash. + 1mmean0m + (class=stats #args=1) Returns the arithmetic mean of values in an array or map. Returns "" AKA void for empty array/map; returns error for non-array/non-map types. + Example: + mean([4,5,7,10]) is 6.5 + + 1mmeaneb0m + (class=stats #args=1) Returns the error bar for arithmetic mean of values in an array or map, assuming the values are independent and identically distributed. Returns "" AKA void for empty array/map; returns error for non-array/non-map types. + Example: + meaneb([4,5,7,10]) is 1.3228756 + + 1mmedian0m + (class=stats #args=1,2) Returns the median of values in an array or map. Returns "" AKA void for empty array/map; returns error for non-array/non-map types. Please see the percentiles function for information on optional flags, and on performance for large inputs. + Examples: + median([3,4,5,6,9,10]) is 6 + median([3,4,5,6,9,10],{"interpolate_linearly":true}) is 5.5 + median(["abc", "def", "ghi", "ghi"]) is "ghi" + 1mmexp0m (class=arithmetic #args=3) a ** b mod m (integers) 1mmin0m - (class=math #args=variadic) Min of n numbers; null loses. + (class=math #args=variadic) Min of n numbers; null loses. The min and max functions also recurse into arrays and maps, so they can be used to get min/max stats on array/map values. + + 1mminlen0m + (class=stats #args=1) Returns the minimum string length of values in an array or map. Returns "" AKA void for array/map of length less than two; returns error for non-array/non-map types. + Example: + minlen(["año", "alto"]) is 3 1mmmul0m (class=arithmetic #args=3) a * b mod m (integers) + 1mmode0m + (class=stats #args=1) Returns the most frequently occurring value in an array or map. Returns error for non-array/non-map types. 
Values are stringified for comparison, so for example string "1" and integer 1 are not distinct. In cases of ties, first-found wins. + Examples: + mode([3,3,4,4,4]) is 4 + mode([3,3,4,4]) is 3 + 1mmsub0m (class=arithmetic #args=3) a - b mod m (integers) @@ -2653,9 +2712,70 @@ MILLER(1) MILLER(1) nsec2localtime(1234567890123456789, 6) = "2009-02-14 01:31:30.123456" with TZ="Asia/Istanbul" nsec2localtime(1234567890123456789, 6, "Asia/Istanbul") = "2009-02-14 01:31:30.123456" + 1mnull_count0m + (class=stats #args=1) Returns the number of values in an array or map which are empty-string (AKA void) or JSON null. Returns error for non-array/non-map types. Values are stringified for comparison, so for example string "1" and integer 1 are not distinct. + Example: + null_count(["a", "", "c"]) is 1 + 1mos0m (class=system #args=0) Returns the operating-system name as a string. + 1mpercentile0m + (class=stats #args=2,3) Returns the given percentile of values in an array or map. Returns "" AKA void for empty array/map; returns error for non-array/non-map types. Please see the percentiles function for information on optional flags, and on performance for large inputs. + Examples: + percentile([3,4,5,6,9,10], 90) is 10 + percentile([3,4,5,6,9,10], 90, {"interpolate_linearly":true}) is 9.5 + percentile(["abc", "def", "ghi", "ghi"], 90) is "ghi" + + 1mpercentiles0m + (class=stats #args=2,3) Returns the given percentiles of values in an array or map. Returns "" AKA void for empty array/map; returns error for non-array/non-map types. See examples for information on the three option flags. 
+ Examples: + + Defaults are to not interpolate linearly, to produce a map keyed by percentile name, and to sort + the input before computing percentiles: + + percentiles([3,4,5,6,9,10], [25,75]) is { "25": 4, "75": 9 } + percentiles(["abc", "def", "ghi", "ghi"], [25,75]) is { "25": "def", "75": "ghi" } + + Use "output_array_not_map" (or shorthand "oa") to get the outputs as an array: + + percentiles([3,4,5,6,9,10], [25,75], {"output_array_not_map":true}) is [4, 9] + + Use "interpolate_linearly" (or shorthand "il") to do linear interpolation -- note this produces + an error on string inputs: + + percentiles([3,4,5,6,9,10], [25,75], {"interpolate_linearly":true}) is { "25": 4.25, "75": 8.25 } + + The percentiles function always sorts its inputs before computing percentiles. If you know your input + is already sorted -- see also the sort_collection function -- then computation will be faster on + large input if you pass in "array_is_sorted": + + x = [6,5,9,10,4,3] + percentiles(x, [25,75], {"array_is_sorted":true}) gives { "25": 5, "75": 4 } which is incorrect + x = sort_collection(x) + percentiles(x, [25,75], {"array_is_sorted":true}) gives { "25": 4, "75": 9 } which is correct + + You can also leverage this feature to compute percentiles on a sort of your choosing. 
For example: + + Non-sorted input: + x = splitax("the quick brown fox jumped loquaciously over the lazy dogs", " ") + x is: ["the", "quick", "brown", "fox", "jumped", "loquaciously", "over", "the", "lazy", "dogs"] + Percentiles are taken over the original positions of the words in the array -- "dogs" is last + and hence appears as p99: + percentiles(x, [50, 99], {"oa":true, "ais":true}) gives ["loquaciously", "dogs"] + With sorting done inside percentiles, "the" is alphabetically last and is therefore the p99: + percentiles(x, [50, 99], {"oa":true}) gives ["loquaciously", "the"] + With default sorting done outside percentiles, the same: + x = sort(x) # or x = sort_collection(x) + x is: ["brown", "dogs", "fox", "jumped", "lazy", "loquaciously", "over", "quick", "the", "the"] + percentiles(x, [50, 99], {"oa":true, "ais":true}) gives ["loquaciously", "the"] + percentiles(x, [50, 99], {"oa":true}) gives ["loquaciously", "the"] + Now sorting by word length, "loquaciously" is longest and hence is the p99: + x = sort(x, func(a,b) { return strlen(a) <=> strlen(b) } ) + x is: ["fox", "the", "the", "dogs", "lazy", "over", "brown", "quick", "jumped", "loquaciously"] + percentiles(x, [50, 99], {"oa":true, "ais":true}) + ["over", "loquaciously"] + 1mpow0m (class=arithmetic #args=2) Exponentiation. Same as **, but as a function. @@ -2752,6 +2872,11 @@ MILLER(1) MILLER(1) 1msinh0m (class=math #args=1) Hyperbolic sine. + 1mskewness0m + (class=stats #args=1) Returns the sample skewness of values in an array or map. Returns "" AKA void for array/map of length less than two; returns error for non-array/non-map types. + Example: + skewness([4,5,9,10,11]) is -0.2097285 + 1msort0m (class=higher-order-functions #args=1-2) Given a map or array as first argument and string flags or function as optional second argument, returns a sorted copy of the input. With one argument, sorts array elements with numbers first numerically and then strings lexically, and map elements likewise by map keys. 
If the second argument is a string, it can contain any of "f" for lexical ("n" is for the above default), "c" for case-folded lexical, or "t" for natural sort order. An additional "r" in that string is for reverse. An additional "v" in that string means sort maps by value, rather than by key. If the second argument is a function, then for arrays it should take two arguments a and b, returning < 0, 0, or > 0 as a < b, a == b, or a > b respectively; for maps the function should take four arguments ak, av, bk, and bv, again returning < 0, 0, or > 0, using a and b's keys and values. Examples: @@ -2768,6 +2893,9 @@ MILLER(1) MILLER(1) Map without function: sort({"c":2,"a":3,"b":1}, "v") returns {"b":1,"c":2,"a":3}. Map without function: sort({"c":2,"a":3,"b":1}, "vnr") returns {"a":3,"c":2,"b":1}. + 1msort_collection0m + (class=stats #args=1) This is a helper function for the percentiles function; please see its online help for details. + 1msplita0m (class=conversion #args=2) Splits string into array with type inference. First argument is string to split; second is the separator to split on. Example: @@ -2806,6 +2934,11 @@ MILLER(1) MILLER(1) Example: ssub("abc.def", ".", "X") gives "abcXdef" + 1mstddev0m + (class=stats #args=1) Returns the sample standard deviation of values in an array or map. Returns "" AKA void for array/map of length less than two; returns error for non-array/non-map types. + Example: + stddev([4,5,9,10,11]) is 3.1144823 + 1mstrfntime0m (class=time #args=2) Formats integer nanoseconds since the epoch as timestamp. Format strings are as at https://pkg.go.dev/github.com/lestrrat-go/strftime, with the Miller-specific addition of "%1S" through "%9S" which format the seconds with 1 through 9 decimal places, respectively. ("%S" uses no decimal places.) See also https://miller.readthedocs.io/en/latest/reference-dsl-time/ for more information on the differences from the C library ("man strftime" on your system). See also strftime_local. 
Examples: @@ -2893,6 +3026,26 @@ MILLER(1) MILLER(1) 1msubstr10m (class=string #args=3) substr1(s,m,n) gives substring of s from 1-up position m to n inclusive. Negative indices -len .. -1 alias to 1 .. len. See also substr and substr0. + 1msum0m + (class=stats #args=1) Returns the sum of values in an array or map. Returns error for non-array/non-map types. + Example: + sum([1,2,3,4,5]) is 15 + + 1msum20m + (class=stats #args=1) Returns the sum of squares of values in an array or map. Returns error for non-array/non-map types. + Example: + sum2([1,2,3,4,5]) is 55 + + 1msum30m + (class=stats #args=1) Returns the sum of cubes of values in an array or map. Returns error for non-array/non-map types. + Example: + sum3([1,2,3,4,5]) is 225 + + 1msum40m + (class=stats #args=1) Returns the sum of fourth powers of values in an array or map. Returns error for non-array/non-map types. + Example: + sum4([1,2,3,4,5]) is 979 + 1msysntime0m (class=time #args=0) Returns the system time in 64-bit nanoseconds since the epoch. @@ -2971,6 +3124,11 @@ MILLER(1) MILLER(1) $y = utf8_to_latin1($x) $* = utf8_to_latin1($*) + 1mvariance0m + (class=stats #args=1) Returns the sample variance of values in an array or map. Returns "" AKA void for array/map of length less than two; returns error for non-array/non-map types. + Example: + variance([4,5,9,10,11]) is 9.7 + 1mversion0m (class=system #args=0) Returns the Miller version as a string. 
@@ -3472,5 +3630,5 @@ MILLER(1) MILLER(1) - 2023-08-23 MILLER(1) + 2023-08-26 MILLER(1) diff --git a/docs/src/manpage.txt b/docs/src/manpage.txt index b3352b9a62..666177bee9 100644 --- a/docs/src/manpage.txt +++ b/docs/src/manpage.txt @@ -182,32 +182,34 @@ MILLER(1) MILLER(1) unsparsify 1mFUNCTION LIST0m - abs acos acosh any append apply arrayify asin asinh asserting_absent + abs acos acosh antimode any append apply arrayify asin asinh asserting_absent asserting_array asserting_bool asserting_boolean asserting_empty asserting_empty_map asserting_error asserting_float asserting_int asserting_map asserting_nonempty_map asserting_not_array asserting_not_empty asserting_not_map asserting_not_null asserting_null asserting_numeric asserting_present asserting_string atan atan2 atanh bitcount boolean capitalize cbrt ceil clean_whitespace collapse_whitespace concat cos cosh - depth dhms2fsec dhms2sec erf erfc every exec exp expm1 flatten float floor - fmtifnum fmtnum fold format fsec2dhms fsec2hms get_keys get_values - gmt2localtime gmt2nsec gmt2sec gssub gsub haskey hexfmt hms2fsec hms2sec - hostname index int invqnorm is_absent is_array is_bool is_boolean is_empty - is_empty_map is_error is_float is_int is_map is_nan is_nonempty_map + count depth dhms2fsec dhms2sec distinct_count erf erfc every exec exp expm1 + flatten float floor fmtifnum fmtnum fold format fsec2dhms fsec2hms get_keys + get_values gmt2localtime gmt2nsec gmt2sec gssub gsub haskey hexfmt hms2fsec + hms2sec hostname index int invqnorm is_absent is_array is_bool is_boolean + is_empty is_empty_map is_error is_float is_int is_map is_nan is_nonempty_map is_not_array is_not_empty is_not_map is_not_null is_null is_numeric is_present - is_string joink joinkv joinv json_parse json_stringify latin1_to_utf8 + is_string joink joinkv joinv json_parse json_stringify kurtosis latin1_to_utf8 leafcount leftpad length localtime2gmt localtime2nsec localtime2sec log log10 - log1p logifit lstrip madd mapdiff mapexcept 
mapselect mapsum max md5 mexp min - mmul msub nsec2gmt nsec2gmtdate nsec2localdate nsec2localtime os pow qnorm + log1p logifit lstrip madd mapdiff mapexcept mapselect mapsum max maxlen md5 + mean meaneb median mexp min minlen mmul mode msub nsec2gmt nsec2gmtdate + nsec2localdate nsec2localtime null_count os percentile percentiles pow qnorm reduce regextract regextract_or_else rightpad round roundm rstrip sec2dhms sec2gmt sec2gmtdate sec2hms sec2localdate sec2localtime select sgn sha1 sha256 - sha512 sin sinh sort splita splitax splitkv splitkvx splitnv splitnvx sqrt - ssub strfntime strfntime_local strftime strftime_local string strip strlen - strpntime strpntime_local strptime strptime_local sub substr substr0 substr1 - sysntime system systime systimeint tan tanh tolower toupper truncate typeof - unflatten unformat unformatx upntime uptime urand urand32 urandelement - urandint urandrange utf8_to_latin1 version ! != !=~ % & && * ** + - . .* .+ .- - ./ / // < << <= <=> == =~ > >= >> >>> ?: ?? ??? ^ ^^ | || ~ + sha512 sin sinh skewness sort sort_collection splita splitax splitkv splitkvx + splitnv splitnvx sqrt ssub stddev strfntime strfntime_local strftime + strftime_local string strip strlen strpntime strpntime_local strptime + strptime_local sub substr substr0 substr1 sum sum2 sum3 sum4 sysntime system + systime systimeint tan tanh tolower toupper truncate typeof unflatten unformat + unformatx upntime uptime urand urand32 urandelement urandint urandrange + utf8_to_latin1 variance version ! != !=~ % & && * ** + - . .* .+ .- ./ / // < + << <= <=> == =~ > >= >> >>> ?: ?? ??? ^ ^^ | || ~ 1mCOMMENTS-IN-DATA FLAGS0m Miller lets you put comments in your data, such as @@ -2164,6 +2166,12 @@ MILLER(1) MILLER(1) 1macosh0m (class=math #args=1) Inverse hyperbolic cosine. + 1mantimode0m + (class=stats #args=1) Returns the most frequently occurring value in an array or map. Returns error for non-array/non-map types. 
Values are stringified for comparison, so for example string "1" and integer 1 are not distinct. In cases of ties, first-found wins. + Examples: + antimode([3,3,4,4,4]) is 3 + antimode([3,3,4,4]) is 3 + 1many0m (class=higher-order-functions #args=2) Given a map or array as first argument and a function as second argument, yields a boolean true if the argument function returns true for any array/map element, false otherwise. For arrays, the function should take one argument, for array element; for maps, it should take two, for map-element key and value. In either case it should return a boolean. Examples: @@ -2288,6 +2296,12 @@ MILLER(1) MILLER(1) 1mcosh0m (class=math #args=1) Hyperbolic cosine. + 1mcount0m + (class=stats #args=1) Returns the length of an array or map. Returns error for non-array/non-map types. + Examples: + count([7,8,9]) is 3 + count({"a":7,"b":8,"c":9}) is 3 + 1mdepth0m (class=collections #args=1) Prints maximum depth of map/array. Scalars have depth 0. @@ -2297,6 +2311,13 @@ MILLER(1) MILLER(1) 1mdhms2sec0m (class=time #args=1) Recovers integer seconds as in dhms2sec("5d18h53m20s") = 500000 + 1mdistinct_count0m + (class=stats #args=1) Returns the number of disinct values in an array or map. Returns error for non-array/non-map types. Values are stringified for comparison, so for example string "1" and integer 1 are not distinct. + Examples: + distinct_count([7,8,9,7]) is 3 + distinct_count([1,"1"]) is 1 + distinct_count([1,1.0]) is 2 + 1merf0m (class=math #args=1) Error function. @@ -2521,6 +2542,11 @@ MILLER(1) MILLER(1) 1mjson_stringify0m (class=collections #args=1,2) Converts value to JSON-formatted string. Default output is single-line. With optional second boolean argument set to true, produces multiline output. + 1mkurtosis0m + (class=stats #args=1) Returns the sample kurtosis of values in an array or map. Returns "" AKA void for array/map of length less than two; returns error for non-array/non-map types. 
+ Example: + kurtosis([4,5,9,10,11]) is -1.6703688 + 1mlatin1_to_utf80m (class=string #args=1) Tries to convert Latin-1-encoded string to UTF-8-encoded string. If argument is array or map, recurses into it. Examples: @@ -2589,20 +2615,53 @@ MILLER(1) MILLER(1) (class=collections #args=variadic) With 0 args, returns empty map. With >= 1 arg, returns a map with key-value pairs from all arguments. Rightmost collisions win, e.g. 'mapsum({1:2,3:4},{1:5})' is '{1:5,3:4}'. 1mmax0m - (class=math #args=variadic) Max of n numbers; null loses. + (class=math #args=variadic) Max of n numbers; null loses. The min and max functions also recurse into arrays and maps, so they can be used to get min/max stats on array/map values. + + 1mmaxlen0m + (class=stats #args=1) Returns the maximum string length of values in an array or map. Returns "" AKA void for array/map of length less than two; returns error for non-array/non-map types. + Example: + maxlen(["ao", "alto"]) is 4 1mmd50m (class=hashing #args=1) MD5 hash. + 1mmean0m + (class=stats #args=1) Returns the arithmetic mean of values in an array or map. Returns "" AKA void for empty array/map; returns error for non-array/non-map types. + Example: + mean([4,5,7,10]) is 6.5 + + 1mmeaneb0m + (class=stats #args=1) Returns the error bar for arithmetic mean of values in an array or map, assuming the values are independent and identically distributed. Returns "" AKA void for empty array/map; returns error for non-array/non-map types. + Example: + meaneb([4,5,7,10]) is 1.3228756 + + 1mmedian0m + (class=stats #args=1,2) Returns the median of values in an array or map. Returns "" AKA void for empty array/map; returns error for non-array/non-map types. Please see the percentiles for information on optional flags, and on performance for large inputs. 
+ Examples: + median([3,4,5,6,9,10]) is 6 + median([3,4,5,6,9,10],{"interpolate_linearly":true}) is 5.5 + median(["abc", "def", "ghi", "ghi"]) is "ghi" + 1mmexp0m (class=arithmetic #args=3) a ** b mod m (integers) 1mmin0m - (class=math #args=variadic) Min of n numbers; null loses. + (class=math #args=variadic) Min of n numbers; null loses. The min and max functions also recurse into arrays and maps, so they can be used to get min/max stats on array/map values. + + 1mminlen0m + (class=stats #args=1) Returns the minimum string length of values in an array or map. Returns "" AKA void for array/map of length less than two; returns error for non-array/non-map types. + Example: + minlen(["ao", "alto"]) is 3 1mmmul0m (class=arithmetic #args=3) a * b mod m (integers) + 1mmode0m + (class=stats #args=1) Returns the most frequently occurring value in an array or map. Returns error for non-array/non-map types. Values are stringified for comparison, so for example string "1" and integer 1 are not distinct. In cases of ties, first-found wins. + Examples: + mode([3,3,4,4,4]) is 4 + mode([3,3,4,4]) is 3 + 1mmsub0m (class=arithmetic #args=3) a - b mod m (integers) @@ -2632,9 +2691,70 @@ MILLER(1) MILLER(1) nsec2localtime(1234567890123456789, 6) = "2009-02-14 01:31:30.123456" with TZ="Asia/Istanbul" nsec2localtime(1234567890123456789, 6, "Asia/Istanbul") = "2009-02-14 01:31:30.123456" + 1mnull_count0m + (class=stats #args=1) Returns the number of values in an array or map which are empty-string (AKA void) or JSON null. Returns error for non-array/non-map types. Values are stringified for comparison, so for example string "1" and integer 1 are not distinct. + Example: + null_count(["a", "", "c"]) is 1 + 1mos0m (class=system #args=0) Returns the operating-system name as a string. + 1mpercentile0m + (class=stats #args=2,3) Returns the given percentile of values in an array or map. Returns "" AKA void for empty array/map; returns error for non-array/non-map types. 
Please see the percentiles for information on optional flags, and on performance for large inputs. + Examples: + percentile([3,4,5,6,9,10], 90) is 10 + percentile([3,4,5,6,9,10], 90, {"interpolate_linearly":true}) is 9.5 + percentile(["abc", "def", "ghi", "ghi"], 90) is "ghi" + + 1mpercentiles0m + (class=stats #args=2,3) Returns the given percentiles of values in an array or map. Returns "" AKA void for empty array/map; returns error for non-array/non-map types. See examples for information on the three option flags. + Examples: + + Defaults are to not interpolate linearly, to produce a map keyed by percentile name, and to sort + the input before computing percentiles: + + percentiles([3,4,5,6,9,10], [25,75]) is { "25": 4, "75": 9 } + percentiles(["abc", "def", "ghi", "ghi"], [25,75]) is { "25": "def", "75": "ghi" } + + Use "output_array_not_map" (or shorthand "oa") to get the outputs as an array: + + percentiles([3,4,5,6,9,10], [25,75], {"output_array_not_map":true}) is [4, 9] + + Use "interpolate_linearly" (or shorthand "il") to do linear interpolation -- note this produces + ,error on string inputs: + + percentiles([3,4,5,6,9,10], [25,75], {"interpolate_linearly":true}) is { "25": 4.25, "75": 8.25 } + + The percentiles function always sorts its inputs before computing percentiles. If you know your input + is already sorted -- see also the sort_collection function -- then computation will be faster on + large input if you pass in "array_is_sorted": + + x = [6,5,9,10,4,3] + percentiles(x, [25,75], {"array_is_sorted":true}) gives { "25": 5, "75": 4 } which is incorrect + x = sort_collection(x) + percentiles(x, [25,75], {"array_is_sorted":true}) gives { "25": 4, "75": 9 } which is correct + + You can also leverage this feature to compute percentiles on a sort of your choosing. 
For example: + + Non-sorted input: + x = splitax("the quick brown fox jumped loquaciously over the lazy dogs", " ") + x is: ["the", "quick", "brown", "fox", "jumped", "loquaciously", "over", "the", "lazy", "dogs"] + Percentiles are taken over the original positions of the words in the array -- "dogs" is last + and hence appears as p99: + percentiles(x, [50, 99], {"oa":true, "ais":true}) gives ["loquaciously", "dogs"] + With sorting done inside percentiles, "the" is alphabetically last and is therefore the p99: + percentiles(x, [50, 99], {"oa":true}) gives ["loquaciously", "the"] + With default sorting done outside percentiles, the same: + x = sort(x) # or x = sort_collection(x) + x is: ["brown", "dogs", "fox", "jumped", "lazy", "loquaciously", "over", "quick", "the", "the"] + percentiles(x, [50, 99], {"oa":true, "ais":true}) gives ["loquaciously", "the"] + percentiles(x, [50, 99], {"oa":true}) gives ["loquaciously", "the"] + Now sorting by word length, "loquaciously" is longest and hence is the p99: + x = sort(x, func(a,b) { return strlen(a) <=> strlen(b) } ) + x is: ["fox", "the", "the", "dogs", "lazy", "over", "brown", "quick", "jumped", "loquaciously"] + percentiles(x, [50, 99], {"oa":true, "ais":true}) + ["over", "loquaciously"] + 1mpow0m (class=arithmetic #args=2) Exponentiation. Same as **, but as a function. @@ -2731,6 +2851,11 @@ MILLER(1) MILLER(1) 1msinh0m (class=math #args=1) Hyperbolic sine. + 1mskewness0m + (class=stats #args=1) Returns the sample skewness of values in an array or map. Returns "" AKA void for array/map of length less than two; returns error for non-array/non-map types. + Example: + skewness([4,5,9,10,11]) is -0.2097285 + 1msort0m (class=higher-order-functions #args=1-2) Given a map or array as first argument and string flags or function as optional second argument, returns a sorted copy of the input. With one argument, sorts array elements with numbers first numerically and then strings lexically, and map elements likewise by map keys. 
If the second argument is a string, it can contain any of "f" for lexical ("n" is for the above default), "c" for case-folded lexical, or "t" for natural sort order. An additional "r" in that string is for reverse. An additional "v" in that string means sort maps by value, rather than by key. If the second argument is a function, then for arrays it should take two arguments a and b, returning < 0, 0, or > 0 as a < b, a == b, or a > b respectively; for maps the function should take four arguments ak, av, bk, and bv, again returning < 0, 0, or > 0, using a and b's keys and values. Examples: @@ -2747,6 +2872,9 @@ MILLER(1) MILLER(1) Map without function: sort({"c":2,"a":3,"b":1}, "v") returns {"b":1,"c":2,"a":3}. Map without function: sort({"c":2,"a":3,"b":1}, "vnr") returns {"a":3,"c":2,"b":1}. + 1msort_collection0m + (class=stats #args=1) This is a helper function for the percentiles function; please see its online help for details. + 1msplita0m (class=conversion #args=2) Splits string into array with type inference. First argument is string to split; second is the separator to split on. Example: @@ -2785,6 +2913,11 @@ MILLER(1) MILLER(1) Example: ssub("abc.def", ".", "X") gives "abcXdef" + 1mstddev0m + (class=stats #args=1) Returns the sample standard deviation of values in an array or map. Returns "" AKA void for array/map of length less than two; returns error for non-array/non-map types. + Example: + stddev([4,5,9,10,11]) is 3.1144823 + 1mstrfntime0m (class=time #args=2) Formats integer nanoseconds since the epoch as timestamp. Format strings are as at https://pkg.go.dev/github.com/lestrrat-go/strftime, with the Miller-specific addition of "%1S" through "%9S" which format the seconds with 1 through 9 decimal places, respectively. ("%S" uses no decimal places.) See also https://miller.readthedocs.io/en/latest/reference-dsl-time/ for more information on the differences from the C library ("man strftime" on your system). See also strftime_local. 
Examples: @@ -2872,6 +3005,26 @@ MILLER(1) MILLER(1) 1msubstr10m (class=string #args=3) substr1(s,m,n) gives substring of s from 1-up position m to n inclusive. Negative indices -len .. -1 alias to 1 .. len. See also substr and substr0. + 1msum0m + (class=stats #args=1) Returns the sum of values in an array or map. Returns error for non-array/non-map types. + Example: + sum([1,2,3,4,5]) is 15 + + 1msum20m + (class=stats #args=1) Returns the sum of squares of values in an array or map. Returns error for non-array/non-map types. + Example: + sum2([1,2,3,4,5]) is 55 + + 1msum30m + (class=stats #args=1) Returns the sum of cubes of values in an array or map. Returns error for non-array/non-map types. + Example: + sum3([1,2,3,4,5]) is 225 + + 1msum40m + (class=stats #args=1) Returns the sum of fourth powers of values in an array or map. Returns error for non-array/non-map types. + Example: + sum4([1,2,3,4,5]) is 979 + 1msysntime0m (class=time #args=0) Returns the system time in 64-bit nanoseconds since the epoch. @@ -2950,6 +3103,11 @@ MILLER(1) MILLER(1) $y = utf8_to_latin1($x) $* = utf8_to_latin1($*) + 1mvariance0m + (class=stats #args=1) Returns the sample variance of values in an array or map. Returns "" AKA void for array/map of length less than two; returns error for non-array/non-map types. + Example: + variance([4,5,9,10,11]) is 9.7 + 1mversion0m (class=system #args=0) Returns the Miller version as a string. @@ -3451,4 +3609,4 @@ MILLER(1) MILLER(1) - 2023-08-23 MILLER(1) + 2023-08-26 MILLER(1) diff --git a/docs/src/reference-dsl-builtin-functions.md b/docs/src/reference-dsl-builtin-functions.md index 7bdb1d5bf9..3d24f09847 100644 --- a/docs/src/reference-dsl-builtin-functions.md +++ b/docs/src/reference-dsl-builtin-functions.md @@ -74,6 +74,7 @@ is 2. Unary operators such as `!` and `~` show argument-count of 1; the ternary * [**Hashing functions**](#hashing-functions): [md5](#md5), [sha1](#sha1), [sha256](#sha256), [sha512](#sha512). 
* [**Higher-order-functions functions**](#higher-order-functions-functions): [any](#any), [apply](#apply), [every](#every), [fold](#fold), [reduce](#reduce), [select](#select), [sort](#sort). * [**Math functions**](#math-functions): [abs](#abs), [acos](#acos), [acosh](#acosh), [asin](#asin), [asinh](#asinh), [atan](#atan), [atan2](#atan2), [atanh](#atanh), [cbrt](#cbrt), [ceil](#ceil), [cos](#cos), [cosh](#cosh), [erf](#erf), [erfc](#erfc), [exp](#exp), [expm1](#expm1), [floor](#floor), [invqnorm](#invqnorm), [log](#log), [log10](#log10), [log1p](#log1p), [logifit](#logifit), [max](#max), [min](#min), [qnorm](#qnorm), [round](#round), [roundm](#roundm), [sgn](#sgn), [sin](#sin), [sinh](#sinh), [sqrt](#sqrt), [tan](#tan), [tanh](#tanh), [urand](#urand), [urand32](#urand32), [urandelement](#urandelement), [urandint](#urandint), [urandrange](#urandrange). +* [**Stats functions**](#stats-functions): [antimode](#antimode), [count](#count), [distinct_count](#distinct_count), [kurtosis](#kurtosis), [maxlen](#maxlen), [mean](#mean), [meaneb](#meaneb), [median](#median), [minlen](#minlen), [mode](#mode), [null_count](#null_count), [percentile](#percentile), [percentiles](#percentiles), [skewness](#skewness), [sort_collection](#sort_collection), [stddev](#stddev), [sum](#sum), [sum2](#sum2), [sum3](#sum3), [sum4](#sum4), [variance](#variance). 
* [**String functions**](#string-functions): [capitalize](#capitalize), [clean_whitespace](#clean_whitespace), [collapse_whitespace](#collapse_whitespace), [format](#format), [gssub](#gssub), [gsub](#gsub), [index](#index), [latin1_to_utf8](#latin1_to_utf8), [leftpad](#leftpad), [lstrip](#lstrip), [regextract](#regextract), [regextract_or_else](#regextract_or_else), [rightpad](#rightpad), [rstrip](#rstrip), [ssub](#ssub), [strip](#strip), [strlen](#strlen), [sub](#sub), [substr](#substr), [substr0](#substr0), [substr1](#substr1), [tolower](#tolower), [toupper](#toupper), [truncate](#truncate), [unformat](#unformat), [unformatx](#unformatx), [utf8_to_latin1](#utf8_to_latin1), [\.](#dot). * [**System functions**](#system-functions): [exec](#exec), [hostname](#hostname), [os](#os), [system](#system), [version](#version). * [**Time functions**](#time-functions): [dhms2fsec](#dhms2fsec), [dhms2sec](#dhms2sec), [fsec2dhms](#fsec2dhms), [fsec2hms](#fsec2hms), [gmt2localtime](#gmt2localtime), [gmt2nsec](#gmt2nsec), [gmt2sec](#gmt2sec), [hms2fsec](#hms2fsec), [hms2sec](#hms2sec), [localtime2gmt](#localtime2gmt), [localtime2nsec](#localtime2nsec), [localtime2sec](#localtime2sec), [nsec2gmt](#nsec2gmt), [nsec2gmtdate](#nsec2gmtdate), [nsec2localdate](#nsec2localdate), [nsec2localtime](#nsec2localtime), [sec2dhms](#sec2dhms), [sec2gmt](#sec2gmt), [sec2gmtdate](#sec2gmtdate), [sec2hms](#sec2hms), [sec2localdate](#sec2localdate), [sec2localtime](#sec2localtime), [strfntime](#strfntime), [strfntime_local](#strfntime_local), [strftime](#strftime), [strftime_local](#strftime_local), [strpntime](#strpntime), [strpntime_local](#strpntime_local), [strptime](#strptime), [strptime_local](#strptime_local), [sysntime](#sysntime), [systime](#systime), [systimeint](#systimeint), [upntime](#upntime), [uptime](#uptime). @@ -877,13 +878,13 @@ logifit (class=math #args=3) Given m and b from logistic regression, compute fi ### max
-max  (class=math #args=variadic) Max of n numbers; null loses.
+max  (class=math #args=variadic) Max of n numbers; null loses. The min and max functions also recurse into arrays and maps, so they can be used to get min/max stats on array/map values.
 
### min
-min  (class=math #args=variadic) Min of n numbers; null loses.
+min  (class=math #args=variadic) Min of n numbers; null loses. The min and max functions also recurse into arrays and maps, so they can be used to get min/max stats on array/map values.
 
@@ -972,6 +973,227 @@ urandint (class=math #args=2) Integer uniformly distributed between inclusive i urandrange (class=math #args=2) Floating-point numbers uniformly distributed on the interval [a, b). +## Stats functions + + +### antimode +
+antimode  (class=stats #args=1) Returns the least frequently occurring value in an array or map. Returns error for non-array/non-map types. Values are stringified for comparison, so for example string "1" and integer 1 are not distinct. In cases of ties, first-found wins.
+Examples:
+antimode([3,3,4,4,4]) is 3
+antimode([3,3,4,4]) is 3
+
+ + +### count +
+count  (class=stats #args=1) Returns the length of an array or map. Returns error for non-array/non-map types.
+Examples:
+count([7,8,9]) is 3
+count({"a":7,"b":8,"c":9}) is 3
+
+ + +### distinct_count +
+distinct_count  (class=stats #args=1) Returns the number of distinct values in an array or map. Returns error for non-array/non-map types. Values are stringified for comparison, so for example string "1" and integer 1 are not distinct.
+Examples:
+distinct_count([7,8,9,7])  is 3
+distinct_count([1,"1"]) is 1
+distinct_count([1,1.0]) is 2
+
+ + +### kurtosis +
+kurtosis  (class=stats #args=1) Returns the sample kurtosis of values in an array or map. Returns "" AKA void for array/map of length less than two; returns error for non-array/non-map types.
+Example:
+kurtosis([4,5,9,10,11]) is -1.6703688
+
+ + +### maxlen +
+maxlen  (class=stats #args=1) Returns the maximum string length of values in an array or map. Returns "" AKA void for array/map of length less than two; returns error for non-array/non-map types.
+Example:
+maxlen(["año", "alto"]) is 4
+
+ + +### mean +
+mean  (class=stats #args=1) Returns the arithmetic mean of values in an array or map. Returns "" AKA void for empty array/map; returns error for non-array/non-map types.
+Example:
+mean([4,5,7,10]) is 6.5
+
+ + +### meaneb +
+meaneb  (class=stats #args=1) Returns the error bar for arithmetic mean of values in an array or map, assuming the values are independent and identically distributed. Returns "" AKA void for empty array/map; returns error for non-array/non-map types.
+Example:
+meaneb([4,5,7,10]) is 1.3228756
+
+ + +### median +
+median  (class=stats #args=1,2) Returns the median of values in an array or map. Returns "" AKA void for empty array/map; returns error for non-array/non-map types. Please see the percentiles function for information on optional flags, and on performance for large inputs.
+Examples:
+median([3,4,5,6,9,10]) is 6
+median([3,4,5,6,9,10],{"interpolate_linearly":true}) is 5.5
+median(["abc", "def", "ghi", "ghi"]) is "ghi"
+
+ + +### minlen +
+minlen  (class=stats #args=1) Returns the minimum string length of values in an array or map. Returns "" AKA void for array/map of length less than two; returns error for non-array/non-map types.
+Example:
+minlen(["año", "alto"]) is 3
+
+ + +### mode +
+mode  (class=stats #args=1) Returns the most frequently occurring value in an array or map. Returns error for non-array/non-map types. Values are stringified for comparison, so for example string "1" and integer 1 are not distinct. In cases of ties, first-found wins.
+Examples:
+mode([3,3,4,4,4]) is 4
+mode([3,3,4,4]) is 3
+
+ + +### null_count +
+null_count  (class=stats #args=1) Returns the number of values in an array or map which are empty-string (AKA void) or JSON null. Returns error for non-array/non-map types. Values are stringified for comparison, so for example string "1" and integer 1 are not distinct.
+Example:
+null_count(["a", "", "c"]) is 1
+
+ + +### percentile +
+percentile  (class=stats #args=2,3) Returns the given percentile of values in an array or map. Returns "" AKA void for empty array/map; returns error for non-array/non-map types. Please see the percentiles function for information on optional flags, and on performance for large inputs.
+Examples:
+percentile([3,4,5,6,9,10], 90) is 10
+percentile([3,4,5,6,9,10], 90, {"interpolate_linearly":true}) is 9.5
+percentile(["abc", "def", "ghi", "ghi"], 90) is "ghi"
+
+ + +### percentiles +
+percentiles  (class=stats #args=2,3) Returns the given percentiles of values in an array or map. Returns "" AKA void for empty array/map; returns error for non-array/non-map types. See examples for information on the three option flags.
+Examples:
+
+Defaults are to not interpolate linearly, to produce a map keyed by percentile name, and to sort
+the input before computing percentiles:
+
+  percentiles([3,4,5,6,9,10], [25,75]) is { "25": 4, "75": 9 }
+  percentiles(["abc", "def", "ghi", "ghi"], [25,75]) is { "25": "def", "75": "ghi" }
+
+Use "output_array_not_map" (or shorthand "oa") to get the outputs as an array:
+
+  percentiles([3,4,5,6,9,10], [25,75], {"output_array_not_map":true}) is [4, 9]
+
+Use "interpolate_linearly" (or shorthand "il") to do linear interpolation -- note this produces
+error on string inputs:
+
+  percentiles([3,4,5,6,9,10], [25,75], {"interpolate_linearly":true}) is { "25": 4.25, "75": 8.25 }
+
+The percentiles function always sorts its inputs before computing percentiles. If you know your input
+is already sorted -- see also the sort_collection function -- then computation will be faster on
+large input if you pass in "array_is_sorted":
+
+  x = [6,5,9,10,4,3]
+  percentiles(x, [25,75], {"array_is_sorted":true}) gives { "25": 5, "75": 4 } which is incorrect
+  x = sort_collection(x)
+  percentiles(x, [25,75], {"array_is_sorted":true}) gives { "25": 4, "75": 9 } which is correct
+
+You can also leverage this feature to compute percentiles on a sort of your choosing. For example:
+
+  Non-sorted input:
+    x = splitax("the quick brown fox jumped loquaciously over the lazy dogs", " ")
+    x is: ["the", "quick", "brown", "fox", "jumped", "loquaciously", "over", "the", "lazy", "dogs"]
+  Percentiles are taken over the original positions of the words in the array -- "dogs" is last
+  and hence appears as p99:
+    percentiles(x, [50, 99], {"oa":true, "ais":true}) gives ["loquaciously", "dogs"]
+  With sorting done inside percentiles, "the" is alphabetically last and is therefore the p99:
+    percentiles(x, [50, 99], {"oa":true}) gives ["loquaciously", "the"]
+  With default sorting done outside percentiles, the same:
+    x = sort(x) # or x = sort_collection(x)
+    x is: ["brown", "dogs", "fox", "jumped", "lazy", "loquaciously", "over", "quick", "the", "the"]
+    percentiles(x, [50, 99], {"oa":true, "ais":true}) gives ["loquaciously", "the"]
+    percentiles(x, [50, 99], {"oa":true}) gives ["loquaciously", "the"]
+  Now sorting by word length, "loquaciously" is longest and hence is the p99:
+    x = sort(x, func(a,b) { return strlen(a) <=> strlen(b) } )
+    x is: ["fox", "the", "the", "dogs", "lazy", "over", "brown", "quick", "jumped", "loquaciously"]
+    percentiles(x, [50, 99], {"oa":true, "ais":true})
+    ["over", "loquaciously"]
+
+ + +### skewness +
+skewness  (class=stats #args=1) Returns the sample skewness of values in an array or map. Returns "" AKA void for array/map of length less than two; returns error for non-array/non-map types.
+Example:
+skewness([4,5,9,10,11]) is -0.2097285
+
+ + +### sort_collection +
+sort_collection  (class=stats #args=1) This is a helper function for the percentiles function; please see its online help for details.
+
+ + +### stddev +
+stddev  (class=stats #args=1) Returns the sample standard deviation of values in an array or map. Returns "" AKA void for array/map of length less than two; returns error for non-array/non-map types.
+Example:
+stddev([4,5,9,10,11]) is 3.1144823
+
+ + +### sum +
+sum  (class=stats #args=1) Returns the sum of values in an array or map. Returns error for non-array/non-map types.
+Example:
+sum([1,2,3,4,5]) is 15
+
+ + +### sum2 +
+sum2  (class=stats #args=1) Returns the sum of squares of values in an array or map. Returns error for non-array/non-map types.
+Example:
+sum2([1,2,3,4,5]) is 55
+
+ + +### sum3 +
+sum3  (class=stats #args=1) Returns the sum of cubes of values in an array or map. Returns error for non-array/non-map types.
+Example:
+sum3([1,2,3,4,5]) is 225
+
+ + +### sum4 +
+sum4  (class=stats #args=1) Returns the sum of fourth powers of values in an array or map. Returns error for non-array/non-map types.
+Example:
+sum4([1,2,3,4,5]) is 979
+
+ + +### variance +
+variance  (class=stats #args=1) Returns the sample variance of values in an array or map. Returns "" AKA void for array/map of length less than two; returns error for non-array/non-map types.
+Example:
+variance([4,5,9,10,11]) is 9.7
+
+ ## String functions @@ -1765,3 +1987,4 @@ is_string (class=typing #args=1) True if field is present with string (includin typeof (class=typing #args=1) Convert argument to type of argument (e.g. "str"). For debug. +/System/Library/Frameworks/Ruby.framework/Versions/2.6/usr/lib/ruby/2.6.0/universal-darwin22/rbconfig.rb:21: warning: Insecure world writable dir /usr/local/bin in PATH, mode 040777 diff --git a/docs/src/reference-dsl-syntax.md b/docs/src/reference-dsl-syntax.md index f2a8b45cb5..cf1b4bc78b 100644 --- a/docs/src/reference-dsl-syntax.md +++ b/docs/src/reference-dsl-syntax.md @@ -35,6 +35,7 @@ i j k 7 8 15 8 9 17 9 10 19 +/System/Library/Frameworks/Ruby.framework/Versions/2.6/usr/lib/ruby/2.6.0/universal-darwin22/rbconfig.rb:21: warning: Insecure world writable dir /usr/local/bin in PATH, mode 040777 Newlines within the expression are ignored, which can help increase legibility of complex expressions: diff --git a/docs/src/reference-main-flag-list.md b/docs/src/reference-main-flag-list.md index 8e2daf9d02..f688bdd821 100644 --- a/docs/src/reference-main-flag-list.md +++ b/docs/src/reference-main-flag-list.md @@ -495,3 +495,4 @@ Notes about all other separators: * `--repifs`: Let IFS be repeated: e.g. for splitting on multiple spaces. * `--rs {string}`: Specify RS for input and output. +/System/Library/Frameworks/Ruby.framework/Versions/2.6/usr/lib/ruby/2.6.0/universal-darwin22/rbconfig.rb:21: warning: Insecure world writable dir /usr/local/bin in PATH, mode 040777 diff --git a/docs/src/reference-verbs.md b/docs/src/reference-verbs.md index 2b7e9501ff..c94e184c52 100644 --- a/docs/src/reference-verbs.md +++ b/docs/src/reference-verbs.md @@ -3406,14 +3406,14 @@ fields, optionally categorized by one or more fields. data/medium
-x_y_cov    0.000042574820827444476
-x_y_corr   0.0005042001844467462
-y_y_cov    0.08461122467974003
+x_y_cov    0.00004257482082749404
+x_y_corr   0.0005042001844473328
+y_y_cov    0.08461122467974005
 y_y_corr   1
-x2_xy_cov  0.04188382281779374
-x2_xy_corr 0.630174342037994
-x2_y2_cov  -0.00030953725962542085
-x2_y2_corr -0.0034249088761121966
+x2_xy_cov  0.041883822817793716
+x2_xy_corr 0.6301743420379936
+x2_y2_cov  -0.0003095372596253918
+x2_y2_corr -0.003424908876111875
 
@@ -3422,12 +3422,12 @@ x2_y2_corr -0.0034249088761121966
   data/medium
 
-a   x_y_ols_m             x_y_ols_b           x_y_ols_n x_y_r2                  y_y_ols_m y_y_ols_b y_y_ols_n y_y_r2 xy_y2_ols_m        xy_y2_ols_b         xy_y2_ols_n xy_y2_r2
-pan 0.01702551273681908   0.5004028922897639  2081      0.00028691820445814767  1         0         2081      1      0.8781320866715662 0.11908230147563566 2081        0.41749827377311266
-eks 0.0407804923685586    0.48140207967651016 1965      0.0016461239223448587   1         0         1965      1      0.8978728611690183 0.10734054433612333 1965        0.45563223864254526
-wye -0.03915349075204814  0.5255096523974456  1966      0.0015051268704373607   1         0         1966      1      0.8538317334220835 0.1267454301662969  1966        0.38991721818599295
-zee 0.0027812364960399147 0.5043070448033061  2047      0.000007751652858786137 1         0         2047      1      0.8524439912011013 0.12401684308018937 2047        0.39356598090006495
-hat -0.018620577041095078 0.5179005397264935  1941      0.0003520036646055585   1         0         1941      1      0.8412305086345014 0.13557328318623216 1941        0.3687944261732265
+a   x_y_ols_m             x_y_ols_b          x_y_ols_n x_y_r2                  y_y_ols_m y_y_ols_b                           y_y_ols_n y_y_r2 xy_y2_ols_m        xy_y2_ols_b         xy_y2_ols_n xy_y2_r2
+pan 0.017025512736819345  0.500402892289764  2081      0.00028691820445815624  1         -0.00000000000000002890430283104539 2081      1      0.8781320866715664 0.11908230147563569 2081        0.4174982737731127
+eks 0.04078049236855813   0.4814020796765104 1965      0.0016461239223448218   1         0.00000000000000017862676354313703  1965      1      0.897872861169018  0.1073405443361234  1965        0.4556322386425451
+wye -0.03915349075204785  0.5255096523974457 1966      0.0015051268704373377   1         0.00000000000000004464425401127647  1966      1      0.8538317334220837 0.1267454301662969  1966        0.3899172181859931
+zee 0.0027812364960401333 0.5043070448033061 2047      0.000007751652858787357 1         0.00000000000000004819404567023685  2047      1      0.8524439912011011 0.12401684308018947 2047        0.39356598090006495
+hat -0.018620577041095272 0.5179005397264937 1941      0.00035200366460556604  1         -0.00000000000000003400445761787692 1941      1      0.8412305086345017 0.13557328318623207 1941        0.3687944261732266
 
Here's an example simple line-fit. The `x` and `y` @@ -3513,11 +3513,11 @@ upsec_count_pca_quality 0.9999590846136102 donesec 92.33051350964094 color purple -upsec_count_pca_m -39.03009744795354 -upsec_count_pca_b 979.9883413064914 +upsec_count_pca_m -39.030097447953594 +upsec_count_pca_b 979.9883413064917 upsec_count_pca_n 21 upsec_count_pca_quality 0.9999908956206317 -donesec 25.10852919630297 +donesec 25.108529196302943 ## step @@ -3794,9 +3794,9 @@ distinct_count 5 5 10000 10000 10000 mode pan wye 1 0.3467901443380824 0.7268028627434533 sum 0 0 50005000 4986.019681679581 5062.057444929905 mean - - 5000.5 0.49860196816795804 0.5062057444929905 -stddev - - 2886.8956799071675 0.2902925151144007 0.290880086426933 -var - - 8334166.666666667 0.08426974433144456 0.08461122467974003 -skewness - - 0 -0.0006899591185521965 -0.017849760120133784 +stddev - - 2886.8956799071675 0.29029251511440074 0.2908800864269331 +var - - 8334166.666666667 0.08426974433144457 0.08461122467974005 +skewness - - 0 -0.0006899591185517494 -0.01784976012013298 minlen 3 3 1 15 13 maxlen 3 3 5 22 22 min eks eks 1 0.00004509679127584487 0.00008818962627266114 diff --git a/docs/src/two-pass-algorithms.md b/docs/src/two-pass-algorithms.md index 146f3a81e1..e475aebf3b 100644 --- a/docs/src/two-pass-algorithms.md +++ b/docs/src/two-pass-algorithms.md @@ -598,8 +598,8 @@ hat pan 0.4643355557376876 x_count 10000 x_sum 4986.019681679581 x_mean 0.49860196816795804 -x_var 0.08426974433144456 -x_stddev 0.2902925151144007 +x_var 0.08426974433144457 +x_stddev 0.29029251511440074
diff --git a/internal/pkg/bifs/arithmetic.go b/internal/pkg/bifs/arithmetic.go
index 45fc41390e..86f6d1e7f9 100644
--- a/internal/pkg/bifs/arithmetic.go
+++ b/internal/pkg/bifs/arithmetic.go
@@ -3,6 +3,7 @@ package bifs
 import (
 	"math"
 
+	"github.com/johnkerl/miller/internal/pkg/lib"
 	"github.com/johnkerl/miller/internal/pkg/mlrval"
 )
 
@@ -793,7 +794,7 @@ func min_s_ss(input1, input2 *mlrval.Mlrval) *mlrval.Mlrval {
 }
 
 var min_dispositions = [mlrval.MT_DIM][mlrval.MT_DIM]BinaryFunc{
-	//       .  INT       FLOAT     BOOL      VOID   STRING    ARRAY  MAP    FUNC    ERROR   NULL   ABSENT
+	//       .  INT        FLOAT     BOOL      VOID   STRING    ARRAY  MAP    FUNC   ERROR  NULL   ABSENT
 	/*INT    */ {min_i_ii, min_f_if, _1___, _1___, _1___, _absn, _absn, _erro, _erro, _1___, _1___},
 	/*FLOAT  */ {min_f_fi, min_f_ff, _1___, _1___, _1___, _absn, _absn, _erro, _erro, _1___, _1___},
 	/*BOOL   */ {_2___, _2___, min_b_bb, _1___, _1___, _absn, _absn, _erro, _erro, _1___, _1___},
@@ -807,6 +808,8 @@ var min_dispositions = [mlrval.MT_DIM][mlrval.MT_DIM]BinaryFunc{
 	/*ABSENT */ {_2___, _2___, _2___, _2___, _2___, _absn, _absn, _erro, _erro, _null, _absn},
 }
 
+// BIF_min_binary is not a direct DSL function. It's a helper here,
+// and is also exposed publicly for use by the stats1 verb.
 func BIF_min_binary(input1, input2 *mlrval.Mlrval) *mlrval.Mlrval {
 	return (min_dispositions[input1.Type()][input2.Type()])(input1, input2)
 }
@@ -814,15 +817,91 @@ func BIF_min_binary(input1, input2 *mlrval.Mlrval) *mlrval.Mlrval {
 func BIF_min_variadic(mlrvals []*mlrval.Mlrval) *mlrval.Mlrval {
 	if len(mlrvals) == 0 {
 		return mlrval.VOID
-	} else {
-		retval := mlrvals[0]
-		for i := range mlrvals {
-			if i > 0 {
-				retval = BIF_min_binary(retval, mlrvals[i])
-			}
+	}
+	return mlrval.ArrayFold(
+		mlrvals,
+		bif_min_unary(mlrvals[0]),
+		func(a, b *mlrval.Mlrval) *mlrval.Mlrval {
+			return BIF_min_binary(bif_min_unary(a), bif_min_unary(b))
+		},
+	)
+}
+
+func BIF_min_within_map_values(m *mlrval.Mlrmap) *mlrval.Mlrval {
+	if m.Head == nil {
+		return mlrval.VOID
+	}
+	return mlrval.MapFold(
+		m,
+		m.Head.Value,
+		func(a, b *mlrval.Mlrval) *mlrval.Mlrval {
+			return BIF_min_binary(a, b)
+		},
+	)
+}
+
+// bif_min_unary_array and bif_min_unary_map let bif_min_unary recurse into
+// collection arguments, so users can do either min(1,2,3) or min([1,2,3]).
+func bif_min_unary_array(input1 *mlrval.Mlrval) *mlrval.Mlrval {
+	return BIF_min_variadic(input1.AcquireArrayValue())
+}
+func bif_min_unary_map(input1 *mlrval.Mlrval) *mlrval.Mlrval {
+	return BIF_min_within_map_values(input1.AcquireMapValue())
+}
+
+// We get a Golang "initialization loop" due to recursive depth computation
+// if this is defined statically. So, we use a "package init" function.
+var min_unary_dispositions = [mlrval.MT_DIM]UnaryFunc{}
+
+func init() {
+	min_unary_dispositions = [mlrval.MT_DIM]UnaryFunc{
+		/*INT    */ _1u___,
+		/*FLOAT  */ _1u___,
+		/*BOOL   */ _1u___,
+		/*VOID   */ _1u___,
+		/*STRING */ _1u___,
+		/*ARRAY  */ bif_min_unary_array,
+		/*MAP    */ bif_min_unary_map,
+		/*FUNC   */ _erro1,
+		/*ERROR  */ _erro1,
+		/*NULL   */ _null1,
+		/*ABSENT */ _absn1,
+	}
+}
+
+func bif_min_unary(input1 *mlrval.Mlrval) *mlrval.Mlrval {
+	return min_unary_dispositions[input1.Type()](input1)
+}
+
+// ----------------------------------------------------------------
+func BIF_minlen_variadic(mlrvals []*mlrval.Mlrval) *mlrval.Mlrval {
+	if len(mlrvals) == 0 {
+		return mlrval.VOID
+	}
+	// Do the bulk arithmetic on native ints not Mlrvals, to avoid unnecessary allocation.
+	retval := lib.UTF8Strlen(mlrvals[0].OriginalString())
+	for i, _ := range mlrvals {
+		clen := lib.UTF8Strlen(mlrvals[i].OriginalString())
+		if clen < retval {
+			retval = clen
+		}
+	}
+	return mlrval.FromInt(retval)
+}
+
+func BIF_minlen_within_map_values(m *mlrval.Mlrmap) *mlrval.Mlrval {
+	if m.Head == nil {
+		return mlrval.VOID
+	}
+	// Do the bulk arithmetic on native ints not Mlrvals, to avoid unnecessary allocation.
+	retval := lib.UTF8Strlen(m.Head.Value.OriginalString())
+	for pe := m.Head.Next; pe != nil; pe = pe.Next {
+		clen := lib.UTF8Strlen(pe.Value.OriginalString())
+		if clen < retval {
+			retval = clen
 		}
-		return retval
 	}
+	return mlrval.FromInt(retval)
 }
 
 // ----------------------------------------------------------------
@@ -891,6 +970,8 @@ var max_dispositions = [mlrval.MT_DIM][mlrval.MT_DIM]BinaryFunc{
 	/*ABSENT */ {_2___, _2___, _2___, _2___, _2___, _absn, _absn, _erro, _erro, _absn, _absn},
 }
 
+// BIF_max_binary is not a direct DSL function. It's a helper here,
+// and is also exposed publicly for use by the stats1 verb.
 func BIF_max_binary(input1, input2 *mlrval.Mlrval) *mlrval.Mlrval {
 	return (max_dispositions[input1.Type()][input2.Type()])(input1, input2)
 }
@@ -898,13 +979,89 @@ func BIF_max_binary(input1, input2 *mlrval.Mlrval) *mlrval.Mlrval {
 func BIF_max_variadic(mlrvals []*mlrval.Mlrval) *mlrval.Mlrval {
 	if len(mlrvals) == 0 {
 		return mlrval.VOID
-	} else {
-		retval := mlrvals[0]
-		for i := range mlrvals {
-			if i > 0 {
-				retval = BIF_max_binary(retval, mlrvals[i])
-			}
+	}
+	return mlrval.ArrayFold(
+		mlrvals,
+		bif_max_unary(mlrvals[0]),
+		func(a, b *mlrval.Mlrval) *mlrval.Mlrval {
+			return BIF_max_binary(bif_max_unary(a), bif_max_unary(b))
+		},
+	)
+}
+
+func BIF_max_within_map_values(m *mlrval.Mlrmap) *mlrval.Mlrval {
+	if m.Head == nil {
+		return mlrval.VOID
+	}
+	return mlrval.MapFold(
+		m,
+		m.Head.Value,
+		func(a, b *mlrval.Mlrval) *mlrval.Mlrval {
+			return BIF_max_binary(a, b)
+		},
+	)
+}
+
+// bif_max_unary_array and bif_max_unary_map let bif_max_unary recurse into
+// collection arguments, so users can do either max(1,2,3) or max([1,2,3]).
+func bif_max_unary_array(input1 *mlrval.Mlrval) *mlrval.Mlrval {
+	return BIF_max_variadic(input1.AcquireArrayValue())
+}
+func bif_max_unary_map(input1 *mlrval.Mlrval) *mlrval.Mlrval {
+	return BIF_max_within_map_values(input1.AcquireMapValue())
+}
+
+// We get a Golang "initialization loop" due to recursive depth computation
+// if this is defined statically. So, we use a "package init" function.
+var max_unary_dispositions = [mlrval.MT_DIM]UnaryFunc{}
+
+func init() {
+	max_unary_dispositions = [mlrval.MT_DIM]UnaryFunc{
+		/*INT    */ _1u___,
+		/*FLOAT  */ _1u___,
+		/*BOOL   */ _1u___,
+		/*VOID   */ _1u___,
+		/*STRING */ _1u___,
+		/*ARRAY  */ bif_max_unary_array,
+		/*MAP    */ bif_max_unary_map,
+		/*FUNC   */ _erro1,
+		/*ERROR  */ _erro1,
+		/*NULL   */ _null1,
+		/*ABSENT */ _absn1,
+	}
+}
+
+func bif_max_unary(input1 *mlrval.Mlrval) *mlrval.Mlrval {
+	return max_unary_dispositions[input1.Type()](input1)
+}
+
+// ----------------------------------------------------------------
+func BIF_maxlen_variadic(mlrvals []*mlrval.Mlrval) *mlrval.Mlrval {
+	if len(mlrvals) == 0 {
+		return mlrval.VOID
+	}
+	// Do the bulk arithmetic on native ints not Mlrvals, to avoid unnecessary allocation.
+	retval := lib.UTF8Strlen(mlrvals[0].OriginalString())
+	for i, _ := range mlrvals {
+		clen := lib.UTF8Strlen(mlrvals[i].OriginalString())
+		if clen > retval {
+			retval = clen
+		}
+	}
+	return mlrval.FromInt(retval)
+}
+
+func BIF_maxlen_within_map_values(m *mlrval.Mlrmap) *mlrval.Mlrval {
+	if m.Head == nil {
+		return mlrval.VOID
+	}
+	// Do the bulk arithmetic on native ints not Mlrvals, to avoid unnecessary allocation.
+	retval := lib.UTF8Strlen(m.Head.Value.OriginalString())
+	for pe := m.Head.Next; pe != nil; pe = pe.Next {
+		clen := lib.UTF8Strlen(pe.Value.OriginalString())
+		if clen > retval {
+			retval = clen
 		}
-		return retval
 	}
+	return mlrval.FromInt(retval)
 }
diff --git a/internal/pkg/bifs/percentiles.go b/internal/pkg/bifs/percentiles.go
new file mode 100644
index 0000000000..087e7f2000
--- /dev/null
+++ b/internal/pkg/bifs/percentiles.go
@@ -0,0 +1,217 @@
+package bifs
+
+import (
+	"math"
+
+	"github.com/johnkerl/miller/internal/pkg/mlrval"
+)
+
+// GetPercentileLinearlyInterpolated returns the p-th percentile (p in percent) over
+// array[0..n-1], interpolating linearly between the two neighboring elements.
+// The array is indexed as-is (presumably sorted by the caller -- confirm), and n
+// must be at least 1. Negative p clamps to the first element; p >= 100 to the last.
+func GetPercentileLinearlyInterpolated(array []*mlrval.Mlrval, n int, p float64) *mlrval.Mlrval {
+	findex := (p / 100.0) * (float64(n) - 1)
+	if findex < 0.0 {
+		findex = 0.0
+	}
+	iindex := int(math.Floor(findex))
+	if iindex >= n-1 {
+		// Clamp: for p > 100, iindex exceeds n-1 and array[iindex] would be out of range.
+		return array[n-1].Copy()
+	}
+	// TODO: just do this in float64:
+	// array[iindex] + frac * (array[iindex+1] - array[iindex])
+	frac := mlrval.FromFloat(findex - float64(iindex))
+	diff := BIF_minus_binary(array[iindex+1], array[iindex])
+	prod := BIF_times(frac, diff)
+	return BIF_plus_binary(array[iindex], prod)
+}
+
+// ================================================================
+// Non-interpolated percentiles (see also https://en.wikipedia.org/wiki/Percentile)
+
+// ----------------------------------------------------------------
+// OPTION 1: int index = p*n/100.0;
+//
+// x
+// 0
+// 20
+// 40
+// 60
+// 80
+// 100
+//
+// x_p00 0 x_p10  0 x_p20 20 x_p30 20 x_p40 40 x_p50 60 x_p60 60 x_p70 80 x_p80  80 x_p90 100 x_p100 100
+// x_p01 0 x_p11  0 x_p21 20 x_p31 20 x_p41 40 x_p51 60 x_p61 60 x_p71 80 x_p81  80 x_p91 100
+// x_p02 0 x_p12  0 x_p22 20 x_p32 20 x_p42 40 x_p52 60 x_p62 60 x_p72 80 x_p82  80 x_p92 100
+// x_p03 0 x_p13  0 x_p23 20 x_p33 20 x_p43 40 x_p53 60 x_p63 60 x_p73 80 x_p83  80 x_p93 100
+// x_p04 0 x_p14  0 x_p24 20 x_p34 40 x_p44 40 x_p54 60 x_p64 60 x_p74 80 x_p84 100 x_p94 100
+// x_p05 0 x_p15  0 x_p25 20 x_p35 40 x_p45 40 x_p55 60 x_p65 60 x_p75 80 x_p85 100 x_p95 100
+// x_p06 0 x_p16  0 x_p26 20 x_p36 40 x_p46 40 x_p56 60 x_p66 60 x_p76 80 x_p86 100 x_p96 100
+// x_p07 0 x_p17 20 x_p27 20 x_p37 40 x_p47 40 x_p57 60 x_p67 80 x_p77 80 x_p87 100 x_p97 100
+// x_p08 0 x_p18 20 x_p28 20 x_p38 40 x_p48 40 x_p58 60 x_p68 80 x_p78 80 x_p88 100 x_p98 100
+// x_p09 0 x_p19 20 x_p29 20 x_p39 40 x_p49 40 x_p59 60 x_p69 80 x_p79 80 x_p89 100 x_p99 100
+//
+// x
+// 0
+// 25
+// 50
+// 75
+// 100
+//
+// x_p00 0 x_p10 0 x_p20 25 x_p30 25 x_p40 50 x_p50 50 x_p60 75 x_p70 75 x_p80 100 x_p90 100 x_p100 100
+// x_p01 0 x_p11 0 x_p21 25 x_p31 25 x_p41 50 x_p51 50 x_p61 75 x_p71 75 x_p81 100 x_p91 100
+// x_p02 0 x_p12 0 x_p22 25 x_p32 25 x_p42 50 x_p52 50 x_p62 75 x_p72 75 x_p82 100 x_p92 100
+// x_p03 0 x_p13 0 x_p23 25 x_p33 25 x_p43 50 x_p53 50 x_p63 75 x_p73 75 x_p83 100 x_p93 100
+// x_p04 0 x_p14 0 x_p24 25 x_p34 25 x_p44 50 x_p54 50 x_p64 75 x_p74 75 x_p84 100 x_p94 100
+// x_p05 0 x_p15 0 x_p25 25 x_p35 25 x_p45 50 x_p55 50 x_p65 75 x_p75 75 x_p85 100 x_p95 100
+// x_p06 0 x_p16 0 x_p26 25 x_p36 25 x_p46 50 x_p56 50 x_p66 75 x_p76 75 x_p86 100 x_p96 100
+// x_p07 0 x_p17 0 x_p27 25 x_p37 25 x_p47 50 x_p57 50 x_p67 75 x_p77 75 x_p87 100 x_p97 100
+// x_p08 0 x_p18 0 x_p28 25 x_p38 25 x_p48 50 x_p58 50 x_p68 75 x_p78 75 x_p88 100 x_p98 100
+// x_p09 0 x_p19 0 x_p29 25 x_p39 25 x_p49 50 x_p59 50 x_p69 75 x_p79 75 x_p89 100 x_p99 100
+//
+// ----------------------------------------------------------------
+// OPTION 2: int index = p*(n-1)/100.0;
+//
+// x
+// 0
+// 20
+// 40
+// 60
+// 80
+// 100
+//
+// x_p00 0 x_p10 0 x_p20 20 x_p30 20 x_p40 40 x_p50 40 x_p60 60 x_p70 60 x_p80 80 x_p90 80 x_p100 100
+// x_p01 0 x_p11 0 x_p21 20 x_p31 20 x_p41 40 x_p51 40 x_p61 60 x_p71 60 x_p81 80 x_p91 80
+// x_p02 0 x_p12 0 x_p22 20 x_p32 20 x_p42 40 x_p52 40 x_p62 60 x_p72 60 x_p82 80 x_p92 80
+// x_p03 0 x_p13 0 x_p23 20 x_p33 20 x_p43 40 x_p53 40 x_p63 60 x_p73 60 x_p83 80 x_p93 80
+// x_p04 0 x_p14 0 x_p24 20 x_p34 20 x_p44 40 x_p54 40 x_p64 60 x_p74 60 x_p84 80 x_p94 80
+// x_p05 0 x_p15 0 x_p25 20 x_p35 20 x_p45 40 x_p55 40 x_p65 60 x_p75 60 x_p85 80 x_p95 80
+// x_p06 0 x_p16 0 x_p26 20 x_p36 20 x_p46 40 x_p56 40 x_p66 60 x_p76 60 x_p86 80 x_p96 80
+// x_p07 0 x_p17 0 x_p27 20 x_p37 20 x_p47 40 x_p57 40 x_p67 60 x_p77 60 x_p87 80 x_p97 80
+// x_p08 0 x_p18 0 x_p28 20 x_p38 20 x_p48 40 x_p58 40 x_p68 60 x_p78 60 x_p88 80 x_p98 80
+// x_p09 0 x_p19 0 x_p29 20 x_p39 20 x_p49 40 x_p59 40 x_p69 60 x_p79 60 x_p89 80 x_p99 80
+//
+// x
+// 0
+// 25
+// 50
+// 75
+// 100
+//
+// x_p00 0 x_p10 0 x_p20  0 x_p30 25 x_p40 25 x_p50 50 x_p60 50 x_p70 50 x_p80 75 x_p90 75 x_p100 100
+// x_p01 0 x_p11 0 x_p21  0 x_p31 25 x_p41 25 x_p51 50 x_p61 50 x_p71 50 x_p81 75 x_p91 75
+// x_p02 0 x_p12 0 x_p22  0 x_p32 25 x_p42 25 x_p52 50 x_p62 50 x_p72 50 x_p82 75 x_p92 75
+// x_p03 0 x_p13 0 x_p23  0 x_p33 25 x_p43 25 x_p53 50 x_p63 50 x_p73 50 x_p83 75 x_p93 75
+// x_p04 0 x_p14 0 x_p24  0 x_p34 25 x_p44 25 x_p54 50 x_p64 50 x_p74 50 x_p84 75 x_p94 75
+// x_p05 0 x_p15 0 x_p25 25 x_p35 25 x_p45 25 x_p55 50 x_p65 50 x_p75 75 x_p85 75 x_p95 75
+// x_p06 0 x_p16 0 x_p26 25 x_p36 25 x_p46 25 x_p56 50 x_p66 50 x_p76 75 x_p86 75 x_p96 75
+// x_p07 0 x_p17 0 x_p27 25 x_p37 25 x_p47 25 x_p57 50 x_p67 50 x_p77 75 x_p87 75 x_p97 75
+// x_p08 0 x_p18 0 x_p28 25 x_p38 25 x_p48 25 x_p58 50 x_p68 50 x_p78 75 x_p88 75 x_p98 75
+// x_p09 0 x_p19 0 x_p29 25 x_p39 25 x_p49 25 x_p59 50 x_p69 50 x_p79 75 x_p89 75 x_p99 75
+//
+// ----------------------------------------------------------------
+// OPTION 3: int index = (int)ceil(p*(n-1)/100.0);
+//
+// x
+// 0
+// 20
+// 40
+// 60
+// 80
+// 100
+//
+// x_p00  0 x_p10 20 x_p20 20 x_p30 40 x_p40 40 x_p50 60 x_p60 60 x_p70 80 x_p80  80 x_p90 100 x_p100 100
+// x_p01 20 x_p11 20 x_p21 40 x_p31 40 x_p41 60 x_p51 60 x_p61 80 x_p71 80 x_p81 100 x_p91 100
+// x_p02 20 x_p12 20 x_p22 40 x_p32 40 x_p42 60 x_p52 60 x_p62 80 x_p72 80 x_p82 100 x_p92 100
+// x_p03 20 x_p13 20 x_p23 40 x_p33 40 x_p43 60 x_p53 60 x_p63 80 x_p73 80 x_p83 100 x_p93 100
+// x_p04 20 x_p14 20 x_p24 40 x_p34 40 x_p44 60 x_p54 60 x_p64 80 x_p74 80 x_p84 100 x_p94 100
+// x_p05 20 x_p15 20 x_p25 40 x_p35 40 x_p45 60 x_p55 60 x_p65 80 x_p75 80 x_p85 100 x_p95 100
+// x_p06 20 x_p16 20 x_p26 40 x_p36 40 x_p46 60 x_p56 60 x_p66 80 x_p76 80 x_p86 100 x_p96 100
+// x_p07 20 x_p17 20 x_p27 40 x_p37 40 x_p47 60 x_p57 60 x_p67 80 x_p77 80 x_p87 100 x_p97 100
+// x_p08 20 x_p18 20 x_p28 40 x_p38 40 x_p48 60 x_p58 60 x_p68 80 x_p78 80 x_p88 100 x_p98 100
+// x_p09 20 x_p19 20 x_p29 40 x_p39 40 x_p49 60 x_p59 60 x_p69 80 x_p79 80 x_p89 100 x_p99 100
+//
+// x
+// 0
+// 25
+// 50
+// 75
+// 100
+//
+// x_p00  0 x_p10 25 x_p20 25 x_p30 50 x_p40 50 x_p50 50 x_p60 75 x_p70  75 x_p80 100 x_p90 100 x_p100 100
+// x_p01 25 x_p11 25 x_p21 25 x_p31 50 x_p41 50 x_p51 75 x_p61 75 x_p71  75 x_p81 100 x_p91 100
+// x_p02 25 x_p12 25 x_p22 25 x_p32 50 x_p42 50 x_p52 75 x_p62 75 x_p72  75 x_p82 100 x_p92 100
+// x_p03 25 x_p13 25 x_p23 25 x_p33 50 x_p43 50 x_p53 75 x_p63 75 x_p73  75 x_p83 100 x_p93 100
+// x_p04 25 x_p14 25 x_p24 25 x_p34 50 x_p44 50 x_p54 75 x_p64 75 x_p74  75 x_p84 100 x_p94 100
+// x_p05 25 x_p15 25 x_p25 25 x_p35 50 x_p45 50 x_p55 75 x_p65 75 x_p75  75 x_p85 100 x_p95 100
+// x_p06 25 x_p16 25 x_p26 50 x_p36 50 x_p46 50 x_p56 75 x_p66 75 x_p76 100 x_p86 100 x_p96 100
+// x_p07 25 x_p17 25 x_p27 50 x_p37 50 x_p47 50 x_p57 75 x_p67 75 x_p77 100 x_p87 100 x_p97 100
+// x_p08 25 x_p18 25 x_p28 50 x_p38 50 x_p48 50 x_p58 75 x_p68 75 x_p78 100 x_p88 100 x_p98 100
+// x_p09 25 x_p19 25 x_p29 50 x_p39 50 x_p49 50 x_p59 75 x_p69 75 x_p79 100 x_p89 100 x_p99 100
+//
+// ----------------------------------------------------------------
+// OPTION 4: int index = (int)ceil(-0.5 + p*(n-1)/100.0);
+//
+// x
+// 0
+// 20
+// 40
+// 60
+// 80
+// 100
+//
+// x_p00 0 x_p10  0 x_p20 20 x_p30 20 x_p40 40 x_p50 40 x_p60 60 x_p70 60 x_p80 80 x_p90  80 x_p100 100
+// x_p01 0 x_p11 20 x_p21 20 x_p31 40 x_p41 40 x_p51 60 x_p61 60 x_p71 80 x_p81 80 x_p91 100
+// x_p02 0 x_p12 20 x_p22 20 x_p32 40 x_p42 40 x_p52 60 x_p62 60 x_p72 80 x_p82 80 x_p92 100
+// x_p03 0 x_p13 20 x_p23 20 x_p33 40 x_p43 40 x_p53 60 x_p63 60 x_p73 80 x_p83 80 x_p93 100
+// x_p04 0 x_p14 20 x_p24 20 x_p34 40 x_p44 40 x_p54 60 x_p64 60 x_p74 80 x_p84 80 x_p94 100
+// x_p05 0 x_p15 20 x_p25 20 x_p35 40 x_p45 40 x_p55 60 x_p65 60 x_p75 80 x_p85 80 x_p95 100
+// x_p06 0 x_p16 20 x_p26 20 x_p36 40 x_p46 40 x_p56 60 x_p66 60 x_p76 80 x_p86 80 x_p96 100
+// x_p07 0 x_p17 20 x_p27 20 x_p37 40 x_p47 40 x_p57 60 x_p67 60 x_p77 80 x_p87 80 x_p97 100
+// x_p08 0 x_p18 20 x_p28 20 x_p38 40 x_p48 40 x_p58 60 x_p68 60 x_p78 80 x_p88 80 x_p98 100
+// x_p09 0 x_p19 20 x_p29 20 x_p39 40 x_p49 40 x_p59 60 x_p69 60 x_p79 80 x_p89 80 x_p99 100
+//
+// x
+// 0
+// 25
+// 50
+// 75
+// 100
+//
+// x_p00 0 x_p10  0 x_p20 25 x_p30 25 x_p40 50 x_p50 50 x_p60 50 x_p70 75 x_p80  75 x_p90 100 x_p100 100
+// x_p01 0 x_p11  0 x_p21 25 x_p31 25 x_p41 50 x_p51 50 x_p61 50 x_p71 75 x_p81  75 x_p91 100
+// x_p02 0 x_p12  0 x_p22 25 x_p32 25 x_p42 50 x_p52 50 x_p62 50 x_p72 75 x_p82  75 x_p92 100
+// x_p03 0 x_p13 25 x_p23 25 x_p33 25 x_p43 50 x_p53 50 x_p63 75 x_p73 75 x_p83  75 x_p93 100
+// x_p04 0 x_p14 25 x_p24 25 x_p34 25 x_p44 50 x_p54 50 x_p64 75 x_p74 75 x_p84  75 x_p94 100
+// x_p05 0 x_p15 25 x_p25 25 x_p35 25 x_p45 50 x_p55 50 x_p65 75 x_p75 75 x_p85  75 x_p95 100
+// x_p06 0 x_p16 25 x_p26 25 x_p36 25 x_p46 50 x_p56 50 x_p66 75 x_p76 75 x_p86  75 x_p96 100
+// x_p07 0 x_p17 25 x_p27 25 x_p37 25 x_p47 50 x_p57 50 x_p67 75 x_p77 75 x_p87  75 x_p97 100
+// x_p08 0 x_p18 25 x_p28 25 x_p38 50 x_p48 50 x_p58 50 x_p68 75 x_p78 75 x_p88 100 x_p98 100
+// x_p09 0 x_p19 25 x_p29 25 x_p39 50 x_p49 50 x_p59 50 x_p69 75 x_p79 75 x_p89 100 x_p99 100
+//
+// ----------------------------------------------------------------
+// CONCLUSION:
+// * I like option 2 for its simplicity ...
+// * ... but option 1 matches R's quantile with type=1.
+// * (Note that Miller's interpolated percentiles match R's quantile with type=7)
+// ----------------------------------------------------------------
+
+func GetPercentileNonInterpolated(
+	array []*mlrval.Mlrval,
+	n int,
+	p float64,
+) *mlrval.Mlrval {
+	index := int(p * float64(n) / 100.0)
+	//index := p * (float64(float64(n)) - 1) / 100.0
+	//index := int(ceil(p * (float64(n) - 1) / 100.0))
+	//index := int(ceil(-0.5 + p*(float64(n)-1)/100.0))
+	if index >= n {
+		index = n - 1
+	}
+	if index < 0 {
+		index = 0
+	}
+	return array[index].Copy()
+}
diff --git a/internal/pkg/bifs/stats.go b/internal/pkg/bifs/stats.go
index efcabec76b..99e1e0ccd3 100644
--- a/internal/pkg/bifs/stats.go
+++ b/internal/pkg/bifs/stats.go
@@ -2,6 +2,7 @@ package bifs
 
 import (
 	"math"
+	"sort"
 
 	"github.com/johnkerl/miller/internal/pkg/lib"
 	"github.com/johnkerl/miller/internal/pkg/mlrval"
@@ -24,7 +25,7 @@ import (
 //	output = [m, b, math.sqrt(var_m), math.sqrt(var_b)]
 
 // ----------------------------------------------------------------
-func BIF_get_var(mn, msum, msum2 *mlrval.Mlrval) *mlrval.Mlrval {
+func BIF_finalize_variance(mn, msum, msum2 *mlrval.Mlrval) *mlrval.Mlrval {
 	n, isInt := mn.GetIntValue()
 	lib.InternalCodingErrorIf(!isInt)
 	sum, isNumber := msum.GetNumericToFloatValue()
@@ -46,8 +47,8 @@ func BIF_get_var(mn, msum, msum2 *mlrval.Mlrval) *mlrval.Mlrval {
 }
 
 // ----------------------------------------------------------------
-func BIF_get_stddev(mn, msum, msum2 *mlrval.Mlrval) *mlrval.Mlrval {
-	mvar := BIF_get_var(mn, msum, msum2)
+func BIF_finalize_stddev(mn, msum, msum2 *mlrval.Mlrval) *mlrval.Mlrval {
+	mvar := BIF_finalize_variance(mn, msum, msum2)
 	if mvar.IsVoid() {
 		return mvar
 	}
@@ -55,8 +56,8 @@ func BIF_get_stddev(mn, msum, msum2 *mlrval.Mlrval) *mlrval.Mlrval {
 }
 
 // ----------------------------------------------------------------
-func BIF_get_mean_EB(mn, msum, msum2 *mlrval.Mlrval) *mlrval.Mlrval {
-	mvar := BIF_get_var(mn, msum, msum2)
+func BIF_finalize_mean_eb(mn, msum, msum2 *mlrval.Mlrval) *mlrval.Mlrval {
+	mvar := BIF_finalize_variance(mn, msum, msum2)
 	if mvar.IsVoid() {
 		return mvar
 	}
@@ -87,7 +88,7 @@ func BIF_get_mean_EB(mn, msum, msum2 *mlrval.Mlrval) *mlrval.Mlrval {
 //   = sumx2 - n mean^2
 
 // ----------------------------------------------------------------
-func BIF_get_skewness(mn, msum, msum2, msum3 *mlrval.Mlrval) *mlrval.Mlrval {
+func BIF_finalize_skewness(mn, msum, msum2, msum3 *mlrval.Mlrval) *mlrval.Mlrval {
 	n, isInt := mn.GetIntValue()
 	lib.InternalCodingErrorIf(!isInt)
 	if n < 2 {
@@ -124,7 +125,7 @@ func BIF_get_skewness(mn, msum, msum2, msum3 *mlrval.Mlrval) *mlrval.Mlrval {
 //   = sumx4 - mean*(4 sumx3 - mean*(6 sumx2 - 3 n mean^2))
 
 // ----------------------------------------------------------------
-func BIF_get_kurtosis(mn, msum, msum2, msum3, msum4 *mlrval.Mlrval) *mlrval.Mlrval {
+func BIF_finalize_kurtosis(mn, msum, msum2, msum3, msum4 *mlrval.Mlrval) *mlrval.Mlrval {
 	n, isInt := mn.GetIntValue()
 	lib.InternalCodingErrorIf(!isInt)
 	if n < 2 {
@@ -149,3 +150,485 @@ func BIF_get_kurtosis(mn, msum, msum2, msum3, msum4 *mlrval.Mlrval) *mlrval.Mlrv
 	return mlrval.FromFloat(numerator/denominator - 3.0)
 
 }
+
+// ================================================================
+// STATS ROUTINES -- other than min/max which are placed separately.
+
+// This is a helper function for BIFs which operate only on array or map.
+// It shorthands what values to return for non-collection inputs.
+func check_collection(c *mlrval.Mlrval) (bool, *mlrval.Mlrval) {
+	vtype := c.Type()
+	switch vtype {
+	case mlrval.MT_ARRAY:
+		return true, c
+	case mlrval.MT_MAP:
+		return true, c
+	case mlrval.MT_ABSENT:
+		return false, mlrval.ABSENT
+	default:
+		return false, mlrval.ERROR
+	}
+}
+
+// collection_sum_of_function sums f(value) for value in the array or map:
+// e.g. sum of values, sum of squares of values, etc.
+func collection_sum_of_function(
+	collection *mlrval.Mlrval,
+	f func(element *mlrval.Mlrval) *mlrval.Mlrval,
+) *mlrval.Mlrval {
+	return mlrval.CollectionFold(
+		collection,
+		mlrval.FromInt(0),
+		func(a, b *mlrval.Mlrval) *mlrval.Mlrval {
+			return BIF_plus_binary(a, f(b))
+		},
+	)
+}
+
+func BIF_count(collection *mlrval.Mlrval) *mlrval.Mlrval {
+	ok, value_if_not := check_collection(collection)
+	if !ok {
+		return value_if_not
+	}
+	if collection.IsArray() {
+		arrayval := collection.AcquireArrayValue()
+		return mlrval.FromInt(int64(len(arrayval)))
+	} else {
+		mapval := collection.AcquireMapValue()
+		return mlrval.FromInt(mapval.FieldCount)
+	}
+}
+
+func BIF_null_count(collection *mlrval.Mlrval) *mlrval.Mlrval {
+	ok, value_if_not := check_collection(collection)
+	if !ok {
+		return value_if_not
+	}
+	f := func(element *mlrval.Mlrval) *mlrval.Mlrval {
+		if element.IsVoid() || element.IsNull() {
+			return mlrval.FromInt(1)
+		} else {
+			return mlrval.FromInt(0)
+		}
+	}
+	return mlrval.CollectionFold(
+		collection,
+		mlrval.FromInt(0),
+		func(a, b *mlrval.Mlrval) *mlrval.Mlrval {
+			return BIF_plus_binary(a, f(b))
+		},
+	)
+}
+
+func BIF_distinct_count(collection *mlrval.Mlrval) *mlrval.Mlrval {
+	ok, value_if_not := check_collection(collection)
+	if !ok {
+		return value_if_not
+	}
+	counts := make(map[string]int)
+	if collection.IsArray() {
+		a := collection.AcquireArrayValue()
+		for _, e := range a {
+			valueString := e.OriginalString()
+			counts[valueString] += 1
+		}
+	} else {
+		m := collection.AcquireMapValue()
+		for pe := m.Head; pe != nil; pe = pe.Next {
+			valueString := pe.Value.OriginalString()
+			counts[valueString] += 1
+		}
+	}
+	return mlrval.FromInt(int64(len(counts)))
+}
+
+func BIF_mode(collection *mlrval.Mlrval) *mlrval.Mlrval {
+	return bif_mode_or_antimode(collection, func(a, b int) bool { return a > b })
+}
+
+func BIF_antimode(collection *mlrval.Mlrval) *mlrval.Mlrval {
+	return bif_mode_or_antimode(collection, func(a, b int) bool { return a < b })
+}
+
+func bif_mode_or_antimode(
+	collection *mlrval.Mlrval,
+	cmp func(int, int) bool,
+) *mlrval.Mlrval {
+	ok, value_if_not := check_collection(collection)
+	if !ok {
+		return value_if_not
+	}
+
+	// Do not use a Go map[string]int as that makes the output in the case of ties
+	// (e.g. input = [3,3,4,4]) non-deterministic. That's bad for unit tests and also
+	// simply bad UX.
+	counts := lib.NewOrderedMap()
+
+	// We use stringification to detect uniqueness. Yet we want the output to be typed,
+	// e.g. mode of an array of ints should be an int, not a string. Here we store
+	// a reference to one representative for each equivalence class.
+	reps := lib.NewOrderedMap()
+
+	if collection.IsArray() {
+		a := collection.AcquireArrayValue()
+		if len(a) == 0 {
+			return mlrval.VOID
+		}
+		for _, e := range a {
+			valueString := e.OriginalString()
+			if counts.Has(valueString) {
+				counts.Put(valueString, counts.Get(valueString).(int)+1)
+			} else {
+				counts.Put(valueString, 1)
+				reps.Put(valueString, e)
+			}
+		}
+	} else {
+		m := collection.AcquireMapValue()
+		if m.Head == nil {
+			return mlrval.VOID
+		}
+		for pe := m.Head; pe != nil; pe = pe.Next {
+			valueString := pe.Value.OriginalString()
+			if counts.Has(valueString) {
+				counts.Put(valueString, counts.Get(valueString).(int)+1)
+			} else {
+				counts.Put(valueString, 1)
+				reps.Put(valueString, pe.Value)
+			}
+		}
+	}
+	first := true
+	maxk := ""
+	maxv := -1
+	for pf := counts.Head; pf != nil; pf = pf.Next {
+		k := pf.Key
+		v := pf.Value.(int)
+		if first || cmp(v, maxv) {
+			maxk = k
+			maxv = v
+			first = false
+		}
+	}
+	// OrderedMap has interface{} values, so dereference as Mlrval. Then, copy the Mlrval
+	// so we're not returning a pointer to input data.
+	return reps.Get(maxk).(*mlrval.Mlrval).Copy()
+}
+
+func BIF_sum(collection *mlrval.Mlrval) *mlrval.Mlrval {
+	ok, value_if_not := check_collection(collection)
+	if !ok {
+		return value_if_not
+	}
+	return collection_sum_of_function(
+		collection,
+		func(e *mlrval.Mlrval) *mlrval.Mlrval {
+			return e
+		},
+	)
+}
+
+func BIF_sum2(collection *mlrval.Mlrval) *mlrval.Mlrval {
+	ok, value_if_not := check_collection(collection)
+	if !ok {
+		return value_if_not
+	}
+	f := func(element *mlrval.Mlrval) *mlrval.Mlrval {
+		return BIF_times(element, element)
+	}
+	return collection_sum_of_function(collection, f)
+}
+
+func BIF_sum3(collection *mlrval.Mlrval) *mlrval.Mlrval {
+	ok, value_if_not := check_collection(collection)
+	if !ok {
+		return value_if_not
+	}
+	f := func(element *mlrval.Mlrval) *mlrval.Mlrval {
+		return BIF_times(element, BIF_times(element, element))
+	}
+	return collection_sum_of_function(collection, f)
+}
+
+func BIF_sum4(collection *mlrval.Mlrval) *mlrval.Mlrval {
+	ok, value_if_not := check_collection(collection)
+	if !ok {
+		return value_if_not
+	}
+	f := func(element *mlrval.Mlrval) *mlrval.Mlrval {
+		sq := BIF_times(element, element)
+		return BIF_times(sq, sq)
+	}
+	return collection_sum_of_function(collection, f)
+}
+
+func BIF_mean(collection *mlrval.Mlrval) *mlrval.Mlrval {
+	ok, value_if_not := check_collection(collection)
+	if !ok {
+		return value_if_not
+	}
+	n := BIF_count(collection)
+	if n.AcquireIntValue() == 0 {
+		return mlrval.VOID
+	}
+	sum := BIF_sum(collection)
+	return BIF_divide(sum, n)
+}
+
+func BIF_meaneb(collection *mlrval.Mlrval) *mlrval.Mlrval {
+	ok, value_if_not := check_collection(collection)
+	if !ok {
+		return value_if_not
+	}
+	n := BIF_count(collection)
+	sum := BIF_sum(collection)
+	sum2 := BIF_sum2(collection)
+	return BIF_finalize_mean_eb(n, sum, sum2)
+}
+
+func BIF_variance(collection *mlrval.Mlrval) *mlrval.Mlrval {
+	ok, value_if_not := check_collection(collection)
+	if !ok {
+		return value_if_not
+	}
+	n := BIF_count(collection)
+	sum := BIF_sum(collection)
+	sum2 := BIF_sum2(collection)
+	return BIF_finalize_variance(n, sum, sum2)
+}
+
+func BIF_stddev(collection *mlrval.Mlrval) *mlrval.Mlrval {
+	ok, value_if_not := check_collection(collection)
+	if !ok {
+		return value_if_not
+	}
+	n := BIF_count(collection)
+	sum := BIF_sum(collection)
+	sum2 := BIF_sum2(collection)
+	return BIF_finalize_stddev(n, sum, sum2)
+}
+
+func BIF_skewness(collection *mlrval.Mlrval) *mlrval.Mlrval {
+	ok, value_if_not := check_collection(collection)
+	if !ok {
+		return value_if_not
+	}
+	n := BIF_count(collection)
+	sum := BIF_sum(collection)
+	sum2 := BIF_sum2(collection)
+	sum3 := BIF_sum3(collection)
+	return BIF_finalize_skewness(n, sum, sum2, sum3)
+}
+
+// BIF_kurtosis returns the kurtosis of the values in an array or map. It
+// gathers the count and the first four power sums and hands them to
+// BIF_finalize_kurtosis. If check_collection rejects the argument, the value
+// it supplies is returned unchanged.
+func BIF_kurtosis(collection *mlrval.Mlrval) *mlrval.Mlrval {
+	if isCollection, fallback := check_collection(collection); !isCollection {
+		return fallback
+	}
+	return BIF_finalize_kurtosis(
+		BIF_count(collection),
+		BIF_sum(collection),
+		BIF_sum2(collection),
+		BIF_sum3(collection),
+		BIF_sum4(collection),
+	)
+}
+
+// BIF_minlen is the collection form of the minimum-string-length function.
+// Arrays are forwarded to BIF_minlen_variadic and maps to
+// BIF_minlen_within_map_values. If check_collection rejects the argument,
+// the value it supplies is returned unchanged.
+func BIF_minlen(collection *mlrval.Mlrval) *mlrval.Mlrval {
+	if isCollection, fallback := check_collection(collection); !isCollection {
+		return fallback
+	}
+	if !collection.IsArray() {
+		// Passed check_collection and is not an array, so handle it as a map.
+		return BIF_minlen_within_map_values(collection.AcquireMapValue())
+	}
+	return BIF_minlen_variadic(collection.AcquireArrayValue())
+}
+
+// BIF_maxlen is the collection form of the maximum-string-length function.
+// Arrays are forwarded to BIF_maxlen_variadic and maps to
+// BIF_maxlen_within_map_values. If check_collection rejects the argument,
+// the value it supplies is returned unchanged.
+func BIF_maxlen(collection *mlrval.Mlrval) *mlrval.Mlrval {
+	if isCollection, fallback := check_collection(collection); !isCollection {
+		return fallback
+	}
+	if !collection.IsArray() {
+		// Passed check_collection and is not an array, so handle it as a map.
+		return BIF_maxlen_within_map_values(collection.AcquireMapValue())
+	}
+	return BIF_maxlen_variadic(collection.AcquireArrayValue())
+}
+
+// BIF_sort_collection returns a new array holding copies of the values of
+// the given array or map, ordered by mlrval.LessThan. For maps only the
+// values are used, not the keys. If check_collection rejects the argument,
+// the value it supplies is returned unchanged.
+func BIF_sort_collection(collection *mlrval.Mlrval) *mlrval.Mlrval {
+	if isCollection, fallback := check_collection(collection); !isCollection {
+		return fallback
+	}
+
+	// Copy every element first so the sort below only reorders the copies.
+	var copies []*mlrval.Mlrval
+	if collection.IsArray() {
+		elements := collection.AcquireArrayValue()
+		copies = make([]*mlrval.Mlrval, len(elements))
+		for idx, element := range elements {
+			copies[idx] = element.Copy()
+		}
+	} else {
+		fields := collection.AcquireMapValue()
+		copies = make([]*mlrval.Mlrval, fields.FieldCount)
+		idx := 0
+		for entry := fields.Head; entry != nil; entry = entry.Next {
+			copies[idx] = entry.Value.Copy()
+			idx++
+		}
+	}
+
+	sort.Slice(copies, func(lhs, rhs int) bool {
+		return mlrval.LessThan(copies[lhs], copies[rhs])
+	})
+
+	return mlrval.FromArray(copies)
+}
+
+// BIF_median is the one-argument median: it is the 50th percentile as
+// computed by BIF_percentile.
+func BIF_median(collection *mlrval.Mlrval) *mlrval.Mlrval {
+	fiftieth := mlrval.FromFloat(50.0)
+	return BIF_percentile(collection, fiftieth)
+}
+
+// BIF_median_with_options is the two-argument median: it is the 50th
+// percentile as computed by BIF_percentile_with_options, with the caller's
+// options passed through untouched.
+func BIF_median_with_options(collection, options *mlrval.Mlrval) *mlrval.Mlrval {
+	fiftieth := mlrval.FromFloat(50.0)
+	return BIF_percentile_with_options(collection, fiftieth, options)
+}
+
+func BIF_percentile(
+	collection *mlrval.Mlrval,
+	percentile *mlrval.Mlrval,
+) *mlrval.Mlrval {
+	return BIF_percentile_with_options(collection, percentile, nil)
+}
+
+// BIF_percentile_with_options computes a single percentile of the values in
+// an array or map by wrapping the requested percentile in a one-element array
+// and delegating to BIF_percentiles_with_options.
+//
+// The delegate does not always return a map: it returns a non-map value when
+// the collection or options are rejected, and an array when the
+// "output_array_not_map" option is set. So the result is unwrapped according
+// to its actual type rather than being assumed to be a map:
+//   - map:   the value of the first (only) entry is returned;
+//   - array: the first (only) element is returned;
+//   - other: the value is passed through unchanged (e.g. an error).
+func BIF_percentile_with_options(
+	collection *mlrval.Mlrval,
+	percentile *mlrval.Mlrval,
+	options *mlrval.Mlrval,
+) *mlrval.Mlrval {
+	percentiles := mlrval.FromSingletonArray(percentile)
+	outputs := BIF_percentiles_with_options(collection, percentiles, options)
+
+	if outputs.IsMap() {
+		head := outputs.AcquireMapValue().Head
+		if head == nil { // defensive: one percentile in should mean one entry out
+			return mlrval.ERROR
+		}
+		return head.Value
+	}
+
+	if outputs.IsArray() {
+		values := outputs.AcquireArrayValue()
+		if len(values) == 0 { // defensive: one percentile in should mean one element out
+			return mlrval.ERROR
+		}
+		return values[0]
+	}
+
+	// Neither map nor array: propagate whatever the delegate reported.
+	return outputs
+}
+
+func BIF_percentiles(
+	collection *mlrval.Mlrval,
+	percentiles *mlrval.Mlrval,
+) *mlrval.Mlrval {
+	return BIF_percentiles_with_options(collection, percentiles, nil)
+}
+
+// BIF_percentiles_with_options computes the requested percentiles over the
+// values of an array or map.
+//
+// The options argument may be nil; otherwise it must be a map, else error is
+// returned. Recognized keys (long or short form), each of which must hold a
+// boolean or else error is returned:
+//   - "array_is_sorted" / "ais": use the input as-is instead of sorting it;
+//     the input must then be an array, else error is returned.
+//   - "interpolate_linearly" / "il": interpolate between neighboring values.
+//   - "output_array_not_map" / "oa": return an array instead of a map.
+//
+// All three default to false. Unrecognized keys are ignored. If
+// check_collection rejects the collection, the value it supplies is returned
+// unchanged.
+func BIF_percentiles_with_options(
+	collection *mlrval.Mlrval,
+	percentiles *mlrval.Mlrval,
+	options *mlrval.Mlrval,
+) *mlrval.Mlrval {
+	if isCollection, fallback := check_collection(collection); !isCollection {
+		return fallback
+	}
+
+	sortedAlready, linear, wantArray := false, false, false
+
+	if options != nil {
+		optionsMap := options.GetMap()
+		if optionsMap == nil { // not a map
+			return mlrval.ERROR
+		}
+		for entry := optionsMap.Head; entry != nil; entry = entry.Next {
+			// Pick which flag this key controls, if any.
+			var flag *bool
+			switch entry.Key {
+			case "array_is_sorted", "ais":
+				flag = &sortedAlready
+			case "interpolate_linearly", "il":
+				flag = &linear
+			case "output_array_not_map", "oa":
+				flag = &wantArray
+			default:
+				continue
+			}
+
+			// The value must be exactly boolean true or false.
+			switch {
+			case mlrval.Equals(entry.Value, mlrval.TRUE):
+				*flag = true
+			case mlrval.Equals(entry.Value, mlrval.FALSE):
+				*flag = false
+			default:
+				return mlrval.ERROR
+			}
+		}
+	}
+
+	sortedValues := collection
+	if sortedAlready {
+		if !collection.IsArray() {
+			return mlrval.ERROR
+		}
+	} else {
+		sortedValues = BIF_sort_collection(collection)
+	}
+
+	return bif_percentiles(
+		sortedValues.AcquireArrayValue(),
+		percentiles,
+		linear,
+		wantArray,
+	)
+}
+
+// bif_percentiles evaluates each requested percentile against an array that
+// the caller presents as already sorted.
+//
+// The percentiles argument must be an array, else error is returned. For each
+// requested percentile the output is: error if the percentile is not numeric;
+// void if sorted_array is empty; otherwise the result of the interpolated or
+// non-interpolated lookup, as selected by interpolate_linearly.
+//
+// The result is an array of outputs when output_array_not_map is true;
+// otherwise it is a map from each percentile's string form to its output.
+func bif_percentiles(
+	sorted_array []*mlrval.Mlrval,
+	percentiles *mlrval.Mlrval,
+	interpolate_linearly bool,
+	output_array_not_map bool,
+) *mlrval.Mlrval {
+
+	requested := percentiles.GetArray()
+	if requested == nil { // not an array
+		return mlrval.ERROR
+	}
+
+	count := len(sorted_array)
+	results := make([]*mlrval.Mlrval, len(requested))
+
+	for idx, request := range requested {
+		p, isNumeric := request.GetNumericToFloatValue()
+		switch {
+		case !isNumeric:
+			results[idx] = mlrval.ERROR.Copy()
+		case count == 0:
+			results[idx] = mlrval.VOID
+		case interpolate_linearly:
+			results[idx] = GetPercentileLinearlyInterpolated(sorted_array, count, p)
+		default:
+			results[idx] = GetPercentileNonInterpolated(sorted_array, count, p)
+		}
+	}
+
+	if output_array_not_map {
+		return mlrval.FromArray(results)
+	}
+
+	keyed := mlrval.NewMlrmap()
+	for idx, request := range requested {
+		keyed.PutCopy(request.String(), results[idx])
+	}
+	return mlrval.FromMap(keyed)
+}
diff --git a/internal/pkg/bifs/stats_test.go b/internal/pkg/bifs/stats_test.go
new file mode 100644
index 0000000000..0d1276ba18
--- /dev/null
+++ b/internal/pkg/bifs/stats_test.go
@@ -0,0 +1,192 @@
+package bifs
+
+import (
+	"fmt"
+	"testing"
+
+	"github.com/stretchr/testify/assert"
+
+	"github.com/johnkerl/miller/internal/pkg/mlrval"
+)
+
+// stats_test_array builds an array-valued Mlrval holding the ints 0, 1, ..., n-1.
+func stats_test_array(n int) *mlrval.Mlrval {
+	elements := make([]*mlrval.Mlrval, n)
+	for idx := range elements {
+		elements[idx] = mlrval.FromInt(int64(idx))
+	}
+	return mlrval.FromArray(elements)
+}
+
+// array_to_map_for_test converts an array-valued Mlrval into a map-valued one
+// with the same values in the same order, keyed by each element's zero-based
+// position rendered as a string ("0", "1", ...).
+func array_to_map_for_test(a *mlrval.Mlrval) *mlrval.Mlrval {
+	converted := mlrval.NewMlrmap()
+	for position, element := range a.AcquireArrayValue() {
+		converted.PutCopy(fmt.Sprint(position), element)
+	}
+	return mlrval.FromMap(converted)
+}
+
+// TestBIF_count checks that BIF_count errors on a non-collection and reports
+// the length of arrays and maps of sizes 0 through 4.
+func TestBIF_count(t *testing.T) {
+	// A non-collection argument must produce an error.
+	assert.True(t, BIF_count(mlrval.FromInt(3)).IsError())
+
+	for n := 0; n < 5; n++ {
+		asArray := stats_test_array(n)
+		assert.True(t, mlrval.Equals(BIF_count(asArray), mlrval.FromInt(int64(n))))
+
+		asMap := array_to_map_for_test(asArray)
+		assert.True(t, mlrval.Equals(BIF_count(asMap), mlrval.FromInt(int64(n))))
+	}
+}
+
+// TestBIF_distinct_count checks that BIF_distinct_count errors on a
+// non-collection and counts distinct values in both an array and the
+// equivalent map.
+func TestBIF_distinct_count(t *testing.T) {
+	// Needs array or map. This must exercise the function under test, not
+	// BIF_count, or a regression in BIF_distinct_count's argument checking
+	// would go unnoticed.
+	input := mlrval.FromInt(3)
+	output := BIF_distinct_count(input)
+	assert.True(t, output.IsError())
+
+	// Five elements, three distinct values.
+	input = mlrval.FromArray([]*mlrval.Mlrval{
+		mlrval.FromInt(1),
+		mlrval.FromInt(2),
+		mlrval.FromInt(3),
+		mlrval.FromInt(1),
+		mlrval.FromInt(2),
+	})
+	assert.True(t, mlrval.Equals(BIF_distinct_count(input), mlrval.FromInt(3)))
+
+	// Same values, presented as a map.
+	input = array_to_map_for_test(input)
+	assert.True(t, mlrval.Equals(BIF_distinct_count(input), mlrval.FromInt(3)))
+}
+
+// TestBIF_null_count checks that BIF_null_count errors on a non-collection
+// and counts only empty-string and null values, in both an array and the
+// equivalent map.
+func TestBIF_null_count(t *testing.T) {
+	// Needs array or map. This must exercise the function under test, not
+	// BIF_count, or a regression in BIF_null_count's argument checking would
+	// go unnoticed.
+	input := mlrval.FromInt(3)
+	output := BIF_null_count(input)
+	assert.True(t, output.IsError())
+
+	input = mlrval.FromArray([]*mlrval.Mlrval{
+		mlrval.FromInt(1),
+		mlrval.FromString("two"),
+		mlrval.FromString(""), // this counts
+		mlrval.ERROR,
+		mlrval.ABSENT,
+		mlrval.NULL, // this counts
+	})
+	assert.True(t, mlrval.Equals(BIF_null_count(input), mlrval.FromInt(2)))
+
+	// Same values, presented as a map.
+	input = array_to_map_for_test(input)
+	assert.True(t, mlrval.Equals(BIF_null_count(input), mlrval.FromInt(2)))
+}
+
+// TestBIF_mode_and_antimode checks BIF_mode and BIF_antimode on
+// non-collections, empty collections, inputs with a clear winner, and inputs
+// with ties, each as both an array and the equivalent map.
+func TestBIF_mode_and_antimode(t *testing.T) {
+	// Needs array or map. This must exercise the functions under test, not
+	// BIF_count, or a regression in their argument checking would go
+	// unnoticed.
+	input := mlrval.FromInt(3)
+	assert.True(t, BIF_mode(input).IsError())
+	assert.True(t, BIF_antimode(input).IsError())
+
+	// Empty array
+	input = mlrval.FromArray([]*mlrval.Mlrval{})
+	assert.True(t, mlrval.Equals(BIF_mode(input), mlrval.VOID))
+	assert.True(t, mlrval.Equals(BIF_antimode(input), mlrval.VOID))
+
+	// Empty map
+	input = array_to_map_for_test(input)
+	assert.True(t, mlrval.Equals(BIF_mode(input), mlrval.VOID))
+	assert.True(t, mlrval.Equals(BIF_antimode(input), mlrval.VOID))
+
+	// Clear winner as array
+	input = mlrval.FromArray([]*mlrval.Mlrval{
+		mlrval.FromInt(1),
+		mlrval.FromInt(2),
+		mlrval.FromInt(3),
+		mlrval.FromInt(1),
+		mlrval.FromInt(1),
+		mlrval.FromInt(2),
+	})
+	assert.True(t, mlrval.Equals(BIF_mode(input), mlrval.FromInt(1)))
+	assert.True(t, mlrval.Equals(BIF_antimode(input), mlrval.FromInt(3)))
+
+	// Clear winner as map
+	input = array_to_map_for_test(input)
+	assert.True(t, mlrval.Equals(BIF_mode(input), mlrval.FromInt(1)))
+	assert.True(t, mlrval.Equals(BIF_antimode(input), mlrval.FromInt(3)))
+
+	// Ties as array -- first-found breaks the tie
+	input = mlrval.FromArray([]*mlrval.Mlrval{
+		mlrval.FromInt(1),
+		mlrval.FromInt(1),
+		mlrval.FromInt(1),
+		mlrval.FromInt(2),
+		mlrval.FromInt(2),
+		mlrval.FromInt(2),
+	})
+	assert.True(t, mlrval.Equals(BIF_mode(input), mlrval.FromInt(1)))
+	assert.True(t, mlrval.Equals(BIF_antimode(input), mlrval.FromInt(1)))
+
+	// Ties as map -- first-found breaks the tie
+	input = array_to_map_for_test(input)
+	assert.True(t, mlrval.Equals(BIF_mode(input), mlrval.FromInt(1)))
+	assert.True(t, mlrval.Equals(BIF_antimode(input), mlrval.FromInt(1)))
+}
+
+// TestBIF_sum checks BIF_sum, BIF_sum2, BIF_sum3, and BIF_sum4 against
+// power sums computed directly with int64 arithmetic, for arrays and maps
+// holding 0..n-1 with n from 1 through 4.
+func TestBIF_sum(t *testing.T) {
+	// Needs array or map. This must exercise the functions under test, not
+	// BIF_count, or a regression in their argument checking would go
+	// unnoticed.
+	input := mlrval.FromInt(3)
+	assert.True(t, BIF_sum(input).IsError())
+	assert.True(t, BIF_sum2(input).IsError())
+	assert.True(t, BIF_sum3(input).IsError())
+	assert.True(t, BIF_sum4(input).IsError())
+
+	// TODO: test empty array/map
+	for n := 1; n < 5; n++ {
+		input = stats_test_array(n)
+
+		// Reference values: sums of v, v^2, v^3, v^4 over the elements.
+		var isum1 int64
+		var isum2 int64
+		var isum3 int64
+		var isum4 int64
+		for _, e := range input.AcquireArrayValue() {
+			v := e.AcquireIntValue()
+			isum1 += v
+			isum2 += v * v
+			isum3 += v * v * v
+			isum4 += v * v * v * v
+		}
+		assert.True(t, mlrval.Equals(BIF_sum(input), mlrval.FromInt(isum1)))
+		assert.True(t, mlrval.Equals(BIF_sum2(input), mlrval.FromInt(isum2)))
+		assert.True(t, mlrval.Equals(BIF_sum3(input), mlrval.FromInt(isum3)))
+		assert.True(t, mlrval.Equals(BIF_sum4(input), mlrval.FromInt(isum4)))
+
+		// Same values, presented as a map.
+		input = array_to_map_for_test(input)
+		assert.True(t, mlrval.Equals(BIF_sum(input), mlrval.FromInt(isum1)))
+		assert.True(t, mlrval.Equals(BIF_sum2(input), mlrval.FromInt(isum2)))
+		assert.True(t, mlrval.Equals(BIF_sum3(input), mlrval.FromInt(isum3)))
+		assert.True(t, mlrval.Equals(BIF_sum4(input), mlrval.FromInt(isum4)))
+	}
+}
+
+// More easily tested (much lower keystroking) within the regression-test framework:
+
+// BIF_mean
+// BIF_meaneb
+// BIF_variance
+// BIF_stddev
+// BIF_skewness
+// BIF_kurtosis
+
+// BIF_min
+// BIF_max
+
+// BIF_minlen
+// BIF_maxlen
+
+// BIF_median
+// BIF_median_with_options
+// BIF_percentile
+// BIF_percentile_with_options
+// BIF_percentiles
+// BIF_percentiles_with_options
+
+// BIF_sort_collection
diff --git a/internal/pkg/dsl/cst/builtin_function_manager.go b/internal/pkg/dsl/cst/builtin_function_manager.go
index 876fcdb290..b066955362 100644
--- a/internal/pkg/dsl/cst/builtin_function_manager.go
+++ b/internal/pkg/dsl/cst/builtin_function_manager.go
@@ -29,6 +29,7 @@ type TFunctionClass string
 const (
 	FUNC_CLASS_ARITHMETIC  TFunctionClass = "arithmetic"
 	FUNC_CLASS_MATH        TFunctionClass = "math"
+	FUNC_CLASS_STATS       TFunctionClass = "stats"
 	FUNC_CLASS_BOOLEAN     TFunctionClass = "boolean"
 	FUNC_CLASS_STRING      TFunctionClass = "string"
 	FUNC_CLASS_HASHING     TFunctionClass = "hashing"
@@ -846,14 +847,14 @@ is normally distributed.`,
 		{
 			name:         "max",
 			class:        FUNC_CLASS_MATH,
-			help:         `Max of n numbers; null loses.`,
+			help:         `Max of n numbers; null loses. The min and max functions also recurse into arrays and maps, so they can be used to get min/max stats on array/map values.`,
 			variadicFunc: bifs.BIF_max_variadic,
 		},
 
 		{
 			name:         "min",
 			class:        FUNC_CLASS_MATH,
-			help:         `Min of n numbers; null loses.`,
+			help:         `Min of n numbers; null loses. The min and max functions also recurse into arrays and maps, so they can be used to get min/max stats on array/map values.`,
 			variadicFunc: bifs.BIF_min_variadic,
 		},
 
@@ -958,6 +959,276 @@ is normally distributed.`,
 			unaryFunc: bifs.BIF_urandelement,
 		},
 
+		// ----------------------------------------------------------------
+		// FUNC_CLASS_STATS
+
+		{
+			name:      "count",
+			class:     FUNC_CLASS_STATS,
+			help:      `Returns the length of an array or map. Returns error for non-array/non-map types.`,
+			unaryFunc: bifs.BIF_count,
+			examples: []string{
+				"count([7,8,9]) is 3",
+				`count({"a":7,"b":8,"c":9}) is 3`,
+			},
+		},
+
+		// Help-table entry for the DSL function distinct_count.
+		{
+			name:      "distinct_count",
+			class:     FUNC_CLASS_STATS,
+			help:      `Returns the number of distinct values in an array or map. Returns error for non-array/non-map types. Values are stringified for comparison, so for example string "1" and integer 1 are not distinct.`,
+			unaryFunc: bifs.BIF_distinct_count,
+			examples: []string{
+				`distinct_count([7,8,9,7])  is 3`,
+				`distinct_count([1,"1"]) is 1`,
+				`distinct_count([1,1.0]) is 2`,
+			},
+		},
+
+		{
+			name:      "null_count",
+			class:     FUNC_CLASS_STATS,
+			help:      `Returns the number of values in an array or map which are empty-string (AKA void) or JSON null. Returns error for non-array/non-map types. Values are stringified for comparison, so for example string "1" and integer 1 are not distinct.`,
+			unaryFunc: bifs.BIF_null_count,
+			examples: []string{
+				`null_count(["a", "", "c"]) is 1`,
+			},
+		},
+
+		{
+			name:      "mode",
+			class:     FUNC_CLASS_STATS,
+			help:      `Returns the most frequently occurring value in an array or map. Returns error for non-array/non-map types. Values are stringified for comparison, so for example string "1" and integer 1 are not distinct. In cases of ties, first-found wins.`,
+			unaryFunc: bifs.BIF_mode,
+			examples: []string{
+				`mode([3,3,4,4,4]) is 4`,
+				`mode([3,3,4,4]) is 3`,
+			},
+		},
+
+		// Help-table entry for the DSL function antimode. The help text says
+		// "least frequently", matching the examples below.
+		{
+			name:      "antimode",
+			class:     FUNC_CLASS_STATS,
+			help:      `Returns the least frequently occurring value in an array or map. Returns error for non-array/non-map types. Values are stringified for comparison, so for example string "1" and integer 1 are not distinct. In cases of ties, first-found wins.`,
+			unaryFunc: bifs.BIF_antimode,
+			examples: []string{
+				`antimode([3,3,4,4,4]) is 3`,
+				`antimode([3,3,4,4]) is 3`,
+			},
+		},
+
+		{
+			name:      "sum",
+			class:     FUNC_CLASS_STATS,
+			help:      `Returns the sum of values in an array or map. Returns error for non-array/non-map types.`,
+			unaryFunc: bifs.BIF_sum,
+			examples: []string{
+				`sum([1,2,3,4,5]) is 15`,
+			},
+		},
+
+		{
+			name:      "sum2",
+			class:     FUNC_CLASS_STATS,
+			help:      `Returns the sum of squares of values in an array or map. Returns error for non-array/non-map types.`,
+			unaryFunc: bifs.BIF_sum2,
+			examples: []string{
+				`sum2([1,2,3,4,5]) is 55`,
+			},
+		},
+
+		{
+			name:      "sum3",
+			class:     FUNC_CLASS_STATS,
+			help:      `Returns the sum of cubes of values in an array or map. Returns error for non-array/non-map types.`,
+			unaryFunc: bifs.BIF_sum3,
+			examples: []string{
+				`sum3([1,2,3,4,5]) is 225`,
+			},
+		},
+
+		{
+			name:      "sum4",
+			class:     FUNC_CLASS_STATS,
+			help:      `Returns the sum of fourth powers of values in an array or map. Returns error for non-array/non-map types.`,
+			unaryFunc: bifs.BIF_sum4,
+			examples: []string{
+				`sum4([1,2,3,4,5]) is 979`,
+			},
+		},
+
+		{
+			name:      "mean",
+			class:     FUNC_CLASS_STATS,
+			help:      `Returns the arithmetic mean of values in an array or map. Returns "" AKA void for empty array/map; returns error for non-array/non-map types.`,
+			unaryFunc: bifs.BIF_mean,
+			examples: []string{
+				`mean([4,5,7,10]) is 6.5`,
+			},
+		},
+
+		{
+			name:      "meaneb",
+			class:     FUNC_CLASS_STATS,
+			help:      `Returns the error bar for arithmetic mean of values in an array or map, assuming the values are independent and identically distributed. Returns "" AKA void for empty array/map; returns error for non-array/non-map types.`,
+			unaryFunc: bifs.BIF_meaneb,
+			examples: []string{
+				`meaneb([4,5,7,10]) is 1.3228756`,
+			},
+		},
+
+		{
+			name:      "variance",
+			class:     FUNC_CLASS_STATS,
+			help:      `Returns the sample variance of values in an array or map. Returns "" AKA void for array/map of length less than two; returns error for non-array/non-map types.`,
+			unaryFunc: bifs.BIF_variance,
+			examples: []string{
+				`variance([4,5,9,10,11]) is 9.7`,
+			},
+		},
+
+		{
+			name:      "stddev",
+			class:     FUNC_CLASS_STATS,
+			help:      `Returns the sample standard deviation of values in an array or map. Returns "" AKA void for array/map of length less than two; returns error for non-array/non-map types.`,
+			unaryFunc: bifs.BIF_stddev,
+			examples: []string{
+				`stddev([4,5,9,10,11]) is 3.1144823`,
+			},
+		},
+
+		{
+			name:      "skewness",
+			class:     FUNC_CLASS_STATS,
+			help:      `Returns the sample skewness of values in an array or map. Returns "" AKA void for array/map of length less than two; returns error for non-array/non-map types.`,
+			unaryFunc: bifs.BIF_skewness,
+			examples: []string{
+				`skewness([4,5,9,10,11]) is -0.2097285`,
+			},
+		},
+
+		{
+			name:      "kurtosis",
+			class:     FUNC_CLASS_STATS,
+			help:      `Returns the sample kurtosis of values in an array or map. Returns "" AKA void for array/map of length less than two; returns error for non-array/non-map types.`,
+			unaryFunc: bifs.BIF_kurtosis,
+			examples: []string{
+				`kurtosis([4,5,9,10,11]) is -1.6703688`,
+			},
+		},
+
+		{
+			name:      "minlen",
+			class:     FUNC_CLASS_STATS,
+			help:      `Returns the minimum string length of values in an array or map. Returns "" AKA void for array/map of length less than two; returns error for non-array/non-map types.`,
+			unaryFunc: bifs.BIF_minlen,
+			examples: []string{
+				`minlen(["año", "alto"]) is 3`,
+			},
+		},
+
+		{
+			name:      "maxlen",
+			class:     FUNC_CLASS_STATS,
+			help:      `Returns the maximum string length of values in an array or map. Returns "" AKA void for array/map of length less than two; returns error for non-array/non-map types.`,
+			unaryFunc: bifs.BIF_maxlen,
+			examples: []string{
+				`maxlen(["año", "alto"]) is 4`,
+			},
+		},
+
+		{
+			name:               "median",
+			class:              FUNC_CLASS_STATS,
+			help:               `Returns the median of values in an array or map. Returns "" AKA void for empty array/map; returns error for non-array/non-map types. Please see the percentiles for information on optional flags, and on performance for large inputs.`,
+			unaryFunc:          bifs.BIF_median,
+			binaryFunc:         bifs.BIF_median_with_options,
+			hasMultipleArities: true,
+			examples: []string{
+				`median([3,4,5,6,9,10]) is 6`,
+				`median([3,4,5,6,9,10],{"interpolate_linearly":true}) is 5.5`,
+				`median(["abc", "def", "ghi", "ghi"]) is "ghi"`,
+			},
+		},
+
+		{
+			name:               "percentile",
+			class:              FUNC_CLASS_STATS,
+			help:               `Returns the given percentile of values in an array or map. Returns "" AKA void for empty array/map; returns error for non-array/non-map types. Please see the percentiles for information on optional flags, and on performance for large inputs.`,
+			binaryFunc:         bifs.BIF_percentile,
+			ternaryFunc:        bifs.BIF_percentile_with_options,
+			hasMultipleArities: true,
+			examples: []string{
+				`percentile([3,4,5,6,9,10], 90) is 10`,
+				`percentile([3,4,5,6,9,10], 90, {"interpolate_linearly":true}) is 9.5`,
+				`percentile(["abc", "def", "ghi", "ghi"], 90) is "ghi"`,
+			},
+		},
+
+		// Help-table entry for the DSL function percentiles. The examples
+		// double as the documentation for the three option flags.
+		{
+			name:               "percentiles",
+			class:              FUNC_CLASS_STATS,
+			help:               `Returns the given percentiles of values in an array or map. Returns "" AKA void for empty array/map; returns error for non-array/non-map types. See examples for information on the three option flags.`,
+			binaryFunc:         bifs.BIF_percentiles,
+			ternaryFunc:        bifs.BIF_percentiles_with_options,
+			hasMultipleArities: true,
+			examples: []string{
+				``,
+				`Defaults are to not interpolate linearly, to produce a map keyed by percentile name, and to sort`,
+				`the input before computing percentiles:`,
+				``,
+				`  percentiles([3,4,5,6,9,10], [25,75]) is { "25": 4, "75": 9 }`,
+				`  percentiles(["abc", "def", "ghi", "ghi"], [25,75]) is { "25": "def", "75": "ghi" }`,
+				``,
+				`Use "output_array_not_map" (or shorthand "oa") to get the outputs as an array:`,
+				``,
+				`  percentiles([3,4,5,6,9,10], [25,75], {"output_array_not_map":true}) is [4, 9]`,
+				``,
+				`Use "interpolate_linearly" (or shorthand "il") to do linear interpolation -- note this produces`,
+				`error on string inputs:`,
+				``,
+				`  percentiles([3,4,5,6,9,10], [25,75], {"interpolate_linearly":true}) is { "25": 4.25, "75": 8.25 }`,
+				``,
+				`The percentiles function by default sorts its inputs before computing percentiles. If you know your input`,
+				`is already sorted -- see also the sort_collection function -- then computation will be faster on`,
+				`large input if you pass in "array_is_sorted":`,
+				``,
+				`  x = [6,5,9,10,4,3]`,
+				`  percentiles(x, [25,75], {"array_is_sorted":true}) gives { "25": 5, "75": 4 } which is incorrect`,
+				`  x = sort_collection(x)`,
+				`  percentiles(x, [25,75], {"array_is_sorted":true}) gives { "25": 4, "75": 9 } which is correct`,
+				``,
+				`You can also leverage this feature to compute percentiles on a sort of your choosing. For example:`,
+				``,
+				`  Non-sorted input:`,
+				`    x = splitax("the quick brown fox jumped loquaciously over the lazy dogs", " ")`,
+				`    x is: ["the", "quick", "brown", "fox", "jumped", "loquaciously", "over", "the", "lazy", "dogs"]`,
+				`  Percentiles are taken over the original positions of the words in the array -- "dogs" is last`,
+				`  and hence appears as p99:`,
+				`    percentiles(x, [50, 99], {"oa":true, "ais":true}) gives ["loquaciously", "dogs"]`,
+				`  With sorting done inside percentiles, "the" is alphabetically last and is therefore the p99:`,
+				`    percentiles(x, [50, 99], {"oa":true}) gives ["loquaciously", "the"]`,
+				`  With default sorting done outside percentiles, the same:`,
+				`    x = sort(x) # or x = sort_collection(x)`,
+				`    x is: ["brown", "dogs", "fox", "jumped", "lazy", "loquaciously", "over", "quick", "the", "the"]`,
+				`    percentiles(x, [50, 99], {"oa":true, "ais":true}) gives ["loquaciously", "the"]`,
+				`    percentiles(x, [50, 99], {"oa":true}) gives ["loquaciously", "the"]`,
+				`  Now sorting by word length, "loquaciously" is longest and hence is the p99:`,
+				`    x = sort(x, func(a,b) { return strlen(a) <=> strlen(b) } )`,
+				`    x is: ["fox", "the", "the", "dogs", "lazy", "over", "brown", "quick", "jumped", "loquaciously"]`,
+				`    percentiles(x, [50, 99], {"oa":true, "ais":true})`,
+				`    ["over", "loquaciously"]`,
+			},
+		},
+
+		{
+			name:      "sort_collection",
+			class:     FUNC_CLASS_STATS,
+			help:      `This is a helper function for the percentiles function; please see its online help for details.`,
+			unaryFunc: bifs.BIF_sort_collection,
+			examples:  []string{},
+		},
+
 		// ----------------------------------------------------------------
 		// FUNC_CLASS_TIME
 
diff --git a/internal/pkg/mlrval/mlrval_collections.go b/internal/pkg/mlrval/mlrval_collections.go
index 6674a044a0..5e009aff29 100644
--- a/internal/pkg/mlrval/mlrval_collections.go
+++ b/internal/pkg/mlrval/mlrval_collections.go
@@ -739,3 +739,51 @@ func LengthenMlrvalArray(array *[]*Mlrval, newLength64 int) {
 		*array = newArray
 	}
 }
+
+// ArrayFold collapses an array into one value. Starting from initval, it
+// applies f(accumulated, element) to each element in array order and returns
+// the final accumulated value; an empty array returns initval itself. For
+// example, with initval 0 and f(a,b) = a+b the result is the sum of the
+// elements.
+func ArrayFold(
+	a []*Mlrval,
+	initval *Mlrval,
+	f func(a, b *Mlrval) *Mlrval,
+) *Mlrval {
+	result := initval
+	for i := 0; i < len(a); i++ {
+		result = f(result, a[i])
+	}
+	return result
+}
+
+// MapFold collapses a map's values into one value. Starting from initval, it
+// applies f(accumulated, value) to each entry's value, walking the entries
+// from Head via Next, and returns the final accumulated value; an empty map
+// returns initval itself. Map keys are never consulted. For example, with
+// initval 0 and f(a,b) = a+b the result is the sum of the map's values.
+func MapFold(
+	m *Mlrmap,
+	initval *Mlrval,
+	f func(a, b *Mlrval) *Mlrval,
+) *Mlrval {
+	result := initval
+	entry := m.Head
+	for entry != nil {
+		result = f(result, entry.Value)
+		entry = entry.Next
+	}
+	return result
+}
+
+// CollectionFold dispatches to ArrayFold for arrays and to MapFold for maps.
+//
+// Any other argument type causes a panic. That is deliberate: returning an
+// error would complicate the API, and callers (internal library functions,
+// not user-facing code) are expected to have verified beforehand that the
+// argument is an array or a map. The panic is a last-resort backstop, not the
+// primary validation.
+func CollectionFold(
+	c *Mlrval,
+	initval *Mlrval,
+	f func(a, b *Mlrval) *Mlrval,
+) *Mlrval {
+	switch {
+	case c.IsArray():
+		return ArrayFold(c.AcquireArrayValue(), initval, f)
+	case c.IsMap():
+		return MapFold(c.AcquireMapValue(), initval, f)
+	default:
+		panic("CollectionFold argument is neither array nor map")
+	}
+}
diff --git a/internal/pkg/mlrval/mlrval_new.go b/internal/pkg/mlrval/mlrval_new.go
index 0ac8d2613f..a46bc73a07 100644
--- a/internal/pkg/mlrval/mlrval_new.go
+++ b/internal/pkg/mlrval/mlrval_new.go
@@ -222,6 +222,12 @@ func FromArray(arrayval []*Mlrval) *Mlrval {
 	}
 }
 
+// FromSingletonArray wraps a single element in a one-element array and
+// returns it as an array-valued Mlrval via FromArray. The element pointer is
+// stored as given, not copied.
+func FromSingletonArray(element *Mlrval) *Mlrval {
+	return FromArray([]*Mlrval{element})
+}
+
 func FromEmptyArray() *Mlrval {
 	return FromArray(make([]*Mlrval, 0))
 }
diff --git a/internal/pkg/transformers/utils/percentile_keeper.go b/internal/pkg/transformers/utils/percentile_keeper.go
index 41be046529..c9f2453bd3 100644
--- a/internal/pkg/transformers/utils/percentile_keeper.go
+++ b/internal/pkg/transformers/utils/percentile_keeper.go
@@ -6,7 +6,6 @@ package utils
 
 import (
 	"fmt"
-	"math"
 	"sort"
 
 	"github.com/johnkerl/miller/internal/pkg/bifs"
@@ -55,209 +54,6 @@ func (keeper *PercentileKeeper) Ingest(value *mlrval.Mlrval) {
 	keeper.sorted = false
 }
 
-// ================================================================
-// Non-interpolated percentiles (see also https://en.wikipedia.org/wiki/Percentile)
-
-// ----------------------------------------------------------------
-// OPTION 1: int index = p*n/100.0;
-//
-// x
-// 0
-// 20
-// 40
-// 60
-// 80
-// 100
-//
-// x_p00 0 x_p10  0 x_p20 20 x_p30 20 x_p40 40 x_p50 60 x_p60 60 x_p70 80 x_p80  80 x_p90 100 x_p100 100
-// x_p01 0 x_p11  0 x_p21 20 x_p31 20 x_p41 40 x_p51 60 x_p61 60 x_p71 80 x_p81  80 x_p91 100
-// x_p02 0 x_p12  0 x_p22 20 x_p32 20 x_p42 40 x_p52 60 x_p62 60 x_p72 80 x_p82  80 x_p92 100
-// x_p03 0 x_p13  0 x_p23 20 x_p33 20 x_p43 40 x_p53 60 x_p63 60 x_p73 80 x_p83  80 x_p93 100
-// x_p04 0 x_p14  0 x_p24 20 x_p34 40 x_p44 40 x_p54 60 x_p64 60 x_p74 80 x_p84 100 x_p94 100
-// x_p05 0 x_p15  0 x_p25 20 x_p35 40 x_p45 40 x_p55 60 x_p65 60 x_p75 80 x_p85 100 x_p95 100
-// x_p06 0 x_p16  0 x_p26 20 x_p36 40 x_p46 40 x_p56 60 x_p66 60 x_p76 80 x_p86 100 x_p96 100
-// x_p07 0 x_p17 20 x_p27 20 x_p37 40 x_p47 40 x_p57 60 x_p67 80 x_p77 80 x_p87 100 x_p97 100
-// x_p08 0 x_p18 20 x_p28 20 x_p38 40 x_p48 40 x_p58 60 x_p68 80 x_p78 80 x_p88 100 x_p98 100
-// x_p09 0 x_p19 20 x_p29 20 x_p39 40 x_p49 40 x_p59 60 x_p69 80 x_p79 80 x_p89 100 x_p99 100
-//
-// x
-// 0
-// 25
-// 50
-// 75
-// 100
-//
-// x_p00 0 x_p10 0 x_p20 25 x_p30 25 x_p40 50 x_p50 50 x_p60 75 x_p70 75 x_p80 100 x_p90 100 x_p100 100
-// x_p01 0 x_p11 0 x_p21 25 x_p31 25 x_p41 50 x_p51 50 x_p61 75 x_p71 75 x_p81 100 x_p91 100
-// x_p02 0 x_p12 0 x_p22 25 x_p32 25 x_p42 50 x_p52 50 x_p62 75 x_p72 75 x_p82 100 x_p92 100
-// x_p03 0 x_p13 0 x_p23 25 x_p33 25 x_p43 50 x_p53 50 x_p63 75 x_p73 75 x_p83 100 x_p93 100
-// x_p04 0 x_p14 0 x_p24 25 x_p34 25 x_p44 50 x_p54 50 x_p64 75 x_p74 75 x_p84 100 x_p94 100
-// x_p05 0 x_p15 0 x_p25 25 x_p35 25 x_p45 50 x_p55 50 x_p65 75 x_p75 75 x_p85 100 x_p95 100
-// x_p06 0 x_p16 0 x_p26 25 x_p36 25 x_p46 50 x_p56 50 x_p66 75 x_p76 75 x_p86 100 x_p96 100
-// x_p07 0 x_p17 0 x_p27 25 x_p37 25 x_p47 50 x_p57 50 x_p67 75 x_p77 75 x_p87 100 x_p97 100
-// x_p08 0 x_p18 0 x_p28 25 x_p38 25 x_p48 50 x_p58 50 x_p68 75 x_p78 75 x_p88 100 x_p98 100
-// x_p09 0 x_p19 0 x_p29 25 x_p39 25 x_p49 50 x_p59 50 x_p69 75 x_p79 75 x_p89 100 x_p99 100
-//
-// ----------------------------------------------------------------
-// OPTION 2: int index = p*(n-1)/100.0;
-//
-// x
-// 0
-// 20
-// 40
-// 60
-// 80
-// 100
-//
-// x_p00 0 x_p10 0 x_p20 20 x_p30 20 x_p40 40 x_p50 40 x_p60 60 x_p70 60 x_p80 80 x_p90 80 x_p100 100
-// x_p01 0 x_p11 0 x_p21 20 x_p31 20 x_p41 40 x_p51 40 x_p61 60 x_p71 60 x_p81 80 x_p91 80
-// x_p02 0 x_p12 0 x_p22 20 x_p32 20 x_p42 40 x_p52 40 x_p62 60 x_p72 60 x_p82 80 x_p92 80
-// x_p03 0 x_p13 0 x_p23 20 x_p33 20 x_p43 40 x_p53 40 x_p63 60 x_p73 60 x_p83 80 x_p93 80
-// x_p04 0 x_p14 0 x_p24 20 x_p34 20 x_p44 40 x_p54 40 x_p64 60 x_p74 60 x_p84 80 x_p94 80
-// x_p05 0 x_p15 0 x_p25 20 x_p35 20 x_p45 40 x_p55 40 x_p65 60 x_p75 60 x_p85 80 x_p95 80
-// x_p06 0 x_p16 0 x_p26 20 x_p36 20 x_p46 40 x_p56 40 x_p66 60 x_p76 60 x_p86 80 x_p96 80
-// x_p07 0 x_p17 0 x_p27 20 x_p37 20 x_p47 40 x_p57 40 x_p67 60 x_p77 60 x_p87 80 x_p97 80
-// x_p08 0 x_p18 0 x_p28 20 x_p38 20 x_p48 40 x_p58 40 x_p68 60 x_p78 60 x_p88 80 x_p98 80
-// x_p09 0 x_p19 0 x_p29 20 x_p39 20 x_p49 40 x_p59 40 x_p69 60 x_p79 60 x_p89 80 x_p99 80
-//
-// x
-// 0
-// 25
-// 50
-// 75
-// 100
-//
-// x_p00 0 x_p10 0 x_p20  0 x_p30 25 x_p40 25 x_p50 50 x_p60 50 x_p70 50 x_p80 75 x_p90 75 x_p100 100
-// x_p01 0 x_p11 0 x_p21  0 x_p31 25 x_p41 25 x_p51 50 x_p61 50 x_p71 50 x_p81 75 x_p91 75
-// x_p02 0 x_p12 0 x_p22  0 x_p32 25 x_p42 25 x_p52 50 x_p62 50 x_p72 50 x_p82 75 x_p92 75
-// x_p03 0 x_p13 0 x_p23  0 x_p33 25 x_p43 25 x_p53 50 x_p63 50 x_p73 50 x_p83 75 x_p93 75
-// x_p04 0 x_p14 0 x_p24  0 x_p34 25 x_p44 25 x_p54 50 x_p64 50 x_p74 50 x_p84 75 x_p94 75
-// x_p05 0 x_p15 0 x_p25 25 x_p35 25 x_p45 25 x_p55 50 x_p65 50 x_p75 75 x_p85 75 x_p95 75
-// x_p06 0 x_p16 0 x_p26 25 x_p36 25 x_p46 25 x_p56 50 x_p66 50 x_p76 75 x_p86 75 x_p96 75
-// x_p07 0 x_p17 0 x_p27 25 x_p37 25 x_p47 25 x_p57 50 x_p67 50 x_p77 75 x_p87 75 x_p97 75
-// x_p08 0 x_p18 0 x_p28 25 x_p38 25 x_p48 25 x_p58 50 x_p68 50 x_p78 75 x_p88 75 x_p98 75
-// x_p09 0 x_p19 0 x_p29 25 x_p39 25 x_p49 25 x_p59 50 x_p69 50 x_p79 75 x_p89 75 x_p99 75
-//
-// ----------------------------------------------------------------
-// OPTION 3: int index = (int)ceil(p*(n-1)/100.0);
-//
-// x
-// 0
-// 20
-// 40
-// 60
-// 80
-// 100
-//
-// x_p00  0 x_p10 20 x_p20 20 x_p30 40 x_p40 40 x_p50 60 x_p60 60 x_p70 80 x_p80  80 x_p90 100 x_p100 100
-// x_p01 20 x_p11 20 x_p21 40 x_p31 40 x_p41 60 x_p51 60 x_p61 80 x_p71 80 x_p81 100 x_p91 100
-// x_p02 20 x_p12 20 x_p22 40 x_p32 40 x_p42 60 x_p52 60 x_p62 80 x_p72 80 x_p82 100 x_p92 100
-// x_p03 20 x_p13 20 x_p23 40 x_p33 40 x_p43 60 x_p53 60 x_p63 80 x_p73 80 x_p83 100 x_p93 100
-// x_p04 20 x_p14 20 x_p24 40 x_p34 40 x_p44 60 x_p54 60 x_p64 80 x_p74 80 x_p84 100 x_p94 100
-// x_p05 20 x_p15 20 x_p25 40 x_p35 40 x_p45 60 x_p55 60 x_p65 80 x_p75 80 x_p85 100 x_p95 100
-// x_p06 20 x_p16 20 x_p26 40 x_p36 40 x_p46 60 x_p56 60 x_p66 80 x_p76 80 x_p86 100 x_p96 100
-// x_p07 20 x_p17 20 x_p27 40 x_p37 40 x_p47 60 x_p57 60 x_p67 80 x_p77 80 x_p87 100 x_p97 100
-// x_p08 20 x_p18 20 x_p28 40 x_p38 40 x_p48 60 x_p58 60 x_p68 80 x_p78 80 x_p88 100 x_p98 100
-// x_p09 20 x_p19 20 x_p29 40 x_p39 40 x_p49 60 x_p59 60 x_p69 80 x_p79 80 x_p89 100 x_p99 100
-//
-// x
-// 0
-// 25
-// 50
-// 75
-// 100
-//
-// x_p00  0 x_p10 25 x_p20 25 x_p30 50 x_p40 50 x_p50 50 x_p60 75 x_p70  75 x_p80 100 x_p90 100 x_p100 100
-// x_p01 25 x_p11 25 x_p21 25 x_p31 50 x_p41 50 x_p51 75 x_p61 75 x_p71  75 x_p81 100 x_p91 100
-// x_p02 25 x_p12 25 x_p22 25 x_p32 50 x_p42 50 x_p52 75 x_p62 75 x_p72  75 x_p82 100 x_p92 100
-// x_p03 25 x_p13 25 x_p23 25 x_p33 50 x_p43 50 x_p53 75 x_p63 75 x_p73  75 x_p83 100 x_p93 100
-// x_p04 25 x_p14 25 x_p24 25 x_p34 50 x_p44 50 x_p54 75 x_p64 75 x_p74  75 x_p84 100 x_p94 100
-// x_p05 25 x_p15 25 x_p25 25 x_p35 50 x_p45 50 x_p55 75 x_p65 75 x_p75  75 x_p85 100 x_p95 100
-// x_p06 25 x_p16 25 x_p26 50 x_p36 50 x_p46 50 x_p56 75 x_p66 75 x_p76 100 x_p86 100 x_p96 100
-// x_p07 25 x_p17 25 x_p27 50 x_p37 50 x_p47 50 x_p57 75 x_p67 75 x_p77 100 x_p87 100 x_p97 100
-// x_p08 25 x_p18 25 x_p28 50 x_p38 50 x_p48 50 x_p58 75 x_p68 75 x_p78 100 x_p88 100 x_p98 100
-// x_p09 25 x_p19 25 x_p29 50 x_p39 50 x_p49 50 x_p59 75 x_p69 75 x_p79 100 x_p89 100 x_p99 100
-//
-// ----------------------------------------------------------------
-// OPTION 4: int index = (int)ceil(-0.5 + p*(n-1)/100.0);
-//
-// x
-// 0
-// 20
-// 40
-// 60
-// 80
-// 100
-//
-// x_p00 0 x_p10  0 x_p20 20 x_p30 20 x_p40 40 x_p50 40 x_p60 60 x_p70 60 x_p80 80 x_p90  80 x_p100 100
-// x_p01 0 x_p11 20 x_p21 20 x_p31 40 x_p41 40 x_p51 60 x_p61 60 x_p71 80 x_p81 80 x_p91 100
-// x_p02 0 x_p12 20 x_p22 20 x_p32 40 x_p42 40 x_p52 60 x_p62 60 x_p72 80 x_p82 80 x_p92 100
-// x_p03 0 x_p13 20 x_p23 20 x_p33 40 x_p43 40 x_p53 60 x_p63 60 x_p73 80 x_p83 80 x_p93 100
-// x_p04 0 x_p14 20 x_p24 20 x_p34 40 x_p44 40 x_p54 60 x_p64 60 x_p74 80 x_p84 80 x_p94 100
-// x_p05 0 x_p15 20 x_p25 20 x_p35 40 x_p45 40 x_p55 60 x_p65 60 x_p75 80 x_p85 80 x_p95 100
-// x_p06 0 x_p16 20 x_p26 20 x_p36 40 x_p46 40 x_p56 60 x_p66 60 x_p76 80 x_p86 80 x_p96 100
-// x_p07 0 x_p17 20 x_p27 20 x_p37 40 x_p47 40 x_p57 60 x_p67 60 x_p77 80 x_p87 80 x_p97 100
-// x_p08 0 x_p18 20 x_p28 20 x_p38 40 x_p48 40 x_p58 60 x_p68 60 x_p78 80 x_p88 80 x_p98 100
-// x_p09 0 x_p19 20 x_p29 20 x_p39 40 x_p49 40 x_p59 60 x_p69 60 x_p79 80 x_p89 80 x_p99 100
-//
-// x
-// 0
-// 25
-// 50
-// 75
-// 100
-//
-// x_p00 0 x_p10  0 x_p20 25 x_p30 25 x_p40 50 x_p50 50 x_p60 50 x_p70 75 x_p80  75 x_p90 100 x_p100 100
-// x_p01 0 x_p11  0 x_p21 25 x_p31 25 x_p41 50 x_p51 50 x_p61 50 x_p71 75 x_p81  75 x_p91 100
-// x_p02 0 x_p12  0 x_p22 25 x_p32 25 x_p42 50 x_p52 50 x_p62 50 x_p72 75 x_p82  75 x_p92 100
-// x_p03 0 x_p13 25 x_p23 25 x_p33 25 x_p43 50 x_p53 50 x_p63 75 x_p73 75 x_p83  75 x_p93 100
-// x_p04 0 x_p14 25 x_p24 25 x_p34 25 x_p44 50 x_p54 50 x_p64 75 x_p74 75 x_p84  75 x_p94 100
-// x_p05 0 x_p15 25 x_p25 25 x_p35 25 x_p45 50 x_p55 50 x_p65 75 x_p75 75 x_p85  75 x_p95 100
-// x_p06 0 x_p16 25 x_p26 25 x_p36 25 x_p46 50 x_p56 50 x_p66 75 x_p76 75 x_p86  75 x_p96 100
-// x_p07 0 x_p17 25 x_p27 25 x_p37 25 x_p47 50 x_p57 50 x_p67 75 x_p77 75 x_p87  75 x_p97 100
-// x_p08 0 x_p18 25 x_p28 25 x_p38 50 x_p48 50 x_p58 50 x_p68 75 x_p78 75 x_p88 100 x_p98 100
-// x_p09 0 x_p19 25 x_p29 25 x_p39 50 x_p49 50 x_p59 50 x_p69 75 x_p79 75 x_p89 100 x_p99 100
-//
-// ----------------------------------------------------------------
-// CONCLUSION:
-// * I like option 2 for its simplicity ...
-// * ... but option 1 matches R's quantile with type=1.
-// * (Note that Miller's interpolated percentiles match match R's quantile with type=7)
-// ----------------------------------------------------------------
-
-func computeIndexNoninterpolated(n int, p float64) int {
-	index := int(p * float64(n) / 100.0)
-	//index := p * (float64(float64(n)) - 1) / 100.0
-	//index := int(ceil(p * (float64(n) - 1) / 100.0))
-	//index := int(ceil(-0.5 + p*(float64(n)-1)/100.0))
-	if index >= n {
-		index = n - 1
-	}
-	if index < 0 {
-		index = 0
-	}
-	return index
-}
-
-// xxx pending pointer-output refactor
-func getPercentileLinearlyInterpolated(array []*mlrval.Mlrval, n int, p float64) mlrval.Mlrval {
-	findex := (p / 100.0) * (float64(n) - 1)
-	if findex < 0.0 {
-		findex = 0.0
-	}
-	iindex := int(math.Floor(findex))
-	if iindex >= n-1 {
-		return *array[iindex].Copy()
-	} else {
-		// array[iindex] + frac * (array[iindex+1] - array[iindex])
-		// TODO: just do this in float64.
-		frac := mlrval.FromFloat(findex - float64(iindex))
-		diff := bifs.BIF_minus_binary(array[iindex+1], array[iindex])
-		prod := bifs.BIF_times(frac, diff)
-		return *bifs.BIF_plus_binary(array[iindex], prod)
-	}
-}
-
 // ----------------------------------------------------------------
 func (keeper *PercentileKeeper) sortIfNecessary() {
 	if !keeper.sorted {
@@ -282,7 +78,7 @@ func (keeper *PercentileKeeper) EmitNonInterpolated(percentile float64) *mlrval.
 		return mlrval.VOID
 	}
 	keeper.sortIfNecessary()
-	return keeper.data[computeIndexNoninterpolated(int(len(keeper.data)), percentile)].Copy()
+	return bifs.GetPercentileNonInterpolated(keeper.data, int(len(keeper.data)), percentile)
 }
 
 func (keeper *PercentileKeeper) EmitLinearlyInterpolated(percentile float64) *mlrval.Mlrval {
@@ -290,8 +86,7 @@ func (keeper *PercentileKeeper) EmitLinearlyInterpolated(percentile float64) *ml
 		return mlrval.VOID
 	}
 	keeper.sortIfNecessary()
-	output := getPercentileLinearlyInterpolated(keeper.data, int(len(keeper.data)), percentile)
-	return output.Copy()
+	return bifs.GetPercentileLinearlyInterpolated(keeper.data, int(len(keeper.data)), percentile)
 }
 
 // ----------------------------------------------------------------
diff --git a/internal/pkg/transformers/utils/stats1_accumulators.go b/internal/pkg/transformers/utils/stats1_accumulators.go
index d85cadf66b..c984ed9229 100644
--- a/internal/pkg/transformers/utils/stats1_accumulators.go
+++ b/internal/pkg/transformers/utils/stats1_accumulators.go
@@ -615,7 +615,7 @@ func (acc *Stats1VarAccumulator) Ingest(value *mlrval.Mlrval) {
 	}
 }
 func (acc *Stats1VarAccumulator) Emit() *mlrval.Mlrval {
-	return bifs.BIF_get_var(mlrval.FromInt(acc.count), acc.sum, acc.sum2)
+	return bifs.BIF_finalize_variance(mlrval.FromInt(acc.count), acc.sum, acc.sum2)
 }
 func (acc *Stats1VarAccumulator) Reset() {
 	acc.count = 0
@@ -646,7 +646,7 @@ func (acc *Stats1StddevAccumulator) Ingest(value *mlrval.Mlrval) {
 	}
 }
 func (acc *Stats1StddevAccumulator) Emit() *mlrval.Mlrval {
-	return bifs.BIF_get_stddev(mlrval.FromInt(acc.count), acc.sum, acc.sum2)
+	return bifs.BIF_finalize_stddev(mlrval.FromInt(acc.count), acc.sum, acc.sum2)
 }
 func (acc *Stats1StddevAccumulator) Reset() {
 	acc.count = 0
@@ -678,7 +678,7 @@ func (acc *Stats1MeanEBAccumulator) Ingest(value *mlrval.Mlrval) {
 }
 func (acc *Stats1MeanEBAccumulator) Emit() *mlrval.Mlrval {
 	mcount := mlrval.FromInt(acc.count)
-	return bifs.BIF_get_mean_EB(mcount, acc.sum, acc.sum2)
+	return bifs.BIF_finalize_mean_eb(mcount, acc.sum, acc.sum2)
 }
 func (acc *Stats1MeanEBAccumulator) Reset() {
 	acc.count = 0
@@ -714,7 +714,7 @@ func (acc *Stats1SkewnessAccumulator) Ingest(value *mlrval.Mlrval) {
 }
 func (acc *Stats1SkewnessAccumulator) Emit() *mlrval.Mlrval {
 	mcount := mlrval.FromInt(acc.count)
-	return bifs.BIF_get_skewness(mcount, acc.sum, acc.sum2, acc.sum3)
+	return bifs.BIF_finalize_skewness(mcount, acc.sum, acc.sum2, acc.sum3)
 }
 func (acc *Stats1SkewnessAccumulator) Reset() {
 	acc.count = 0
@@ -755,7 +755,7 @@ func (acc *Stats1KurtosisAccumulator) Ingest(value *mlrval.Mlrval) {
 }
 func (acc *Stats1KurtosisAccumulator) Emit() *mlrval.Mlrval {
 	mcount := mlrval.FromInt(acc.count)
-	return bifs.BIF_get_kurtosis(mcount, acc.sum, acc.sum2, acc.sum3, acc.sum4)
+	return bifs.BIF_finalize_kurtosis(mcount, acc.sum, acc.sum2, acc.sum3, acc.sum4)
 }
 func (acc *Stats1KurtosisAccumulator) Reset() {
 	acc.count = 0
diff --git a/man/manpage.txt b/man/manpage.txt
index b3352b9a62..666177bee9 100644
--- a/man/manpage.txt
+++ b/man/manpage.txt
@@ -182,32 +182,34 @@ MILLER(1)                                                            MILLER(1)
        unsparsify
 
 1mFUNCTION LIST0m
-       abs acos acosh any append apply arrayify asin asinh asserting_absent
+       abs acos acosh antimode any append apply arrayify asin asinh asserting_absent
        asserting_array asserting_bool asserting_boolean asserting_empty
        asserting_empty_map asserting_error asserting_float asserting_int
        asserting_map asserting_nonempty_map asserting_not_array asserting_not_empty
        asserting_not_map asserting_not_null asserting_null asserting_numeric
        asserting_present asserting_string atan atan2 atanh bitcount boolean
        capitalize cbrt ceil clean_whitespace collapse_whitespace concat cos cosh
-       depth dhms2fsec dhms2sec erf erfc every exec exp expm1 flatten float floor
-       fmtifnum fmtnum fold format fsec2dhms fsec2hms get_keys get_values
-       gmt2localtime gmt2nsec gmt2sec gssub gsub haskey hexfmt hms2fsec hms2sec
-       hostname index int invqnorm is_absent is_array is_bool is_boolean is_empty
-       is_empty_map is_error is_float is_int is_map is_nan is_nonempty_map
+       count depth dhms2fsec dhms2sec distinct_count erf erfc every exec exp expm1
+       flatten float floor fmtifnum fmtnum fold format fsec2dhms fsec2hms get_keys
+       get_values gmt2localtime gmt2nsec gmt2sec gssub gsub haskey hexfmt hms2fsec
+       hms2sec hostname index int invqnorm is_absent is_array is_bool is_boolean
+       is_empty is_empty_map is_error is_float is_int is_map is_nan is_nonempty_map
        is_not_array is_not_empty is_not_map is_not_null is_null is_numeric is_present
-       is_string joink joinkv joinv json_parse json_stringify latin1_to_utf8
+       is_string joink joinkv joinv json_parse json_stringify kurtosis latin1_to_utf8
        leafcount leftpad length localtime2gmt localtime2nsec localtime2sec log log10
-       log1p logifit lstrip madd mapdiff mapexcept mapselect mapsum max md5 mexp min
-       mmul msub nsec2gmt nsec2gmtdate nsec2localdate nsec2localtime os pow qnorm
+       log1p logifit lstrip madd mapdiff mapexcept mapselect mapsum max maxlen md5
+       mean meaneb median mexp min minlen mmul mode msub nsec2gmt nsec2gmtdate
+       nsec2localdate nsec2localtime null_count os percentile percentiles pow qnorm
        reduce regextract regextract_or_else rightpad round roundm rstrip sec2dhms
        sec2gmt sec2gmtdate sec2hms sec2localdate sec2localtime select sgn sha1 sha256
-       sha512 sin sinh sort splita splitax splitkv splitkvx splitnv splitnvx sqrt
-       ssub strfntime strfntime_local strftime strftime_local string strip strlen
-       strpntime strpntime_local strptime strptime_local sub substr substr0 substr1
-       sysntime system systime systimeint tan tanh tolower toupper truncate typeof
-       unflatten unformat unformatx upntime uptime urand urand32 urandelement
-       urandint urandrange utf8_to_latin1 version ! != !=~ % & && * ** + - . .* .+ .-
-       ./ / // < << <= <=> == =~ > >= >> >>> ?: ?? ??? ^ ^^ | || ~
+       sha512 sin sinh skewness sort sort_collection splita splitax splitkv splitkvx
+       splitnv splitnvx sqrt ssub stddev strfntime strfntime_local strftime
+       strftime_local string strip strlen strpntime strpntime_local strptime
+       strptime_local sub substr substr0 substr1 sum sum2 sum3 sum4 sysntime system
+       systime systimeint tan tanh tolower toupper truncate typeof unflatten unformat
+       unformatx upntime uptime urand urand32 urandelement urandint urandrange
+       utf8_to_latin1 variance version ! != !=~ % & && * ** + - . .* .+ .- ./ / // <
+       << <= <=> == =~ > >= >> >>> ?: ?? ??? ^ ^^ | || ~
 
 1mCOMMENTS-IN-DATA FLAGS0m
        Miller lets you put comments in your data, such as
@@ -2164,6 +2166,12 @@ MILLER(1)                                                            MILLER(1)
    1macosh0m
         (class=math #args=1) Inverse hyperbolic cosine.
 
+   1mantimode0m
+        (class=stats #args=1) Returns the least frequently occurring value in an array or map. Returns error for non-array/non-map types. Values are stringified for comparison, so for example string "1" and integer 1 are not distinct. In cases of ties, first-found wins.
+       Examples:
+       antimode([3,3,4,4,4]) is 3
+       antimode([3,3,4,4]) is 3
+
    1many0m
         (class=higher-order-functions #args=2) Given a map or array as first argument and a function as second argument, yields a boolean true if the argument function returns true for any array/map element, false otherwise. For arrays, the function should take one argument, for array element; for maps, it should take two, for map-element key and value. In either case it should return a boolean.
        Examples:
@@ -2288,6 +2296,12 @@ MILLER(1)                                                            MILLER(1)
    1mcosh0m
         (class=math #args=1) Hyperbolic cosine.
 
+   1mcount0m
+        (class=stats #args=1) Returns the length of an array or map. Returns error for non-array/non-map types.
+       Examples:
+       count([7,8,9]) is 3
+       count({"a":7,"b":8,"c":9}) is 3
+
    1mdepth0m
         (class=collections #args=1) Prints maximum depth of map/array. Scalars have depth 0.
 
@@ -2297,6 +2311,13 @@ MILLER(1)                                                            MILLER(1)
    1mdhms2sec0m
         (class=time #args=1) Recovers integer seconds as in dhms2sec("5d18h53m20s") = 500000
 
+   1mdistinct_count0m
+        (class=stats #args=1) Returns the number of distinct values in an array or map. Returns error for non-array/non-map types. Values are stringified for comparison, so for example string "1" and integer 1 are not distinct.
+       Examples:
+       distinct_count([7,8,9,7])  is 3
+       distinct_count([1,"1"]) is 1
+       distinct_count([1,1.0]) is 2
+
    1merf0m
         (class=math #args=1) Error function.
 
@@ -2521,6 +2542,11 @@ MILLER(1)                                                            MILLER(1)
    1mjson_stringify0m
         (class=collections #args=1,2) Converts value to JSON-formatted string. Default output is single-line. With optional second boolean argument set to true, produces multiline output.
 
+   1mkurtosis0m
+        (class=stats #args=1) Returns the sample kurtosis of values in an array or map. Returns "" AKA void for array/map of length less than two; returns error for non-array/non-map types.
+       Example:
+       kurtosis([4,5,9,10,11]) is -1.6703688
+
    1mlatin1_to_utf80m
         (class=string #args=1) Tries to convert Latin-1-encoded string to UTF-8-encoded string. If argument is array or map, recurses into it.
        Examples:
@@ -2589,20 +2615,53 @@ MILLER(1)                                                            MILLER(1)
         (class=collections #args=variadic) With 0 args, returns empty map. With >= 1 arg, returns a map with key-value pairs from all arguments. Rightmost collisions win, e.g. 'mapsum({1:2,3:4},{1:5})' is '{1:5,3:4}'.
 
    1mmax0m
-        (class=math #args=variadic) Max of n numbers; null loses.
+        (class=math #args=variadic) Max of n numbers; null loses. The min and max functions also recurse into arrays and maps, so they can be used to get min/max stats on array/map values.
+
+   1mmaxlen0m
+        (class=stats #args=1) Returns the maximum string length of values in an array or map. Returns "" AKA void for array/map of length less than two; returns error for non-array/non-map types.
+       Example:
+       maxlen(["año", "alto"]) is 4
 
    1mmd50m
         (class=hashing #args=1) MD5 hash.
 
+   1mmean0m
+        (class=stats #args=1) Returns the arithmetic mean of values in an array or map. Returns "" AKA void for empty array/map; returns error for non-array/non-map types.
+       Example:
+       mean([4,5,7,10]) is 6.5
+
+   1mmeaneb0m
+        (class=stats #args=1) Returns the error bar for arithmetic mean of values in an array or map, assuming the values are independent and identically distributed. Returns "" AKA void for empty array/map; returns error for non-array/non-map types.
+       Example:
+       meaneb([4,5,7,10]) is 1.3228756
+
+   1mmedian0m
+        (class=stats #args=1,2) Returns the median of values in an array or map. Returns "" AKA void for empty array/map; returns error for non-array/non-map types. Please see the percentiles function for information on optional flags, and on performance for large inputs.
+       Examples:
+       median([3,4,5,6,9,10]) is 6
+       median([3,4,5,6,9,10],{"interpolate_linearly":true}) is 5.5
+       median(["abc", "def", "ghi", "ghi"]) is "ghi"
+
    1mmexp0m
         (class=arithmetic #args=3) a ** b mod m (integers)
 
    1mmin0m
-        (class=math #args=variadic) Min of n numbers; null loses.
+        (class=math #args=variadic) Min of n numbers; null loses. The min and max functions also recurse into arrays and maps, so they can be used to get min/max stats on array/map values.
+
+   1mminlen0m
+        (class=stats #args=1) Returns the minimum string length of values in an array or map. Returns "" AKA void for array/map of length less than two; returns error for non-array/non-map types.
+       Example:
+       minlen(["año", "alto"]) is 3
 
    1mmmul0m
         (class=arithmetic #args=3) a * b mod m (integers)
 
+   1mmode0m
+        (class=stats #args=1) Returns the most frequently occurring value in an array or map. Returns error for non-array/non-map types. Values are stringified for comparison, so for example string "1" and integer 1 are not distinct. In cases of ties, first-found wins.
+       Examples:
+       mode([3,3,4,4,4]) is 4
+       mode([3,3,4,4]) is 3
+
    1mmsub0m
         (class=arithmetic #args=3) a - b mod m (integers)
 
@@ -2632,9 +2691,70 @@ MILLER(1)                                                            MILLER(1)
        nsec2localtime(1234567890123456789, 6) = "2009-02-14 01:31:30.123456" with TZ="Asia/Istanbul"
        nsec2localtime(1234567890123456789, 6, "Asia/Istanbul") = "2009-02-14 01:31:30.123456"
 
+   1mnull_count0m
+        (class=stats #args=1) Returns the number of values in an array or map which are empty-string (AKA void) or JSON null. Returns error for non-array/non-map types. Values are stringified for comparison, so for example string "1" and integer 1 are not distinct.
+       Example:
+       null_count(["a", "", "c"]) is 1
+
    1mos0m
         (class=system #args=0) Returns the operating-system name as a string.
 
+   1mpercentile0m
+        (class=stats #args=2,3) Returns the given percentile of values in an array or map. Returns "" AKA void for empty array/map; returns error for non-array/non-map types. Please see the percentiles function for information on optional flags, and on performance for large inputs.
+       Examples:
+       percentile([3,4,5,6,9,10], 90) is 10
+       percentile([3,4,5,6,9,10], 90, {"interpolate_linearly":true}) is 9.5
+       percentile(["abc", "def", "ghi", "ghi"], 90) is "ghi"
+
+   1mpercentiles0m
+        (class=stats #args=2,3) Returns the given percentiles of values in an array or map. Returns "" AKA void for empty array/map; returns error for non-array/non-map types. See examples for information on the three option flags.
+       Examples:
+
+       Defaults are to not interpolate linearly, to produce a map keyed by percentile name, and to sort
+       the input before computing percentiles:
+
+         percentiles([3,4,5,6,9,10], [25,75]) is { "25": 4, "75": 9 }
+         percentiles(["abc", "def", "ghi", "ghi"], [25,75]) is { "25": "def", "75": "ghi" }
+
+       Use "output_array_not_map" (or shorthand "oa") to get the outputs as an array:
+
+         percentiles([3,4,5,6,9,10], [25,75], {"output_array_not_map":true}) is [4, 9]
+
+       Use "interpolate_linearly" (or shorthand "il") to do linear interpolation -- note this produces
+       an error on string inputs:
+
+         percentiles([3,4,5,6,9,10], [25,75], {"interpolate_linearly":true}) is { "25": 4.25, "75": 8.25 }
+
+       The percentiles function sorts its inputs by default before computing percentiles. If you know your input
+       is already sorted -- see also the sort_collection function -- then computation will be faster on
+       large input if you pass in "array_is_sorted":
+
+         x = [6,5,9,10,4,3]
+         percentiles(x, [25,75], {"array_is_sorted":true}) gives { "25": 5, "75": 4 } which is incorrect
+         x = sort_collection(x)
+         percentiles(x, [25,75], {"array_is_sorted":true}) gives { "25": 4, "75": 9 } which is correct
+
+       You can also leverage this feature to compute percentiles on a sort of your choosing. For example:
+
+         Non-sorted input:
+           x = splitax("the quick brown fox jumped loquaciously over the lazy dogs", " ")
+           x is: ["the", "quick", "brown", "fox", "jumped", "loquaciously", "over", "the", "lazy", "dogs"]
+         Percentiles are taken over the original positions of the words in the array -- "dogs" is last
+         and hence appears as p99:
+           percentiles(x, [50, 99], {"oa":true, "ais":true}) gives ["loquaciously", "dogs"]
+         With sorting done inside percentiles, "the" is alphabetically last and is therefore the p99:
+           percentiles(x, [50, 99], {"oa":true}) gives ["loquaciously", "the"]
+         With default sorting done outside percentiles, the same:
+           x = sort(x) # or x = sort_collection(x)
+           x is: ["brown", "dogs", "fox", "jumped", "lazy", "loquaciously", "over", "quick", "the", "the"]
+           percentiles(x, [50, 99], {"oa":true, "ais":true}) gives ["loquaciously", "the"]
+           percentiles(x, [50, 99], {"oa":true}) gives ["loquaciously", "the"]
+         Now sorting by word length, "loquaciously" is longest and hence is the p99:
+           x = sort(x, func(a,b) { return strlen(a) <=> strlen(b) } )
+           x is: ["fox", "the", "the", "dogs", "lazy", "over", "brown", "quick", "jumped", "loquaciously"]
+           percentiles(x, [50, 99], {"oa":true, "ais":true})
+           ["over", "loquaciously"]
+
    1mpow0m
         (class=arithmetic #args=2) Exponentiation. Same as **, but as a function.
 
@@ -2731,6 +2851,11 @@ MILLER(1)                                                            MILLER(1)
    1msinh0m
         (class=math #args=1) Hyperbolic sine.
 
+   1mskewness0m
+        (class=stats #args=1) Returns the sample skewness of values in an array or map. Returns "" AKA void for array/map of length less than two; returns error for non-array/non-map types.
+       Example:
+       skewness([4,5,9,10,11]) is -0.2097285
+
    1msort0m
         (class=higher-order-functions #args=1-2) Given a map or array as first argument and string flags or function as optional second argument, returns a sorted copy of the input. With one argument, sorts array elements with numbers first numerically and then strings lexically, and map elements likewise by map keys. If the second argument is a string, it can contain any of "f" for lexical ("n" is for the above default), "c" for case-folded lexical, or "t" for natural sort order. An additional "r" in that string is for reverse. An additional "v" in that string means sort maps by value, rather than by key. If the second argument is a function, then for arrays it should take two arguments a and b, returning < 0, 0, or > 0 as a < b, a == b, or a > b respectively; for maps the function should take four arguments ak, av, bk, and bv, again returning < 0, 0, or > 0, using a and b's keys and values.
        Examples:
@@ -2747,6 +2872,9 @@ MILLER(1)                                                            MILLER(1)
        Map without function: sort({"c":2,"a":3,"b":1}, "v") returns {"b":1,"c":2,"a":3}.
        Map without function: sort({"c":2,"a":3,"b":1}, "vnr") returns {"a":3,"c":2,"b":1}.
 
+   1msort_collection0m
+        (class=stats #args=1) This is a helper function for the percentiles function; please see its online help for details.
+
    1msplita0m
         (class=conversion #args=2) Splits string into array with type inference. First argument is string to split; second is the separator to split on.
        Example:
@@ -2785,6 +2913,11 @@ MILLER(1)                                                            MILLER(1)
        Example:
        ssub("abc.def", ".", "X") gives "abcXdef"
 
+   1mstddev0m
+        (class=stats #args=1) Returns the sample standard deviation of values in an array or map. Returns "" AKA void for array/map of length less than two; returns error for non-array/non-map types.
+       Example:
+       stddev([4,5,9,10,11]) is 3.1144823
+
    1mstrfntime0m
         (class=time #args=2) Formats integer nanoseconds since the epoch as timestamp. Format strings are as at https://pkg.go.dev/github.com/lestrrat-go/strftime, with the Miller-specific addition of "%1S" through "%9S" which format the seconds with 1 through 9 decimal places, respectively. ("%S" uses no decimal places.) See also https://miller.readthedocs.io/en/latest/reference-dsl-time/ for more information on the differences from the C library ("man strftime" on your system). See also strftime_local.
        Examples:
@@ -2872,6 +3005,26 @@ MILLER(1)                                                            MILLER(1)
    1msubstr10m
         (class=string #args=3) substr1(s,m,n) gives substring of s from 1-up position m to n inclusive. Negative indices -len .. -1 alias to 1 .. len. See also substr and substr0.
 
+   1msum0m
+        (class=stats #args=1) Returns the sum of values in an array or map. Returns error for non-array/non-map types.
+       Example:
+       sum([1,2,3,4,5]) is 15
+
+   1msum20m
+        (class=stats #args=1) Returns the sum of squares of values in an array or map. Returns error for non-array/non-map types.
+       Example:
+       sum2([1,2,3,4,5]) is 55
+
+   1msum30m
+        (class=stats #args=1) Returns the sum of cubes of values in an array or map. Returns error for non-array/non-map types.
+       Example:
+       sum3([1,2,3,4,5]) is 225
+
+   1msum40m
+        (class=stats #args=1) Returns the sum of fourth powers of values in an array or map. Returns error for non-array/non-map types.
+       Example:
+       sum4([1,2,3,4,5]) is 979
+
    1msysntime0m
         (class=time #args=0) Returns the system time in 64-bit nanoseconds since the epoch.
 
@@ -2950,6 +3103,11 @@ MILLER(1)                                                            MILLER(1)
        $y = utf8_to_latin1($x)
        $* = utf8_to_latin1($*)
 
+   1mvariance0m
+        (class=stats #args=1) Returns the sample variance of values in an array or map. Returns "" AKA void for array/map of length less than two; returns error for non-array/non-map types.
+       Example:
+       variance([4,5,9,10,11]) is 9.7
+
    1mversion0m
         (class=system #args=0) Returns the Miller version as a string.
 
@@ -3451,4 +3609,4 @@ MILLER(1)                                                            MILLER(1)
 
 
 
-                                  2023-08-23                         MILLER(1)
+                                  2023-08-26                         MILLER(1)
diff --git a/man/mlr.1 b/man/mlr.1
index b7c343ce11..91d501b6b2 100644
--- a/man/mlr.1
+++ b/man/mlr.1
@@ -2,12 +2,12 @@
 .\"     Title: mlr
 .\"    Author: [see the "AUTHOR" section]
 .\" Generator: ./mkman.rb
-.\"      Date: 2023-08-23
+.\"      Date: 2023-08-26
 .\"    Manual: \ \&
 .\"    Source: \ \&
 .\"  Language: English
 .\"
-.TH "MILLER" "1" "2023-08-23" "\ \&" "\ \&"
+.TH "MILLER" "1" "2023-08-26" "\ \&" "\ \&"
 .\" -----------------------------------------------------------------
 .\" * Portability definitions
 .\" ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
@@ -229,32 +229,34 @@ unsparsify
 .RS 0
 .\}
 .nf
-abs acos acosh any append apply arrayify asin asinh asserting_absent
+abs acos acosh antimode any append apply arrayify asin asinh asserting_absent
 asserting_array asserting_bool asserting_boolean asserting_empty
 asserting_empty_map asserting_error asserting_float asserting_int
 asserting_map asserting_nonempty_map asserting_not_array asserting_not_empty
 asserting_not_map asserting_not_null asserting_null asserting_numeric
 asserting_present asserting_string atan atan2 atanh bitcount boolean
 capitalize cbrt ceil clean_whitespace collapse_whitespace concat cos cosh
-depth dhms2fsec dhms2sec erf erfc every exec exp expm1 flatten float floor
-fmtifnum fmtnum fold format fsec2dhms fsec2hms get_keys get_values
-gmt2localtime gmt2nsec gmt2sec gssub gsub haskey hexfmt hms2fsec hms2sec
-hostname index int invqnorm is_absent is_array is_bool is_boolean is_empty
-is_empty_map is_error is_float is_int is_map is_nan is_nonempty_map
+count depth dhms2fsec dhms2sec distinct_count erf erfc every exec exp expm1
+flatten float floor fmtifnum fmtnum fold format fsec2dhms fsec2hms get_keys
+get_values gmt2localtime gmt2nsec gmt2sec gssub gsub haskey hexfmt hms2fsec
+hms2sec hostname index int invqnorm is_absent is_array is_bool is_boolean
+is_empty is_empty_map is_error is_float is_int is_map is_nan is_nonempty_map
 is_not_array is_not_empty is_not_map is_not_null is_null is_numeric is_present
-is_string joink joinkv joinv json_parse json_stringify latin1_to_utf8
+is_string joink joinkv joinv json_parse json_stringify kurtosis latin1_to_utf8
 leafcount leftpad length localtime2gmt localtime2nsec localtime2sec log log10
-log1p logifit lstrip madd mapdiff mapexcept mapselect mapsum max md5 mexp min
-mmul msub nsec2gmt nsec2gmtdate nsec2localdate nsec2localtime os pow qnorm
+log1p logifit lstrip madd mapdiff mapexcept mapselect mapsum max maxlen md5
+mean meaneb median mexp min minlen mmul mode msub nsec2gmt nsec2gmtdate
+nsec2localdate nsec2localtime null_count os percentile percentiles pow qnorm
 reduce regextract regextract_or_else rightpad round roundm rstrip sec2dhms
 sec2gmt sec2gmtdate sec2hms sec2localdate sec2localtime select sgn sha1 sha256
-sha512 sin sinh sort splita splitax splitkv splitkvx splitnv splitnvx sqrt
-ssub strfntime strfntime_local strftime strftime_local string strip strlen
-strpntime strpntime_local strptime strptime_local sub substr substr0 substr1
-sysntime system systime systimeint tan tanh tolower toupper truncate typeof
-unflatten unformat unformatx upntime uptime urand urand32 urandelement
-urandint urandrange utf8_to_latin1 version ! != !=~ % & && * ** + - . .* .+ .-
-\&./ / // < << <= <=> == =~ > >= >> >>> ?: ?? ??? ^ ^^ | || ~
+sha512 sin sinh skewness sort sort_collection splita splitax splitkv splitkvx
+splitnv splitnvx sqrt ssub stddev strfntime strfntime_local strftime
+strftime_local string strip strlen strpntime strpntime_local strptime
+strptime_local sub substr substr0 substr1 sum sum2 sum3 sum4 sysntime system
+systime systimeint tan tanh tolower toupper truncate typeof unflatten unformat
+unformatx upntime uptime urand urand32 urandelement urandint urandrange
+utf8_to_latin1 variance version ! != !=~ % & && * ** + - . .* .+ .- ./ / // <
+<< <= <=> == =~ > >= >> >>> ?: ?? ??? ^ ^^ | || ~
 .fi
 .if n \{\
 .RE
@@ -2765,6 +2767,18 @@ being 'b=3,c=4', then the output is the two records 'a=1,b=2,c=' and
 .fi
 .if n \{\
 .RE
+.SS "antimode"
+.if n \{\
+.RS 0
+.\}
+.nf
+ (class=stats #args=1) Returns the least frequently occurring value in an array or map. Returns error for non-array/non-map types. Values are stringified for comparison, so for example string "1" and integer 1 are not distinct. In cases of ties, first-found wins.
+Examples:
+antimode([3,3,4,4,4]) is 3
+antimode([3,3,4,4]) is 3
+.fi
+.if n \{\
+.RE
 .SS "any"
 .if n \{\
 .RS 0
@@ -3117,6 +3131,18 @@ concat([1,2],[3]) is [1,2,3]
 .fi
 .if n \{\
 .RE
+.SS "count"
+.if n \{\
+.RS 0
+.\}
+.nf
+ (class=stats #args=1) Returns the length of an array or map. Returns error for non-array/non-map types.
+Examples:
+count([7,8,9]) is 3
+count({"a":7,"b":8,"c":9}) is 3
+.fi
+.if n \{\
+.RE
 .SS "depth"
 .if n \{\
 .RS 0
@@ -3144,6 +3170,19 @@ concat([1,2],[3]) is [1,2,3]
 .fi
 .if n \{\
 .RE
+.SS "distinct_count"
+.if n \{\
+.RS 0
+.\}
+.nf
+ (class=stats #args=1) Returns the number of distinct values in an array or map. Returns error for non-array/non-map types. Values are stringified for comparison, so for example string "1" and integer 1 are not distinct.
+Examples:
+distinct_count([7,8,9,7]) is 3
+distinct_count([1,"1"]) is 1
+distinct_count([1,1.0]) is 2
+.fi
+.if n \{\
+.RE
 .SS "erf"
 .if n \{\
 .RS 0
@@ -3698,6 +3737,17 @@ joinv({"a":3,"b":4,"c":5}, ",") = "3,4,5"
 .fi
 .if n \{\
 .RE
+.SS "kurtosis"
+.if n \{\
+.RS 0
+.\}
+.nf
+ (class=stats #args=1) Returns the sample kurtosis of values in an array or map. Returns "" AKA void for array/map of length less than two; returns error for non-array/non-map types.
+Example:
+kurtosis([4,5,9,10,11]) is -1.6703688
+.fi
+.if n \{\
+.RE
 .SS "latin1_to_utf8"
 .if n \{\
 .RS 0
@@ -3872,7 +3922,18 @@ localtime2sec("2001-02-03 04:05:06", "Asia/Istanbul") = 981165906"
 .RS 0
 .\}
 .nf
- (class=math #args=variadic) Max of n numbers; null loses.
+ (class=math #args=variadic) Max of n numbers; null loses. The min and max functions also recurse into arrays and maps, so they can be used to get min/max stats on array/map values.
+.fi
+.if n \{\
+.RE
+.SS "maxlen"
+.if n \{\
+.RS 0
+.\}
+.nf
+ (class=stats #args=1) Returns the maximum string length of values in an array or map. Returns "" AKA void for empty array/map; returns error for non-array/non-map types.
+Example:
+maxlen(["año", "alto"]) is 4
 .fi
 .if n \{\
 .RE
@@ -3885,6 +3946,41 @@ localtime2sec("2001-02-03 04:05:06", "Asia/Istanbul") = 981165906"
 .fi
 .if n \{\
 .RE
+.SS "mean"
+.if n \{\
+.RS 0
+.\}
+.nf
+ (class=stats #args=1) Returns the arithmetic mean of values in an array or map. Returns "" AKA void for empty array/map; returns error for non-array/non-map types.
+Example:
+mean([4,5,7,10]) is 6.5
+.fi
+.if n \{\
+.RE
+.SS "meaneb"
+.if n \{\
+.RS 0
+.\}
+.nf
+ (class=stats #args=1) Returns the error bar for arithmetic mean of values in an array or map, assuming the values are independent and identically distributed. Returns "" AKA void for array/map of length less than two; returns error for non-array/non-map types.
+Example:
+meaneb([4,5,7,10]) is 1.3228756
+.fi
+.if n \{\
+.RE
+.SS "median"
+.if n \{\
+.RS 0
+.\}
+.nf
+ (class=stats #args=1,2) Returns the median of values in an array or map. Returns "" AKA void for empty array/map; returns error for non-array/non-map types. Please see the percentiles function for information on optional flags, and on performance for large inputs.
+Examples:
+median([3,4,5,6,9,10]) is 6
+median([3,4,5,6,9,10],{"interpolate_linearly":true}) is 5.5
+median(["abc", "def", "ghi", "ghi"]) is "ghi"
+.fi
+.if n \{\
+.RE
 .SS "mexp"
 .if n \{\
 .RS 0
@@ -3899,7 +3995,18 @@ localtime2sec("2001-02-03 04:05:06", "Asia/Istanbul") = 981165906"
 .RS 0
 .\}
 .nf
- (class=math #args=variadic) Min of n numbers; null loses.
+ (class=math #args=variadic) Min of n numbers; null loses. The min and max functions also recurse into arrays and maps, so they can be used to get min/max stats on array/map values.
+.fi
+.if n \{\
+.RE
+.SS "minlen"
+.if n \{\
+.RS 0
+.\}
+.nf
+ (class=stats #args=1) Returns the minimum string length of values in an array or map. Returns "" AKA void for empty array/map; returns error for non-array/non-map types.
+Example:
+minlen(["año", "alto"]) is 3
 .fi
 .if n \{\
 .RE
@@ -3912,6 +4019,18 @@ localtime2sec("2001-02-03 04:05:06", "Asia/Istanbul") = 981165906"
 .fi
 .if n \{\
 .RE
+.SS "mode"
+.if n \{\
+.RS 0
+.\}
+.nf
+ (class=stats #args=1) Returns the most frequently occurring value in an array or map. Returns error for non-array/non-map types. Values are stringified for comparison, so for example string "1" and integer 1 are not distinct. In cases of ties, first-found wins.
+Examples:
+mode([3,3,4,4,4]) is 4
+mode([3,3,4,4]) is 3
+.fi
+.if n \{\
+.RE
 .SS "msub"
 .if n \{\
 .RS 0
@@ -3971,6 +4090,17 @@ nsec2localtime(1234567890123456789, 6, "Asia/Istanbul") = "2009-02-14 01:31:30.1
 .fi
 .if n \{\
 .RE
+.SS "null_count"
+.if n \{\
+.RS 0
+.\}
+.nf
+ (class=stats #args=1) Returns the number of values in an array or map which are empty-string (AKA void) or JSON null. Returns error for non-array/non-map types. Values are stringified for comparison, so for example string "1" and integer 1 are not distinct.
+Example:
+null_count(["a", "", "c"]) is 1
+.fi
+.if n \{\
+.RE
 .SS "os"
 .if n \{\
 .RS 0
@@ -3980,6 +4110,74 @@ nsec2localtime(1234567890123456789, 6, "Asia/Istanbul") = "2009-02-14 01:31:30.1
 .fi
 .if n \{\
 .RE
+.SS "percentile"
+.if n \{\
+.RS 0
+.\}
+.nf
+ (class=stats #args=2,3) Returns the given percentile of values in an array or map. Returns "" AKA void for empty array/map; returns error for non-array/non-map types. Please see the percentiles function for information on optional flags, and on performance for large inputs.
+Examples:
+percentile([3,4,5,6,9,10], 90) is 10
+percentile([3,4,5,6,9,10], 90, {"interpolate_linearly":true}) is 9.5
+percentile(["abc", "def", "ghi", "ghi"], 90) is "ghi"
+.fi
+.if n \{\
+.RE
+.SS "percentiles"
+.if n \{\
+.RS 0
+.\}
+.nf
+ (class=stats #args=2,3) Returns the given percentiles of values in an array or map. Returns "" AKA void for empty array/map; returns error for non-array/non-map types. See examples for information on the three option flags.
+Examples:
+
+Defaults are to not interpolate linearly, to produce a map keyed by percentile name, and to sort
+the input before computing percentiles:
+
+  percentiles([3,4,5,6,9,10], [25,75]) is { "25": 4, "75": 9 }
+  percentiles(["abc", "def", "ghi", "ghi"], [25,75]) is { "25": "def", "75": "ghi" }
+
+Use "output_array_not_map" (or shorthand "oa") to get the outputs as an array:
+
+  percentiles([3,4,5,6,9,10], [25,75], {"output_array_not_map":true}) is [4, 9]
+
+Use "interpolate_linearly" (or shorthand "il") to do linear interpolation -- note this produces
+error on string inputs:
+
+  percentiles([3,4,5,6,9,10], [25,75], {"interpolate_linearly":true}) is { "25": 4.25, "75": 8.25 }
+
+The percentiles function always sorts its inputs before computing percentiles. If you know your input
+is already sorted -- see also the sort_collection function -- then computation will be faster on
+large input if you pass in "array_is_sorted":
+
+  x = [6,5,9,10,4,3]
+  percentiles(x, [25,75], {"array_is_sorted":true}) gives { "25": 5, "75": 4 } which is incorrect
+  x = sort_collection(x)
+  percentiles(x, [25,75], {"array_is_sorted":true}) gives { "25": 4, "75": 9 } which is correct
+
+You can also leverage this feature to compute percentiles on a sort of your choosing. For example:
+
+  Non-sorted input:
+    x = splitax("the quick brown fox jumped loquaciously over the lazy dogs", " ")
+    x is: ["the", "quick", "brown", "fox", "jumped", "loquaciously", "over", "the", "lazy", "dogs"]
+  Percentiles are taken over the original positions of the words in the array -- "dogs" is last
+  and hence appears as p99:
+    percentiles(x, [50, 99], {"oa":true, "ais":true}) gives ["loquaciously", "dogs"]
+  With sorting done inside percentiles, "the" is alphabetically last and is therefore the p99:
+    percentiles(x, [50, 99], {"oa":true}) gives ["loquaciously", "the"]
+  With default sorting done outside percentiles, the same:
+    x = sort(x) # or x = sort_collection(x)
+    x is: ["brown", "dogs", "fox", "jumped", "lazy", "loquaciously", "over", "quick", "the", "the"]
+    percentiles(x, [50, 99], {"oa":true, "ais":true}) gives ["loquaciously", "the"]
+    percentiles(x, [50, 99], {"oa":true}) gives ["loquaciously", "the"]
+  Now sorting by word length, "loquaciously" is longest and hence is the p99:
+    x = sort(x, func(a,b) { return strlen(a) <=> strlen(b) } )
+    x is: ["fox", "the", "the", "dogs", "lazy", "over", "brown", "quick", "jumped", "loquaciously"]
+    percentiles(x, [50, 99], {"oa":true, "ais":true})
+    ["over", "loquaciously"]
+.fi
+.if n \{\
+.RE
 .SS "pow"
 .if n \{\
 .RS 0
@@ -4208,6 +4406,17 @@ Map example: select({"a":1, "b":3, "c":5}, func(k,v) {return v >= 3}) returns {"
 .fi
 .if n \{\
 .RE
+.SS "skewness"
+.if n \{\
+.RS 0
+.\}
+.nf
+ (class=stats #args=1) Returns the sample skewness of values in an array or map. Returns "" AKA void for array/map of length less than two; returns error for non-array/non-map types.
+Example:
+skewness([4,5,9,10,11]) is -0.2097285
+.fi
+.if n \{\
+.RE
 .SS "sort"
 .if n \{\
 .RS 0
@@ -4230,6 +4439,15 @@ Map without function: sort({"c":2,"a":3,"b":1}, "vnr") returns {"a":3,"c":2,"b":
 .fi
 .if n \{\
 .RE
+.SS "sort_collection"
+.if n \{\
+.RS 0
+.\}
+.nf
+ (class=stats #args=1) This is a helper function for the percentiles function; please see its online help for details.
+.fi
+.if n \{\
+.RE
 .SS "splita"
 .if n \{\
 .RS 0
@@ -4316,6 +4534,17 @@ ssub("abc.def", ".", "X") gives "abcXdef"
 .fi
 .if n \{\
 .RE
+.SS "stddev"
+.if n \{\
+.RS 0
+.\}
+.nf
+ (class=stats #args=1) Returns the sample standard deviation of values in an array or map. Returns "" AKA void for array/map of length less than two; returns error for non-array/non-map types.
+Example:
+stddev([4,5,9,10,11]) is 3.1144823
+.fi
+.if n \{\
+.RE
 .SS "strfntime"
 .if n \{\
 .RS 0
@@ -4493,6 +4722,50 @@ sub("prefix4529:suffix8567", "suffix([0-9]+)", "name\e1") gives "prefix4529:name
 .fi
 .if n \{\
 .RE
+.SS "sum"
+.if n \{\
+.RS 0
+.\}
+.nf
+ (class=stats #args=1) Returns the sum of values in an array or map. Returns error for non-array/non-map types.
+Example:
+sum([1,2,3,4,5]) is 15
+.fi
+.if n \{\
+.RE
+.SS "sum2"
+.if n \{\
+.RS 0
+.\}
+.nf
+ (class=stats #args=1) Returns the sum of squares of values in an array or map. Returns error for non-array/non-map types.
+Example:
+sum2([1,2,3,4,5]) is 55
+.fi
+.if n \{\
+.RE
+.SS "sum3"
+.if n \{\
+.RS 0
+.\}
+.nf
+ (class=stats #args=1) Returns the sum of cubes of values in an array or map. Returns error for non-array/non-map types.
+Example:
+sum3([1,2,3,4,5]) is 225
+.fi
+.if n \{\
+.RE
+.SS "sum4"
+.if n \{\
+.RS 0
+.\}
+.nf
+ (class=stats #args=1) Returns the sum of fourth powers of values in an array or map. Returns error for non-array/non-map types.
+Example:
+sum4([1,2,3,4,5]) is 979
+.fi
+.if n \{\
+.RE
 .SS "sysntime"
 .if n \{\
 .RS 0
@@ -4697,6 +4970,17 @@ $* = utf8_to_latin1($*)
 .fi
 .if n \{\
 .RE
+.SS "variance"
+.if n \{\
+.RS 0
+.\}
+.nf
+ (class=stats #args=1) Returns the sample variance of values in an array or map. Returns "" AKA void for array/map of length less than two; returns error for non-array/non-map types.
+Example:
+variance([4,5,9,10,11]) is 9.7
+.fi
+.if n \{\
+.RE
 .SS "version"
 .if n \{\
 .RS 0
diff --git a/test/cases/dsl-stats/count/various/cmd b/test/cases/dsl-stats/count/various/cmd
new file mode 100644
index 0000000000..8e64fdff2f
--- /dev/null
+++ b/test/cases/dsl-stats/count/various/cmd
@@ -0,0 +1 @@
+mlr -n --ofmtf 6 --xtab put -f ${CASEDIR}/mlr
diff --git a/test/cases/dsl-stats/count/various/experr b/test/cases/dsl-stats/count/various/experr
new file mode 100644
index 0000000000..e69de29bb2
diff --git a/test/cases/dsl-stats/count/various/expout b/test/cases/dsl-stats/count/various/expout
new file mode 100644
index 0000000000..9e4f467e00
--- /dev/null
+++ b/test/cases/dsl-stats/count/various/expout
@@ -0,0 +1,20 @@
+count_0                 (error)
+count_0_type            error
+count_null              (error)
+count_null_type         error
+count_empty_array       0
+count_empty_array_type  int
+count_array_1           1
+count_array_1_type      int
+count_array_3           3
+count_array_3_type      int
+count_array_nested      3
+count_array_nested_type int
+count_empty_map         0
+count_empty_map_type    int
+count_map_1             1
+count_map_1_type        int
+count_map_3             3
+count_map_3_type        int
+count_map_nested        3
+count_map_nested_type   int
diff --git a/test/cases/dsl-stats/count/various/mlr b/test/cases/dsl-stats/count/various/mlr
new file mode 100644
index 0000000000..39e9abd8ef
--- /dev/null
+++ b/test/cases/dsl-stats/count/various/mlr
@@ -0,0 +1,26 @@
+end {
+    outputs = {};
+
+    outputs["count_0"] = count(0);
+    outputs["count_null"] = count(null);
+    outputs["count_nonesuch"] = count(nonesuch);
+
+    outputs["count_empty_array"] = count([]);
+    outputs["count_array_1"] = count([7]);
+    outputs["count_array_3"] = count([7,8,9]);
+    outputs["count_array_nested"] = count([7,[80,90],9]);
+
+    outputs["count_empty_map"] = count({});
+    outputs["count_map_1"] = count({ "a" : 7} );
+    outputs["count_map_3"] = count({ "a" : 7, "b" : 8, "c" : 9 } );
+    outputs["count_map_nested"] = count({ "a" : 7, "b" : [80,90], "c" : 9 });
+
+    typed_outputs = {};
+
+    for (k, v in outputs) {
+        typed_outputs[k] = v;
+        typed_outputs[k."_type"] = typeof(v);
+    }
+
+    emit typed_outputs;
+}
diff --git a/test/cases/dsl-stats/distinct_count/various/cmd b/test/cases/dsl-stats/distinct_count/various/cmd
new file mode 100644
index 0000000000..8e64fdff2f
--- /dev/null
+++ b/test/cases/dsl-stats/distinct_count/various/cmd
@@ -0,0 +1 @@
+mlr -n --ofmtf 6 --xtab put -f ${CASEDIR}/mlr
diff --git a/test/cases/dsl-stats/distinct_count/various/experr b/test/cases/dsl-stats/distinct_count/various/experr
new file mode 100644
index 0000000000..e69de29bb2
diff --git a/test/cases/dsl-stats/distinct_count/various/expout b/test/cases/dsl-stats/distinct_count/various/expout
new file mode 100644
index 0000000000..8d2416554b
--- /dev/null
+++ b/test/cases/dsl-stats/distinct_count/various/expout
@@ -0,0 +1,32 @@
+distinct_count_0                 (error)
+distinct_count_0_type            error
+distinct_count_null              (error)
+distinct_count_null_type         error
+distinct_count_empty_array       0
+distinct_count_empty_array_type  int
+distinct_count_array_1           1
+distinct_count_array_1_type      int
+distinct_count_array_3a          3
+distinct_count_array_3a_type     int
+distinct_count_array_3b          2
+distinct_count_array_3b_type     int
+distinct_count_array_3c          1
+distinct_count_array_3c_type     int
+distinct_count_array_3d          1
+distinct_count_array_3d_type     int
+distinct_count_array_nested      2
+distinct_count_array_nested_type int
+distinct_count_empty_map         0
+distinct_count_empty_map_type    int
+distinct_count_map_1             1
+distinct_count_map_1_type        int
+distinct_count_map_3a            3
+distinct_count_map_3a_type       int
+distinct_count_map_3b            2
+distinct_count_map_3b_type       int
+distinct_count_map_3c            1
+distinct_count_map_3c_type       int
+distinct_count_map_3d            1
+distinct_count_map_3d_type       int
+distinct_count_map_nested        2
+distinct_count_map_nested_type   int
diff --git a/test/cases/dsl-stats/distinct_count/various/mlr b/test/cases/dsl-stats/distinct_count/various/mlr
new file mode 100644
index 0000000000..f98ceb66e1
--- /dev/null
+++ b/test/cases/dsl-stats/distinct_count/various/mlr
@@ -0,0 +1,32 @@
+end {
+    outputs = {};
+
+    outputs["distinct_count_0"] = distinct_count(0);
+    outputs["distinct_count_null"] = distinct_count(null);
+    outputs["distinct_count_nonesuch"] = distinct_count(nonesuch);
+
+    outputs["distinct_count_empty_array"] = distinct_count([]);
+    outputs["distinct_count_array_1"] = distinct_count([7]);
+    outputs["distinct_count_array_3a"] = distinct_count([7,8,9]);
+    outputs["distinct_count_array_3b"] = distinct_count([7,7,9]);
+    outputs["distinct_count_array_3c"] = distinct_count([7,7,7]);
+    outputs["distinct_count_array_3d"] = distinct_count([null,null,null]);
+    outputs["distinct_count_array_nested"] = distinct_count([7,[7],7]);
+
+    outputs["distinct_count_empty_map"] = distinct_count({});
+    outputs["distinct_count_map_1"] = distinct_count({ "a" : 7} );
+    outputs["distinct_count_map_3a"] = distinct_count({ "a" : 7, "b" : 8, "c" : 9 } );
+    outputs["distinct_count_map_3b"] = distinct_count({ "a" : 7, "b" : 7, "c" : 9 } );
+    outputs["distinct_count_map_3c"] = distinct_count({ "a" : 7, "b" : 7, "c" : 7 } );
+    outputs["distinct_count_map_3d"] = distinct_count({ "a" : null, "b" : null, "c" : null } );
+    outputs["distinct_count_map_nested"] = distinct_count({ "a" : 7, "b" : [7], "c" : 7 });
+
+    typed_outputs = {};
+
+    for (k, v in outputs) {
+        typed_outputs[k] = v;
+        typed_outputs[k."_type"] = typeof(v);
+    }
+
+    emit typed_outputs;
+}
diff --git a/test/cases/dsl-stats/mode/various/cmd b/test/cases/dsl-stats/mode/various/cmd
new file mode 100644
index 0000000000..8e64fdff2f
--- /dev/null
+++ b/test/cases/dsl-stats/mode/various/cmd
@@ -0,0 +1 @@
+mlr -n --ofmtf 6 --xtab put -f ${CASEDIR}/mlr
diff --git a/test/cases/dsl-stats/mode/various/experr b/test/cases/dsl-stats/mode/various/experr
new file mode 100644
index 0000000000..e69de29bb2
diff --git a/test/cases/dsl-stats/mode/various/expout b/test/cases/dsl-stats/mode/various/expout
new file mode 100644
index 0000000000..3b792ea2ce
--- /dev/null
+++ b/test/cases/dsl-stats/mode/various/expout
@@ -0,0 +1,24 @@
+mode_0                 (error)
+mode_0_type            error
+mode_null              (error)
+mode_null_type         error
+mode_empty_array       
+mode_empty_array_type  empty
+mode_array_1           7
+mode_array_1_type      int
+mode_array_3a          7
+mode_array_3a_type     int
+mode_array_3b          7
+mode_array_3b_type     int
+mode_array_nested      9
+mode_array_nested_type int
+mode_empty_map         
+mode_empty_map_type    empty
+mode_map_1             7
+mode_map_1_type        int
+mode_map_3a            7
+mode_map_3a_type       int
+mode_map_3b            7
+mode_map_3b_type       int
+mode_map_nested        9
+mode_map_nested_type   int
diff --git a/test/cases/dsl-stats/mode/various/mlr b/test/cases/dsl-stats/mode/various/mlr
new file mode 100644
index 0000000000..d59e8b0705
--- /dev/null
+++ b/test/cases/dsl-stats/mode/various/mlr
@@ -0,0 +1,28 @@
+end {
+    outputs = {};
+
+    outputs["mode_0"] = mode(0);
+    outputs["mode_null"] = mode(null);
+    outputs["mode_nonesuch"] = mode(nonesuch);
+
+    outputs["mode_empty_array"] = mode([]);
+    outputs["mode_array_1"] = mode([7]);
+    outputs["mode_array_3a"] = mode([7,8,9]);
+    outputs["mode_array_3b"] = mode([7,8,7]);
+    outputs["mode_array_nested"] = mode([7,[8,8,8,8,8,8],9,9,9]);
+
+    outputs["mode_empty_map"] = mode({});
+    outputs["mode_map_1"] = mode({ "a" : 7} );
+    outputs["mode_map_3a"] = mode({ "a" : 7, "b" : 8, "c" : 9 } );
+    outputs["mode_map_3b"] = mode({ "a" : 7, "b" : 8, "c" : 7 } );
+    outputs["mode_map_nested"] = mode({ "a" : 7, "b" : [8,8,8,8,8,8], "c" : 9, "d": 9, "e": 9 });
+
+    typed_outputs = {};
+
+    for (k, v in outputs) {
+        typed_outputs[k] = v;
+        typed_outputs[k."_type"] = typeof(v);
+    }
+
+    emit typed_outputs;
+}
diff --git a/test/cases/dsl-stats/moments/numeric-000/cmd b/test/cases/dsl-stats/moments/numeric-000/cmd
new file mode 100644
index 0000000000..7ebdd60bc0
--- /dev/null
+++ b/test/cases/dsl-stats/moments/numeric-000/cmd
@@ -0,0 +1 @@
+mlr --ofmtf 6 --ojson --from test/input/abixy head -n 0 then put -q -f test/input/test-moments.mlr
diff --git a/test/cases/dsl-stats/moments/numeric-000/experr b/test/cases/dsl-stats/moments/numeric-000/experr
new file mode 100644
index 0000000000..e69de29bb2
diff --git a/test/cases/dsl-stats/moments/numeric-000/expout b/test/cases/dsl-stats/moments/numeric-000/expout
new file mode 100644
index 0000000000..7a8c5d98f8
--- /dev/null
+++ b/test/cases/dsl-stats/moments/numeric-000/expout
@@ -0,0 +1,26 @@
+[
+{
+  "a_count": 0,
+  "a_sum": 0,
+  "a_sum2": 0,
+  "a_sum3": 0,
+  "a_sum4": 0,
+  "a_mean": "",
+  "a_var": "",
+  "a_stddev": "",
+  "a_meaneb": "",
+  "a_skewness": "",
+  "a_kurtosis": "",
+  "m_count": 0,
+  "m_sum": 0,
+  "m_sum2": 0,
+  "m_sum3": 0,
+  "m_sum4": 0,
+  "m_mean": "",
+  "m_var": "",
+  "m_stddev": "",
+  "m_meaneb": "",
+  "m_skewness": "",
+  "m_kurtosis": ""
+}
+]
diff --git a/test/cases/dsl-stats/moments/numeric-001/cmd b/test/cases/dsl-stats/moments/numeric-001/cmd
new file mode 100644
index 0000000000..fe2e61aa7c
--- /dev/null
+++ b/test/cases/dsl-stats/moments/numeric-001/cmd
@@ -0,0 +1 @@
+mlr --ofmtf 6 --ojson --from test/input/abixy head -n 1 then put -q -f test/input/test-moments.mlr
diff --git a/test/cases/dsl-stats/moments/numeric-001/experr b/test/cases/dsl-stats/moments/numeric-001/experr
new file mode 100644
index 0000000000..e69de29bb2
diff --git a/test/cases/dsl-stats/moments/numeric-001/expout b/test/cases/dsl-stats/moments/numeric-001/expout
new file mode 100644
index 0000000000..d278c2a6d1
--- /dev/null
+++ b/test/cases/dsl-stats/moments/numeric-001/expout
@@ -0,0 +1,26 @@
+[
+{
+  "a_count": 1,
+  "a_sum": 1,
+  "a_sum2": 1,
+  "a_sum3": 1,
+  "a_sum4": 1,
+  "a_mean": 1,
+  "a_var": "",
+  "a_stddev": "",
+  "a_meaneb": "",
+  "a_skewness": "",
+  "a_kurtosis": "",
+  "m_count": 1,
+  "m_sum": 1,
+  "m_sum2": 1,
+  "m_sum3": 1,
+  "m_sum4": 1,
+  "m_mean": 1,
+  "m_var": "",
+  "m_stddev": "",
+  "m_meaneb": "",
+  "m_skewness": "",
+  "m_kurtosis": ""
+}
+]
diff --git a/test/cases/dsl-stats/moments/numeric-002/cmd b/test/cases/dsl-stats/moments/numeric-002/cmd
new file mode 100644
index 0000000000..2d383e83c4
--- /dev/null
+++ b/test/cases/dsl-stats/moments/numeric-002/cmd
@@ -0,0 +1 @@
+mlr --ofmtf 6 --ojson --from test/input/abixy head -n 2 then put -q -f test/input/test-moments.mlr
diff --git a/test/cases/dsl-stats/moments/numeric-002/experr b/test/cases/dsl-stats/moments/numeric-002/experr
new file mode 100644
index 0000000000..e69de29bb2
diff --git a/test/cases/dsl-stats/moments/numeric-002/expout b/test/cases/dsl-stats/moments/numeric-002/expout
new file mode 100644
index 0000000000..7b268c3e58
--- /dev/null
+++ b/test/cases/dsl-stats/moments/numeric-002/expout
@@ -0,0 +1,26 @@
+[
+{
+  "a_count": 2,
+  "a_sum": 3,
+  "a_sum2": 5,
+  "a_sum3": 9,
+  "a_sum4": 17,
+  "a_mean": 1.500000,
+  "a_var": 0.500000,
+  "a_stddev": 0.707107,
+  "a_meaneb": 0.500000,
+  "a_skewness": 0.000000,
+  "a_kurtosis": -2.000000,
+  "m_count": 2,
+  "m_sum": 3,
+  "m_sum2": 5,
+  "m_sum3": 9,
+  "m_sum4": 17,
+  "m_mean": 1.500000,
+  "m_var": 0.500000,
+  "m_stddev": 0.707107,
+  "m_meaneb": 0.500000,
+  "m_skewness": 0.000000,
+  "m_kurtosis": -2.000000
+}
+]
diff --git a/test/cases/dsl-stats/moments/numeric-003/cmd b/test/cases/dsl-stats/moments/numeric-003/cmd
new file mode 100644
index 0000000000..fe70bddae4
--- /dev/null
+++ b/test/cases/dsl-stats/moments/numeric-003/cmd
@@ -0,0 +1 @@
+mlr --ofmtf 6 --ojson --from test/input/abixy head -n 3 then put -q -f test/input/test-moments.mlr
diff --git a/test/cases/dsl-stats/moments/numeric-003/experr b/test/cases/dsl-stats/moments/numeric-003/experr
new file mode 100644
index 0000000000..e69de29bb2
diff --git a/test/cases/dsl-stats/moments/numeric-003/expout b/test/cases/dsl-stats/moments/numeric-003/expout
new file mode 100644
index 0000000000..a7b80ccf0c
--- /dev/null
+++ b/test/cases/dsl-stats/moments/numeric-003/expout
@@ -0,0 +1,26 @@
+[
+{
+  "a_count": 3,
+  "a_sum": 6,
+  "a_sum2": 14,
+  "a_sum3": 36,
+  "a_sum4": 98,
+  "a_mean": 2,
+  "a_var": 1.000000,
+  "a_stddev": 1.000000,
+  "a_meaneb": 0.577350,
+  "a_skewness": 0.000000,
+  "a_kurtosis": -1.500000,
+  "m_count": 3,
+  "m_sum": 6,
+  "m_sum2": 14,
+  "m_sum3": 36,
+  "m_sum4": 98,
+  "m_mean": 2,
+  "m_var": 1.000000,
+  "m_stddev": 1.000000,
+  "m_meaneb": 0.577350,
+  "m_skewness": 0.000000,
+  "m_kurtosis": -1.500000
+}
+]
diff --git a/test/cases/dsl-stats/moments/numeric-004/cmd b/test/cases/dsl-stats/moments/numeric-004/cmd
new file mode 100644
index 0000000000..9f91c06f9f
--- /dev/null
+++ b/test/cases/dsl-stats/moments/numeric-004/cmd
@@ -0,0 +1 @@
+mlr --ofmtf 6 --ojson --from test/input/abixy head -n 4 then put -q -f test/input/test-moments.mlr
diff --git a/test/cases/dsl-stats/moments/numeric-004/experr b/test/cases/dsl-stats/moments/numeric-004/experr
new file mode 100644
index 0000000000..e69de29bb2
diff --git a/test/cases/dsl-stats/moments/numeric-004/expout b/test/cases/dsl-stats/moments/numeric-004/expout
new file mode 100644
index 0000000000..344a8a12ee
--- /dev/null
+++ b/test/cases/dsl-stats/moments/numeric-004/expout
@@ -0,0 +1,26 @@
+[
+{
+  "a_count": 4,
+  "a_sum": 10,
+  "a_sum2": 30,
+  "a_sum3": 100,
+  "a_sum4": 354,
+  "a_mean": 2.500000,
+  "a_var": 1.666667,
+  "a_stddev": 1.290994,
+  "a_meaneb": 0.645497,
+  "a_skewness": 0.000000,
+  "a_kurtosis": -1.360000,
+  "m_count": 4,
+  "m_sum": 10,
+  "m_sum2": 30,
+  "m_sum3": 100,
+  "m_sum4": 354,
+  "m_mean": 2.500000,
+  "m_var": 1.666667,
+  "m_stddev": 1.290994,
+  "m_meaneb": 0.645497,
+  "m_skewness": 0.000000,
+  "m_kurtosis": -1.360000
+}
+]
diff --git a/test/cases/dsl-stats/moments/numeric-all/cmd b/test/cases/dsl-stats/moments/numeric-all/cmd
new file mode 100644
index 0000000000..de6266f306
--- /dev/null
+++ b/test/cases/dsl-stats/moments/numeric-all/cmd
@@ -0,0 +1 @@
+mlr --ofmtf 6 --ojson --from test/input/abixy put -q -f test/input/test-moments.mlr
diff --git a/test/cases/dsl-stats/moments/numeric-all/experr b/test/cases/dsl-stats/moments/numeric-all/experr
new file mode 100644
index 0000000000..e69de29bb2
diff --git a/test/cases/dsl-stats/moments/numeric-all/expout b/test/cases/dsl-stats/moments/numeric-all/expout
new file mode 100644
index 0000000000..9e62f653a6
--- /dev/null
+++ b/test/cases/dsl-stats/moments/numeric-all/expout
@@ -0,0 +1,26 @@
+[
+{
+  "a_count": 10,
+  "a_sum": 55,
+  "a_sum2": 385,
+  "a_sum3": 3025,
+  "a_sum4": 25333,
+  "a_mean": 5.500000,
+  "a_var": 9.166667,
+  "a_stddev": 3.027650,
+  "a_meaneb": 0.957427,
+  "a_skewness": 0.000000,
+  "a_kurtosis": -1.224242,
+  "m_count": 10,
+  "m_sum": 55,
+  "m_sum2": 385,
+  "m_sum3": 3025,
+  "m_sum4": 25333,
+  "m_mean": 5.500000,
+  "m_var": 9.166667,
+  "m_stddev": 3.027650,
+  "m_meaneb": 0.957427,
+  "m_skewness": 0.000000,
+  "m_kurtosis": -1.224242
+}
+]
diff --git a/test/cases/dsl-stats/null_count/various/cmd b/test/cases/dsl-stats/null_count/various/cmd
new file mode 100644
index 0000000000..8e64fdff2f
--- /dev/null
+++ b/test/cases/dsl-stats/null_count/various/cmd
@@ -0,0 +1 @@
+mlr -n --ofmtf 6 --xtab put -f ${CASEDIR}/mlr
diff --git a/test/cases/dsl-stats/null_count/various/experr b/test/cases/dsl-stats/null_count/various/experr
new file mode 100644
index 0000000000..e69de29bb2
diff --git a/test/cases/dsl-stats/null_count/various/expout b/test/cases/dsl-stats/null_count/various/expout
new file mode 100644
index 0000000000..1bf369f1fd
--- /dev/null
+++ b/test/cases/dsl-stats/null_count/various/expout
@@ -0,0 +1,20 @@
+null_count_0                (error)
+null_count_0_type           error
+null_count_null             (error)
+null_count_null_type        error
+null_count_empty_array      0
+null_count_empty_array_type int
+null_count_array_1          0
+null_count_array_1_type     int
+null_count_array_2          0
+null_count_array_2_type     int
+null_count_array_3          2
+null_count_array_3_type     int
+null_count_empty_map        0
+null_count_empty_map_type   int
+null_count_map_1            0
+null_count_map_1_type       int
+null_count_map_2            0
+null_count_map_2_type       int
+null_count_map_3            2
+null_count_map_3_type       int
diff --git a/test/cases/dsl-stats/null_count/various/mlr b/test/cases/dsl-stats/null_count/various/mlr
new file mode 100644
index 0000000000..0882777116
--- /dev/null
+++ b/test/cases/dsl-stats/null_count/various/mlr
@@ -0,0 +1,28 @@
+end {
+    outputs = {};
+
+    # Only empty string and JSON-null count as nulls
+
+    outputs["null_count_0"] = null_count(0);
+    outputs["null_count_null"] = null_count(null);
+    outputs["null_count_nonesuch"] = null_count(nonesuch);
+
+    outputs["null_count_empty_array"] = null_count([]);
+    outputs["null_count_array_1"] = null_count([7]);
+    outputs["null_count_array_2"] = null_count([7,8]);
+    outputs["null_count_array_3"] = null_count(["",null,nonesuch]);
+
+    outputs["null_count_empty_map"] = null_count({});
+    outputs["null_count_map_1"] = null_count({ "a" : 7});
+    outputs["null_count_map_2"] = null_count({ "a" : 7, "b" : 8 });
+    outputs["null_count_map_3"] = null_count({ "a" : "", "b" : null, "c" : nonesuch });
+
+    typed_outputs = {};
+
+    for (k, v in outputs) {
+        typed_outputs[k] = v;
+        typed_outputs[k."_type"] = typeof(v);
+    }
+
+    emit typed_outputs;
+}
diff --git a/test/cases/dsl-stats/percentiles/non-numeric-000/cmd b/test/cases/dsl-stats/percentiles/non-numeric-000/cmd
new file mode 100644
index 0000000000..a862c1303d
--- /dev/null
+++ b/test/cases/dsl-stats/percentiles/non-numeric-000/cmd
@@ -0,0 +1 @@
+mlr --ofmtf 6 --ojson --zin --from test/input/medium.z head -n 0 then put -q -f test/input/test-percentiles.mlr -s field=a
diff --git a/test/cases/dsl-stats/percentiles/non-numeric-000/experr b/test/cases/dsl-stats/percentiles/non-numeric-000/experr
new file mode 100644
index 0000000000..e69de29bb2
diff --git a/test/cases/dsl-stats/percentiles/non-numeric-000/expout b/test/cases/dsl-stats/percentiles/non-numeric-000/expout
new file mode 100644
index 0000000000..2e711ec221
--- /dev/null
+++ b/test/cases/dsl-stats/percentiles/non-numeric-000/expout
@@ -0,0 +1,62 @@
+[
+{
+  "a_min": "",
+  "a_max": "",
+  "a_minlen": "",
+  "a_maxlen": "",
+  "a_median": "",
+  "a_ps": {
+    "0": "",
+    "1": "",
+    "10": "",
+    "25": "",
+    "50": "",
+    "75": "",
+    "90": "",
+    "99": "",
+    "100": ""
+  },
+  "a_psi": {
+    "0": "",
+    "1": "",
+    "10": "",
+    "25": "",
+    "50": "",
+    "75": "",
+    "90": "",
+    "99": "",
+    "100": ""
+  },
+  "a_psa": ["", "", "", "", "", "", "", "", ""],
+  "a_psia": ["", "", "", "", "", "", "", "", ""],
+  "m_min": "",
+  "m_max": "",
+  "m_minlen": "",
+  "m_maxlen": "",
+  "m_median": "",
+  "m_ps": {
+    "0": "",
+    "1": "",
+    "10": "",
+    "25": "",
+    "50": "",
+    "75": "",
+    "90": "",
+    "99": "",
+    "100": ""
+  },
+  "m_psi": {
+    "0": "",
+    "1": "",
+    "10": "",
+    "25": "",
+    "50": "",
+    "75": "",
+    "90": "",
+    "99": "",
+    "100": ""
+  },
+  "m_psa": ["", "", "", "", "", "", "", "", ""],
+  "m_psia": ["", "", "", "", "", "", "", "", ""]
+}
+]
diff --git a/test/cases/dsl-stats/percentiles/non-numeric-001/cmd b/test/cases/dsl-stats/percentiles/non-numeric-001/cmd
new file mode 100644
index 0000000000..291777b392
--- /dev/null
+++ b/test/cases/dsl-stats/percentiles/non-numeric-001/cmd
@@ -0,0 +1 @@
+mlr --ofmtf 6 --ojson --zin --from test/input/medium.z head -n 1 then put -q -f test/input/test-percentiles.mlr -s field=a
diff --git a/test/cases/dsl-stats/percentiles/non-numeric-001/experr b/test/cases/dsl-stats/percentiles/non-numeric-001/experr
new file mode 100644
index 0000000000..e69de29bb2
diff --git a/test/cases/dsl-stats/percentiles/non-numeric-001/expout b/test/cases/dsl-stats/percentiles/non-numeric-001/expout
new file mode 100644
index 0000000000..a4c419c7f4
--- /dev/null
+++ b/test/cases/dsl-stats/percentiles/non-numeric-001/expout
@@ -0,0 +1,62 @@
+[
+{
+  "a_min": "pan",
+  "a_max": "pan",
+  "a_minlen": 3,
+  "a_maxlen": 3,
+  "a_median": "pan",
+  "a_ps": {
+    "0": "pan",
+    "1": "pan",
+    "10": "pan",
+    "25": "pan",
+    "50": "pan",
+    "75": "pan",
+    "90": "pan",
+    "99": "pan",
+    "100": "pan"
+  },
+  "a_psi": {
+    "0": "pan",
+    "1": "pan",
+    "10": "pan",
+    "25": "pan",
+    "50": "pan",
+    "75": "pan",
+    "90": "pan",
+    "99": "pan",
+    "100": "pan"
+  },
+  "a_psa": ["pan", "pan", "pan", "pan", "pan", "pan", "pan", "pan", "pan"],
+  "a_psia": ["pan", "pan", "pan", "pan", "pan", "pan", "pan", "pan", "pan"],
+  "m_min": "pan",
+  "m_max": "pan",
+  "m_minlen": 3,
+  "m_maxlen": 3,
+  "m_median": "pan",
+  "m_ps": {
+    "0": "pan",
+    "1": "pan",
+    "10": "pan",
+    "25": "pan",
+    "50": "pan",
+    "75": "pan",
+    "90": "pan",
+    "99": "pan",
+    "100": "pan"
+  },
+  "m_psi": {
+    "0": "pan",
+    "1": "pan",
+    "10": "pan",
+    "25": "pan",
+    "50": "pan",
+    "75": "pan",
+    "90": "pan",
+    "99": "pan",
+    "100": "pan"
+  },
+  "m_psa": ["pan", "pan", "pan", "pan", "pan", "pan", "pan", "pan", "pan"],
+  "m_psia": ["pan", "pan", "pan", "pan", "pan", "pan", "pan", "pan", "pan"]
+}
+]
diff --git a/test/cases/dsl-stats/percentiles/non-numeric-002/cmd b/test/cases/dsl-stats/percentiles/non-numeric-002/cmd
new file mode 100644
index 0000000000..71815b4571
--- /dev/null
+++ b/test/cases/dsl-stats/percentiles/non-numeric-002/cmd
@@ -0,0 +1 @@
+mlr --ofmtf 6 --ojson --zin --from test/input/medium.z head -n 2 then put -q -f test/input/test-percentiles.mlr -s field=a
diff --git a/test/cases/dsl-stats/percentiles/non-numeric-002/experr b/test/cases/dsl-stats/percentiles/non-numeric-002/experr
new file mode 100644
index 0000000000..e69de29bb2
diff --git a/test/cases/dsl-stats/percentiles/non-numeric-002/expout b/test/cases/dsl-stats/percentiles/non-numeric-002/expout
new file mode 100644
index 0000000000..c814d0c5ac
--- /dev/null
+++ b/test/cases/dsl-stats/percentiles/non-numeric-002/expout
@@ -0,0 +1,62 @@
+[
+{
+  "a_min": "eks",
+  "a_max": "pan",
+  "a_minlen": 3,
+  "a_maxlen": 3,
+  "a_median": "pan",
+  "a_ps": {
+    "0": "eks",
+    "1": "eks",
+    "10": "eks",
+    "25": "eks",
+    "50": "pan",
+    "75": "pan",
+    "90": "pan",
+    "99": "pan",
+    "100": "pan"
+  },
+  "a_psi": {
+    "0": (error),
+    "1": (error),
+    "10": (error),
+    "25": (error),
+    "50": (error),
+    "75": (error),
+    "90": (error),
+    "99": (error),
+    "100": "pan"
+  },
+  "a_psa": ["eks", "eks", "eks", "eks", "pan", "pan", "pan", "pan", "pan"],
+  "a_psia": [(error), (error), (error), (error), (error), (error), (error), (error), "pan"],
+  "m_min": "eks",
+  "m_max": "pan",
+  "m_minlen": 3,
+  "m_maxlen": 3,
+  "m_median": "pan",
+  "m_ps": {
+    "0": "eks",
+    "1": "eks",
+    "10": "eks",
+    "25": "eks",
+    "50": "pan",
+    "75": "pan",
+    "90": "pan",
+    "99": "pan",
+    "100": "pan"
+  },
+  "m_psi": {
+    "0": (error),
+    "1": (error),
+    "10": (error),
+    "25": (error),
+    "50": (error),
+    "75": (error),
+    "90": (error),
+    "99": (error),
+    "100": "pan"
+  },
+  "m_psa": ["eks", "eks", "eks", "eks", "pan", "pan", "pan", "pan", "pan"],
+  "m_psia": [(error), (error), (error), (error), (error), (error), (error), (error), "pan"]
+}
+]
diff --git a/test/cases/dsl-stats/percentiles/non-numeric-003/cmd b/test/cases/dsl-stats/percentiles/non-numeric-003/cmd
new file mode 100644
index 0000000000..8e32f39f31
--- /dev/null
+++ b/test/cases/dsl-stats/percentiles/non-numeric-003/cmd
@@ -0,0 +1 @@
+mlr --ofmtf 6 --ojson --zin --from test/input/medium.z head -n 3 then put -q -f test/input/test-percentiles.mlr -s field=a
diff --git a/test/cases/dsl-stats/percentiles/non-numeric-003/experr b/test/cases/dsl-stats/percentiles/non-numeric-003/experr
new file mode 100644
index 0000000000..e69de29bb2
diff --git a/test/cases/dsl-stats/percentiles/non-numeric-003/expout b/test/cases/dsl-stats/percentiles/non-numeric-003/expout
new file mode 100644
index 0000000000..995605fd0c
--- /dev/null
+++ b/test/cases/dsl-stats/percentiles/non-numeric-003/expout
@@ -0,0 +1,62 @@
+[
+{
+  "a_min": "eks",
+  "a_max": "wye",
+  "a_minlen": 3,
+  "a_maxlen": 3,
+  "a_median": "pan",
+  "a_ps": {
+    "0": "eks",
+    "1": "eks",
+    "10": "eks",
+    "25": "eks",
+    "50": "pan",
+    "75": "wye",
+    "90": "wye",
+    "99": "wye",
+    "100": "wye"
+  },
+  "a_psi": {
+    "0": (error),
+    "1": (error),
+    "10": (error),
+    "25": (error),
+    "50": (error),
+    "75": (error),
+    "90": (error),
+    "99": (error),
+    "100": "wye"
+  },
+  "a_psa": ["eks", "eks", "eks", "eks", "pan", "wye", "wye", "wye", "wye"],
+  "a_psia": [(error), (error), (error), (error), (error), (error), (error), (error), "wye"],
+  "m_min": "eks",
+  "m_max": "wye",
+  "m_minlen": 3,
+  "m_maxlen": 3,
+  "m_median": "pan",
+  "m_ps": {
+    "0": "eks",
+    "1": "eks",
+    "10": "eks",
+    "25": "eks",
+    "50": "pan",
+    "75": "wye",
+    "90": "wye",
+    "99": "wye",
+    "100": "wye"
+  },
+  "m_psi": {
+    "0": (error),
+    "1": (error),
+    "10": (error),
+    "25": (error),
+    "50": (error),
+    "75": (error),
+    "90": (error),
+    "99": (error),
+    "100": "wye"
+  },
+  "m_psa": ["eks", "eks", "eks", "eks", "pan", "wye", "wye", "wye", "wye"],
+  "m_psia": [(error), (error), (error), (error), (error), (error), (error), (error), "wye"]
+}
+]
diff --git a/test/cases/dsl-stats/percentiles/non-numeric-004/cmd b/test/cases/dsl-stats/percentiles/non-numeric-004/cmd
new file mode 100644
index 0000000000..5703b12309
--- /dev/null
+++ b/test/cases/dsl-stats/percentiles/non-numeric-004/cmd
@@ -0,0 +1 @@
+mlr --ofmtf 6 --ojson --zin --from test/input/medium.z head -n 4 then put -q -f test/input/test-percentiles.mlr -s field=a
diff --git a/test/cases/dsl-stats/percentiles/non-numeric-004/experr b/test/cases/dsl-stats/percentiles/non-numeric-004/experr
new file mode 100644
index 0000000000..e69de29bb2
diff --git a/test/cases/dsl-stats/percentiles/non-numeric-004/expout b/test/cases/dsl-stats/percentiles/non-numeric-004/expout
new file mode 100644
index 0000000000..995605fd0c
--- /dev/null
+++ b/test/cases/dsl-stats/percentiles/non-numeric-004/expout
@@ -0,0 +1,62 @@
+[
+{
+  "a_min": "eks",
+  "a_max": "wye",
+  "a_minlen": 3,
+  "a_maxlen": 3,
+  "a_median": "pan",
+  "a_ps": {
+    "0": "eks",
+    "1": "eks",
+    "10": "eks",
+    "25": "eks",
+    "50": "pan",
+    "75": "wye",
+    "90": "wye",
+    "99": "wye",
+    "100": "wye"
+  },
+  "a_psi": {
+    "0": (error),
+    "1": (error),
+    "10": (error),
+    "25": (error),
+    "50": (error),
+    "75": (error),
+    "90": (error),
+    "99": (error),
+    "100": "wye"
+  },
+  "a_psa": ["eks", "eks", "eks", "eks", "pan", "wye", "wye", "wye", "wye"],
+  "a_psia": [(error), (error), (error), (error), (error), (error), (error), (error), "wye"],
+  "m_min": "eks",
+  "m_max": "wye",
+  "m_minlen": 3,
+  "m_maxlen": 3,
+  "m_median": "pan",
+  "m_ps": {
+    "0": "eks",
+    "1": "eks",
+    "10": "eks",
+    "25": "eks",
+    "50": "pan",
+    "75": "wye",
+    "90": "wye",
+    "99": "wye",
+    "100": "wye"
+  },
+  "m_psi": {
+    "0": (error),
+    "1": (error),
+    "10": (error),
+    "25": (error),
+    "50": (error),
+    "75": (error),
+    "90": (error),
+    "99": (error),
+    "100": "wye"
+  },
+  "m_psa": ["eks", "eks", "eks", "eks", "pan", "wye", "wye", "wye", "wye"],
+  "m_psia": [(error), (error), (error), (error), (error), (error), (error), (error), "wye"]
+}
+]
diff --git a/test/cases/dsl-stats/percentiles/non-numeric-all/cmd b/test/cases/dsl-stats/percentiles/non-numeric-all/cmd
new file mode 100644
index 0000000000..b20e151b45
--- /dev/null
+++ b/test/cases/dsl-stats/percentiles/non-numeric-all/cmd
@@ -0,0 +1 @@
+mlr --ofmtf 6 --ojson --zin --from test/input/medium.z put -q -f test/input/test-percentiles.mlr -s field=a
diff --git a/test/cases/dsl-stats/percentiles/non-numeric-all/experr b/test/cases/dsl-stats/percentiles/non-numeric-all/experr
new file mode 100644
index 0000000000..e69de29bb2
diff --git a/test/cases/dsl-stats/percentiles/non-numeric-all/expout b/test/cases/dsl-stats/percentiles/non-numeric-all/expout
new file mode 100644
index 0000000000..326ec1168b
--- /dev/null
+++ b/test/cases/dsl-stats/percentiles/non-numeric-all/expout
@@ -0,0 +1,62 @@
+[
+{
+  "a_min": "eks",
+  "a_max": "zee",
+  "a_minlen": 3,
+  "a_maxlen": 3,
+  "a_median": "pan",
+  "a_ps": {
+    "0": "eks",
+    "1": "eks",
+    "10": "eks",
+    "25": "hat",
+    "50": "pan",
+    "75": "wye",
+    "90": "zee",
+    "99": "zee",
+    "100": "zee"
+  },
+  "a_psi": {
+    "0": (error),
+    "1": (error),
+    "10": (error),
+    "25": (error),
+    "50": (error),
+    "75": (error),
+    "90": (error),
+    "99": (error),
+    "100": "zee"
+  },
+  "a_psa": ["eks", "eks", "eks", "hat", "pan", "wye", "zee", "zee", "zee"],
+  "a_psia": [(error), (error), (error), (error), (error), (error), (error), (error), "zee"],
+  "m_min": "eks",
+  "m_max": "zee",
+  "m_minlen": 3,
+  "m_maxlen": 3,
+  "m_median": "pan",
+  "m_ps": {
+    "0": "eks",
+    "1": "eks",
+    "10": "eks",
+    "25": "hat",
+    "50": "pan",
+    "75": "wye",
+    "90": "zee",
+    "99": "zee",
+    "100": "zee"
+  },
+  "m_psi": {
+    "0": (error),
+    "1": (error),
+    "10": (error),
+    "25": (error),
+    "50": (error),
+    "75": (error),
+    "90": (error),
+    "99": (error),
+    "100": "zee"
+  },
+  "m_psa": ["eks", "eks", "eks", "hat", "pan", "wye", "zee", "zee", "zee"],
+  "m_psia": [(error), (error), (error), (error), (error), (error), (error), (error), "zee"]
+}
+]
diff --git a/test/cases/dsl-stats/percentiles/numeric-000/cmd b/test/cases/dsl-stats/percentiles/numeric-000/cmd
new file mode 100644
index 0000000000..432afc1904
--- /dev/null
+++ b/test/cases/dsl-stats/percentiles/numeric-000/cmd
@@ -0,0 +1 @@
+mlr --ofmtf 6 --ojson --zin --from test/input/medium.z head -n 0 then put -q -f test/input/test-percentiles.mlr -s field=i
diff --git a/test/cases/dsl-stats/percentiles/numeric-000/experr b/test/cases/dsl-stats/percentiles/numeric-000/experr
new file mode 100644
index 0000000000..e69de29bb2
diff --git a/test/cases/dsl-stats/percentiles/numeric-000/expout b/test/cases/dsl-stats/percentiles/numeric-000/expout
new file mode 100644
index 0000000000..2e711ec221
--- /dev/null
+++ b/test/cases/dsl-stats/percentiles/numeric-000/expout
@@ -0,0 +1,62 @@
+[
+{
+  "a_min": "",
+  "a_max": "",
+  "a_minlen": "",
+  "a_maxlen": "",
+  "a_median": "",
+  "a_ps": {
+    "0": "",
+    "1": "",
+    "10": "",
+    "25": "",
+    "50": "",
+    "75": "",
+    "90": "",
+    "99": "",
+    "100": ""
+  },
+  "a_psi": {
+    "0": "",
+    "1": "",
+    "10": "",
+    "25": "",
+    "50": "",
+    "75": "",
+    "90": "",
+    "99": "",
+    "100": ""
+  },
+  "a_psa": ["", "", "", "", "", "", "", "", ""],
+  "a_psia": ["", "", "", "", "", "", "", "", ""],
+  "m_min": "",
+  "m_max": "",
+  "m_minlen": "",
+  "m_maxlen": "",
+  "m_median": "",
+  "m_ps": {
+    "0": "",
+    "1": "",
+    "10": "",
+    "25": "",
+    "50": "",
+    "75": "",
+    "90": "",
+    "99": "",
+    "100": ""
+  },
+  "m_psi": {
+    "0": "",
+    "1": "",
+    "10": "",
+    "25": "",
+    "50": "",
+    "75": "",
+    "90": "",
+    "99": "",
+    "100": ""
+  },
+  "m_psa": ["", "", "", "", "", "", "", "", ""],
+  "m_psia": ["", "", "", "", "", "", "", "", ""]
+}
+]
diff --git a/test/cases/dsl-stats/percentiles/numeric-001/cmd b/test/cases/dsl-stats/percentiles/numeric-001/cmd
new file mode 100644
index 0000000000..c9408b30e8
--- /dev/null
+++ b/test/cases/dsl-stats/percentiles/numeric-001/cmd
@@ -0,0 +1 @@
+mlr --ofmtf 6 --ojson --zin --from test/input/medium.z head -n 1 then put -q -f test/input/test-percentiles.mlr -s field=i
diff --git a/test/cases/dsl-stats/percentiles/numeric-001/experr b/test/cases/dsl-stats/percentiles/numeric-001/experr
new file mode 100644
index 0000000000..e69de29bb2
diff --git a/test/cases/dsl-stats/percentiles/numeric-001/expout b/test/cases/dsl-stats/percentiles/numeric-001/expout
new file mode 100644
index 0000000000..01539222ee
--- /dev/null
+++ b/test/cases/dsl-stats/percentiles/numeric-001/expout
@@ -0,0 +1,62 @@
+[
+{
+  "a_min": 1,
+  "a_max": 1,
+  "a_minlen": 1,
+  "a_maxlen": 1,
+  "a_median": 1,
+  "a_ps": {
+    "0": 1,
+    "1": 1,
+    "10": 1,
+    "25": 1,
+    "50": 1,
+    "75": 1,
+    "90": 1,
+    "99": 1,
+    "100": 1
+  },
+  "a_psi": {
+    "0": 1,
+    "1": 1,
+    "10": 1,
+    "25": 1,
+    "50": 1,
+    "75": 1,
+    "90": 1,
+    "99": 1,
+    "100": 1
+  },
+  "a_psa": [1, 1, 1, 1, 1, 1, 1, 1, 1],
+  "a_psia": [1, 1, 1, 1, 1, 1, 1, 1, 1],
+  "m_min": 1,
+  "m_max": 1,
+  "m_minlen": 1,
+  "m_maxlen": 1,
+  "m_median": 1,
+  "m_ps": {
+    "0": 1,
+    "1": 1,
+    "10": 1,
+    "25": 1,
+    "50": 1,
+    "75": 1,
+    "90": 1,
+    "99": 1,
+    "100": 1
+  },
+  "m_psi": {
+    "0": 1,
+    "1": 1,
+    "10": 1,
+    "25": 1,
+    "50": 1,
+    "75": 1,
+    "90": 1,
+    "99": 1,
+    "100": 1
+  },
+  "m_psa": [1, 1, 1, 1, 1, 1, 1, 1, 1],
+  "m_psia": [1, 1, 1, 1, 1, 1, 1, 1, 1]
+}
+]
diff --git a/test/cases/dsl-stats/percentiles/numeric-002/cmd b/test/cases/dsl-stats/percentiles/numeric-002/cmd
new file mode 100644
index 0000000000..c749a00ff5
--- /dev/null
+++ b/test/cases/dsl-stats/percentiles/numeric-002/cmd
@@ -0,0 +1 @@
+mlr --ofmtf 6 --ojson --zin --from test/input/medium.z head -n 2 then put -q -f test/input/test-percentiles.mlr -s field=i
diff --git a/test/cases/dsl-stats/percentiles/numeric-002/experr b/test/cases/dsl-stats/percentiles/numeric-002/experr
new file mode 100644
index 0000000000..e69de29bb2
diff --git a/test/cases/dsl-stats/percentiles/numeric-002/expout b/test/cases/dsl-stats/percentiles/numeric-002/expout
new file mode 100644
index 0000000000..fde0fe23b3
--- /dev/null
+++ b/test/cases/dsl-stats/percentiles/numeric-002/expout
@@ -0,0 +1,62 @@
+[
+{
+  "a_min": 1,
+  "a_max": 2,
+  "a_minlen": 1,
+  "a_maxlen": 1,
+  "a_median": 2,
+  "a_ps": {
+    "0": 1,
+    "1": 1,
+    "10": 1,
+    "25": 1,
+    "50": 2,
+    "75": 2,
+    "90": 2,
+    "99": 2,
+    "100": 2
+  },
+  "a_psi": {
+    "0": 1.000000,
+    "1": 1.010000,
+    "10": 1.100000,
+    "25": 1.250000,
+    "50": 1.500000,
+    "75": 1.750000,
+    "90": 1.900000,
+    "99": 1.990000,
+    "100": 2
+  },
+  "a_psa": [1, 1, 1, 1, 2, 2, 2, 2, 2],
+  "a_psia": [1.000000, 1.010000, 1.100000, 1.250000, 1.500000, 1.750000, 1.900000, 1.990000, 2],
+  "m_min": 1,
+  "m_max": 2,
+  "m_minlen": 1,
+  "m_maxlen": 1,
+  "m_median": 2,
+  "m_ps": {
+    "0": 1,
+    "1": 1,
+    "10": 1,
+    "25": 1,
+    "50": 2,
+    "75": 2,
+    "90": 2,
+    "99": 2,
+    "100": 2
+  },
+  "m_psi": {
+    "0": 1.000000,
+    "1": 1.010000,
+    "10": 1.100000,
+    "25": 1.250000,
+    "50": 1.500000,
+    "75": 1.750000,
+    "90": 1.900000,
+    "99": 1.990000,
+    "100": 2
+  },
+  "m_psa": [1, 1, 1, 1, 2, 2, 2, 2, 2],
+  "m_psia": [1.000000, 1.010000, 1.100000, 1.250000, 1.500000, 1.750000, 1.900000, 1.990000, 2]
+}
+]
diff --git a/test/cases/dsl-stats/percentiles/numeric-003/cmd b/test/cases/dsl-stats/percentiles/numeric-003/cmd
new file mode 100644
index 0000000000..8198811391
--- /dev/null
+++ b/test/cases/dsl-stats/percentiles/numeric-003/cmd
@@ -0,0 +1 @@
+mlr --ofmtf 6 --ojson --zin --from test/input/medium.z head -n 3 then put -q -f test/input/test-percentiles.mlr -s field=i
diff --git a/test/cases/dsl-stats/percentiles/numeric-003/experr b/test/cases/dsl-stats/percentiles/numeric-003/experr
new file mode 100644
index 0000000000..e69de29bb2
diff --git a/test/cases/dsl-stats/percentiles/numeric-003/expout b/test/cases/dsl-stats/percentiles/numeric-003/expout
new file mode 100644
index 0000000000..e1fdea0d7e
--- /dev/null
+++ b/test/cases/dsl-stats/percentiles/numeric-003/expout
@@ -0,0 +1,62 @@
+[
+{
+  "a_min": 1,
+  "a_max": 3,
+  "a_minlen": 1,
+  "a_maxlen": 1,
+  "a_median": 2,
+  "a_ps": {
+    "0": 1,
+    "1": 1,
+    "10": 1,
+    "25": 1,
+    "50": 2,
+    "75": 3,
+    "90": 3,
+    "99": 3,
+    "100": 3
+  },
+  "a_psi": {
+    "0": 1.000000,
+    "1": 1.020000,
+    "10": 1.200000,
+    "25": 1.500000,
+    "50": 2.000000,
+    "75": 2.500000,
+    "90": 2.800000,
+    "99": 2.980000,
+    "100": 3
+  },
+  "a_psa": [1, 1, 1, 1, 2, 3, 3, 3, 3],
+  "a_psia": [1.000000, 1.020000, 1.200000, 1.500000, 2.000000, 2.500000, 2.800000, 2.980000, 3],
+  "m_min": 1,
+  "m_max": 3,
+  "m_minlen": 1,
+  "m_maxlen": 1,
+  "m_median": 2,
+  "m_ps": {
+    "0": 1,
+    "1": 1,
+    "10": 1,
+    "25": 1,
+    "50": 2,
+    "75": 3,
+    "90": 3,
+    "99": 3,
+    "100": 3
+  },
+  "m_psi": {
+    "0": 1.000000,
+    "1": 1.020000,
+    "10": 1.200000,
+    "25": 1.500000,
+    "50": 2.000000,
+    "75": 2.500000,
+    "90": 2.800000,
+    "99": 2.980000,
+    "100": 3
+  },
+  "m_psa": [1, 1, 1, 1, 2, 3, 3, 3, 3],
+  "m_psia": [1.000000, 1.020000, 1.200000, 1.500000, 2.000000, 2.500000, 2.800000, 2.980000, 3]
+}
+]
diff --git a/test/cases/dsl-stats/percentiles/numeric-004/cmd b/test/cases/dsl-stats/percentiles/numeric-004/cmd
new file mode 100644
index 0000000000..5191312322
--- /dev/null
+++ b/test/cases/dsl-stats/percentiles/numeric-004/cmd
@@ -0,0 +1 @@
+mlr --ofmtf 6 --ojson --zin --from test/input/medium.z head -n 4 then put -q -f test/input/test-percentiles.mlr -s field=i
diff --git a/test/cases/dsl-stats/percentiles/numeric-004/experr b/test/cases/dsl-stats/percentiles/numeric-004/experr
new file mode 100644
index 0000000000..e69de29bb2
diff --git a/test/cases/dsl-stats/percentiles/numeric-004/expout b/test/cases/dsl-stats/percentiles/numeric-004/expout
new file mode 100644
index 0000000000..677a6f591f
--- /dev/null
+++ b/test/cases/dsl-stats/percentiles/numeric-004/expout
@@ -0,0 +1,62 @@
+[
+{
+  "a_min": 1,
+  "a_max": 4,
+  "a_minlen": 1,
+  "a_maxlen": 1,
+  "a_median": 3,
+  "a_ps": {
+    "0": 1,
+    "1": 1,
+    "10": 1,
+    "25": 2,
+    "50": 3,
+    "75": 4,
+    "90": 4,
+    "99": 4,
+    "100": 4
+  },
+  "a_psi": {
+    "0": 1.000000,
+    "1": 1.030000,
+    "10": 1.300000,
+    "25": 1.750000,
+    "50": 2.500000,
+    "75": 3.250000,
+    "90": 3.700000,
+    "99": 3.970000,
+    "100": 4
+  },
+  "a_psa": [1, 1, 1, 2, 3, 4, 4, 4, 4],
+  "a_psia": [1.000000, 1.030000, 1.300000, 1.750000, 2.500000, 3.250000, 3.700000, 3.970000, 4],
+  "m_min": 1,
+  "m_max": 4,
+  "m_minlen": 1,
+  "m_maxlen": 1,
+  "m_median": 3,
+  "m_ps": {
+    "0": 1,
+    "1": 1,
+    "10": 1,
+    "25": 2,
+    "50": 3,
+    "75": 4,
+    "90": 4,
+    "99": 4,
+    "100": 4
+  },
+  "m_psi": {
+    "0": 1.000000,
+    "1": 1.030000,
+    "10": 1.300000,
+    "25": 1.750000,
+    "50": 2.500000,
+    "75": 3.250000,
+    "90": 3.700000,
+    "99": 3.970000,
+    "100": 4
+  },
+  "m_psa": [1, 1, 1, 2, 3, 4, 4, 4, 4],
+  "m_psia": [1.000000, 1.030000, 1.300000, 1.750000, 2.500000, 3.250000, 3.700000, 3.970000, 4]
+}
+]
diff --git a/test/cases/dsl-stats/percentiles/numeric-all/cmd b/test/cases/dsl-stats/percentiles/numeric-all/cmd
new file mode 100644
index 0000000000..2f7f93eb17
--- /dev/null
+++ b/test/cases/dsl-stats/percentiles/numeric-all/cmd
@@ -0,0 +1 @@
+mlr --ofmtf 6 --ojson --zin --from test/input/medium.z put -q -f test/input/test-percentiles.mlr -s field=i
diff --git a/test/cases/dsl-stats/percentiles/numeric-all/experr b/test/cases/dsl-stats/percentiles/numeric-all/experr
new file mode 100644
index 0000000000..e69de29bb2
diff --git a/test/cases/dsl-stats/percentiles/numeric-all/expout b/test/cases/dsl-stats/percentiles/numeric-all/expout
new file mode 100644
index 0000000000..7032005180
--- /dev/null
+++ b/test/cases/dsl-stats/percentiles/numeric-all/expout
@@ -0,0 +1,62 @@
+[
+{
+  "a_min": 1,
+  "a_max": 10000,
+  "a_minlen": 1,
+  "a_maxlen": 5,
+  "a_median": 5001,
+  "a_ps": {
+    "0": 1,
+    "1": 101,
+    "10": 1001,
+    "25": 2501,
+    "50": 5001,
+    "75": 7501,
+    "90": 9001,
+    "99": 9901,
+    "100": 10000
+  },
+  "a_psi": {
+    "0": 1.000000,
+    "1": 100.990000,
+    "10": 1000.900000,
+    "25": 2500.750000,
+    "50": 5000.500000,
+    "75": 7500.250000,
+    "90": 9000.100000,
+    "99": 9900.010000,
+    "100": 10000
+  },
+  "a_psa": [1, 101, 1001, 2501, 5001, 7501, 9001, 9901, 10000],
+  "a_psia": [1.000000, 100.990000, 1000.900000, 2500.750000, 5000.500000, 7500.250000, 9000.100000, 9900.010000, 10000],
+  "m_min": 1,
+  "m_max": 10000,
+  "m_minlen": 1,
+  "m_maxlen": 5,
+  "m_median": 5001,
+  "m_ps": {
+    "0": 1,
+    "1": 101,
+    "10": 1001,
+    "25": 2501,
+    "50": 5001,
+    "75": 7501,
+    "90": 9001,
+    "99": 9901,
+    "100": 10000
+  },
+  "m_psi": {
+    "0": 1.000000,
+    "1": 100.990000,
+    "10": 1000.900000,
+    "25": 2500.750000,
+    "50": 5000.500000,
+    "75": 7500.250000,
+    "90": 9000.100000,
+    "99": 9900.010000,
+    "100": 10000
+  },
+  "m_psa": [1, 101, 1001, 2501, 5001, 7501, 9001, 9901, 10000],
+  "m_psia": [1.000000, 100.990000, 1000.900000, 2500.750000, 5000.500000, 7500.250000, 9000.100000, 9900.010000, 10000]
+}
+]
diff --git a/test/cases/dsl-stats/sums/README.txt b/test/cases/dsl-stats/sums/README.txt
new file mode 100644
index 0000000000..c257842b1a
--- /dev/null
+++ b/test/cases/dsl-stats/sums/README.txt
@@ -0,0 +1 @@
+Coverage via unit-test framework, not regression-test framework
diff --git a/test/input/test-moments.mlr b/test/input/test-moments.mlr
new file mode 100644
index 0000000000..0f81bce0b7
--- /dev/null
+++ b/test/input/test-moments.mlr
@@ -0,0 +1,39 @@
+begin {  # Test helper: gather one field from every record, then emit summary statistics at end of stream.
+    @a = [];  # the field values held as an array, filled at index NR
+    @m = {};  # the same field values held as a map, keyed by NR
+    @field = "i";  # field to collect; NOTE(review): hard-coded here, whereas test-percentiles.mlr takes it from put -s field=... -- confirm intended
+}
+
+@a[NR] = $[@field];  # runs once per record: store this record's value in both collections
+@m[NR] = $[@field];
+
+end {
+    outputs = {
+        # Stats computed over the array collection; the a_ prefix marks array input.
+        "a_count": count(@a),
+        "a_sum": sum(@a),
+        "a_sum2": sum2(@a),
+        "a_sum3": sum3(@a),
+        "a_sum4": sum4(@a),
+        "a_mean": mean(@a),
+        "a_var": variance(@a),
+        "a_stddev": stddev(@a),
+        "a_meaneb": meaneb(@a),
+        "a_skewness": skewness(@a),
+        "a_kurtosis": kurtosis(@a),
+        # The same stats over the map collection; the m_ prefix marks map input.
+        "m_count": count(@m),
+        "m_sum": sum(@m),
+        "m_sum2": sum2(@m),
+        "m_sum3": sum3(@m),
+        "m_sum4": sum4(@m),
+        "m_mean": mean(@m),
+        "m_var": variance(@m),
+        "m_stddev": stddev(@m),
+        "m_meaneb": meaneb(@m),
+        "m_skewness": skewness(@m),
+        "m_kurtosis": kurtosis(@m),
+
+    };
+    emit outputs;  # one output record; keys appear in the order written above
+}
diff --git a/test/input/test-percentiles.mlr b/test/input/test-percentiles.mlr
new file mode 100644
index 0000000000..1c5d807fe6
--- /dev/null
+++ b/test/input/test-percentiles.mlr
@@ -0,0 +1,44 @@
+begin {  # Test helper: gather one field from every record, then emit min/max/length/percentile stats at end of stream.
+    @a = [];  # the field values held as an array, filled at index NR
+    @m = {};  # the same field values held as a map, keyed by NR
+    # @field must be given by put -s field=namegoeshere in the script invocation.
+    # This lets us test percentiles over various field names/types while re-using
+    # this same script.
+}
+
+@a[NR] = $[@field];  # runs once per record: store this record's value in both collections
+@m[NR] = $[@field];
+
+end {
+    outputs = {
+        # Stats computed over the array collection; the a_ prefix marks array input.
+        "a_min": min(@a),
+        "a_max": max(@a),
+        "a_minlen": minlen(@a),
+        "a_maxlen": maxlen(@a),
+        "a_median": median(@a),
+        "a_ps": percentiles(@a, [0,1,10,25,50,75,90,99,100]),  # no options: the expout fixtures show a map keyed by percentile
+        "a_psi": percentiles(@a, [0,1,10,25,50,75,90,99,100], {"interpolate_linearly":true}),  # fixtures show interpolated values, e.g. 1.5 at p50 for inputs 1,2
+        "a_psa": percentiles(@a, [0,1,10,25,50,75,90,99,100], {"output_array_not_map":true}),  # fixtures show an array in the requested percentile order
+        "a_psia": percentiles(@a, [0,1,10,25,50,75,90,99,100], {
+            "interpolate_linearly": true,
+            "output_array_not_map":true,
+        }),
+        # The same stats over the map collection; the m_ prefix marks map input.
+        "m_min": min(@m),
+        "m_max": max(@m),
+        "m_minlen": minlen(@m),
+        "m_maxlen": maxlen(@m),
+        "m_median": median(@m),
+        "m_ps": percentiles(@m, [0,1,10,25,50,75,90,99,100]),
+        "m_psi": percentiles(@m, [0,1,10,25,50,75,90,99,100], {"interpolate_linearly":true}),
+        "m_psa": percentiles(@m, [0,1,10,25,50,75,90,99,100], {"output_array_not_map":true}),
+        "m_psia": percentiles(@m, [0,1,10,25,50,75,90,99,100], {
+            "interpolate_linearly": true,
+            "output_array_not_map":true,
+        }),
+
+    };
+    emit outputs;  # one output record; keys appear in the order written above
+}
+