From a8701c895d18bcc5d48cffeb6244171e88b04b0e Mon Sep 17 00:00:00 2001 From: Peter Deffebach Date: Tue, 14 Sep 2021 12:52:11 -0400 Subject: [PATCH 01/27] initial attempt --- src/DataFramesMeta.jl | 1 + src/parsing_astable.jl | 70 ++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 71 insertions(+) create mode 100644 src/parsing_astable.jl diff --git a/src/DataFramesMeta.jl b/src/DataFramesMeta.jl index fec20a35..3730bf84 100644 --- a/src/DataFramesMeta.jl +++ b/src/DataFramesMeta.jl @@ -22,6 +22,7 @@ export @with, const DOLLAR = raw"$" include("parsing.jl") +include("parsing_astable.jl") include("macros.jl") include("linqmacro.jl") include("eachrow.jl") diff --git a/src/parsing_astable.jl b/src/parsing_astable.jl new file mode 100644 index 00000000..e978688a --- /dev/null +++ b/src/parsing_astable.jl @@ -0,0 +1,70 @@ +function conditionally_add_symbols!(inputs_to_function, lhs_assignments, col) + # if it's already been assigned at top-level, + # don't add it to the inputs + if haskey(lhs_assignments, col) + return lhs_assignments[col] + else + return addkey!(inputs_to_function, col) + end +end + +replace_syms_astable!(inputs_to_function, lhs_assignments, x) = x +replace_syms_astable!(inputs_to_function, lhs_assignments, q::QuoteNode) = + conditionally_add_symbols!(inputs_to_function, lhs_assignments, q) + +function replace_syms_astable!(inputs_to_function, lhs_assignments, e::Expr) + if onearg(e, :^) + return e.args[2] + end + + col = get_column_expr(e) + if col !== nothing + return conditionally_add_symbols!(inputs_to_function, lhs_assignments, col) + elseif e.head == :. 
+ return replace_dotted_astable!(inputs_to_function, lhs_assignments, e) + else + return mapexpr(x -> replace_syms_astable!(inputs_to_function, lhs_assignments, x), e) + end +end + +protect_replace_syms_astable!(inputs_to_function, lhs_assignments, e) = e +protect_replace_syms_astable!(inputs_to_function, lhs_assignments, e) = + replace_syms!(inputs_to_function, lhs_assignments, e) + +function replace_dotted_astable!(inputs_to_function, lhs_assignments, e) + x_new = replace_syms_astable!(inputs_to_function, lhs_assignments, e.args[1]) + y_new = protect_replace_syms_astable!(inputs_to_function, lhs_assignments, e.args[2]) + Expr(:., x_new, y_new) +end + +is_column_assigment(ex) = false +function is_column_assigment(ex::Expr) + ex.head == :(=) && (get_column_expr(ex.args[1]) !== nothing) +end + +function collect_top_level_column_assignments(ex) + inputs_to_function = Dict{Any, Symbol}() + lhs_assignments = Dict{Any, Symbol}() + + ex = MacroTools.flatten(ex) + exprs = map(ex.args) do arg + @show arg + @show is_column_assigment(arg) + if is_column_assigment(arg) + lhs = arg.args[1] + rhs = arg.args[2] + new_ex = replace_syms_astable!(inputs_to_function, lhs_assignments, arg.args[2]) + if haskey(inputs_to_function, lhs) + new_lhs = inputs_to_function[lhs] + else + new_lhs = addkey!(lhs_assignments, lhs) + end + + Expr(:(=), new_lhs, new_ex) + else + replace_syms_astable!(inputs_to_function, lhs_assignments, arg) + end + end + cols_to_add = collect(keys(inputs_to_function)) + new_ex = Expr(:block, exprs...) 
+end \ No newline at end of file From 9b997a6b7a1a2a314a2978240b68a124abd3ace1 Mon Sep 17 00:00:00 2001 From: Peter Deffebach Date: Wed, 15 Sep 2021 06:13:27 -0400 Subject: [PATCH 02/27] finally working --- src/parsing.jl | 13 ++++++++++--- src/parsing_astable.jl | 41 +++++++++++++++++++++++++++++++++-------- 2 files changed, 43 insertions(+), 11 deletions(-) diff --git a/src/parsing.jl b/src/parsing.jl index 3a250138..c2a25252 100644 --- a/src/parsing.jl +++ b/src/parsing.jl @@ -91,7 +91,8 @@ is_macro_head(ex::Expr, name) = ex.head == :macrocall && ex.args[1] == Symbol(na const BYROW_SYM = Symbol("@byrow") const PASSMISSING_SYM = Symbol("@passmissing") -const DEFAULT_FLAGS = (;BYROW_SYM => Ref(false), PASSMISSING_SYM => Ref(false)) +const ASTABLE_SYM = Symbol("@astable") +const DEFAULT_FLAGS = (;BYROW_SYM => Ref(false), PASSMISSING_SYM => Ref(false), ASTABLE_SYM => Ref(false)) extract_macro_flags(ex, exprflags = deepcopy(DEFAULT_FLAGS)) = (ex, exprflags) function extract_macro_flags(ex::Expr, exprflags = deepcopy(DEFAULT_FLAGS)) @@ -269,7 +270,13 @@ function fun_to_vec(ex::Expr; return ex_col end - if no_dest + if final_flags[ASTABLE_SYM][] + src, fun = get_source_fun_astable(ex; exprflags = final_flags) + + return :($src => $fun => AsTable) + end + + if no_dest # subet and with src, fun = get_source_fun(ex, exprflags = final_flags) return quote $src => $fun @@ -359,7 +366,7 @@ function create_args_vector(arg; wrap_byrow::Bool=false) outer_flags[BYROW_SYM][] = true end - if arg isa Expr && arg.head == :block + if arg isa Expr && arg.head == :block && !outer_flags[ASTABLE_SYM][] x = MacroTools.rmlines(arg).args else x = Any[arg] diff --git a/src/parsing_astable.jl b/src/parsing_astable.jl index e978688a..c6f8d890 100644 --- a/src/parsing_astable.jl +++ b/src/parsing_astable.jl @@ -28,7 +28,7 @@ function replace_syms_astable!(inputs_to_function, lhs_assignments, e::Expr) end protect_replace_syms_astable!(inputs_to_function, lhs_assignments, e) = e 
-protect_replace_syms_astable!(inputs_to_function, lhs_assignments, e) = +protect_replace_syms_astable!(inputs_to_function, lhs_assignments, e::Expr) = replace_syms!(inputs_to_function, lhs_assignments, e) function replace_dotted_astable!(inputs_to_function, lhs_assignments, e) @@ -42,20 +42,25 @@ function is_column_assigment(ex::Expr) ex.head == :(=) && (get_column_expr(ex.args[1]) !== nothing) end -function collect_top_level_column_assignments(ex) +# Taken from MacroTools.jl +# No docstring so assumed untable +block(ex) = isexpr(ex, :block) ? ex : :($ex;) + +function get_source_fun_astable(ex; exprflags = deepcopy(DEFAULT_FLAGS)) inputs_to_function = Dict{Any, Symbol}() lhs_assignments = Dict{Any, Symbol}() - ex = MacroTools.flatten(ex) + # Make sure all top-level assignments are + # in the args vector + ex = block(MacroTools.flatten(ex)) exprs = map(ex.args) do arg - @show arg - @show is_column_assigment(arg) if is_column_assigment(arg) - lhs = arg.args[1] + lhs = get_column_expr(arg.args[1]) rhs = arg.args[2] new_ex = replace_syms_astable!(inputs_to_function, lhs_assignments, arg.args[2]) if haskey(inputs_to_function, lhs) new_lhs = inputs_to_function[lhs] + lhs_assignments[lhs] = new_lhs else new_lhs = addkey!(lhs_assignments, lhs) end @@ -65,6 +70,26 @@ function collect_top_level_column_assignments(ex) replace_syms_astable!(inputs_to_function, lhs_assignments, arg) end end - cols_to_add = collect(keys(inputs_to_function)) - new_ex = Expr(:block, exprs...) + source = :(DataFramesMeta.make_source_concrete($(Expr(:vect, keys(inputs_to_function)...)))) + + inputargs = Expr(:tuple, values(inputs_to_function)...) 
+ nt_iterator = (:(Symbol($k) => $v) for (k, v) in lhs_assignments) + nt_expr = Expr(:tuple, Expr(:parameters, nt_iterator...)) + body = Expr(:block, Expr(:block, exprs...), nt_expr) + + fun = quote + $inputargs -> begin + $body + end + end + + # TODO: Add passmissing support by + # checking if any input arguments are missing, + # and if so, making a named tuple with + # missing values + if exprflags[BYROW_SYM][] + fun = :(ByRow($fun)) + end + + return source, fun end \ No newline at end of file From d63956067a705dab4e1a5925ef51e0e2d6177b7a Mon Sep 17 00:00:00 2001 From: Peter Deffebach Date: Wed, 15 Sep 2021 06:16:25 -0400 Subject: [PATCH 03/27] start adding tests --- test/astable_flag.jl | 23 +++++++++++++++++++++++ 1 file changed, 23 insertions(+) create mode 100644 test/astable_flag.jl diff --git a/test/astable_flag.jl b/test/astable_flag.jl new file mode 100644 index 00000000..542bcfb5 --- /dev/null +++ b/test/astable_flag.jl @@ -0,0 +1,23 @@ +module TestAsTableFlag + +using Test +using DataFrames +using DataFramesMeta +using Statistics + +const ≅ = isequal + +@testset "@astable macro flag" begin + df = DataFrame(a = 1, b = 2) + + d = @rtransform df @astable begin + :x = 1 + y = 50 + :a = :x + y + end + + @test d == DataFrame(a = 51, b = 2, x = 1) +end + + +end # module \ No newline at end of file From b77e8ca9161ae308a3092e1ce24d03184b7b646c Mon Sep 17 00:00:00 2001 From: Peter Deffebach Date: Thu, 16 Sep 2021 04:58:56 -0400 Subject: [PATCH 04/27] more tests --- test/astable_flag.jl | 111 +++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 106 insertions(+), 5 deletions(-) diff --git a/test/astable_flag.jl b/test/astable_flag.jl index 542bcfb5..fcdc88b3 100644 --- a/test/astable_flag.jl +++ b/test/astable_flag.jl @@ -1,22 +1,123 @@ module TestAsTableFlag using Test -using DataFrames using DataFramesMeta using Statistics const ≅ = isequal -@testset "@astable macro flag" begin +@testset "@astable with just assignments" begin df = DataFrame(a = 1, b = 2) d = 
@rtransform df @astable begin :x = 1 - y = 50 - :a = :x + y + + nothing + end + + @test d == DataFrame(a = 1, b = 2, x = 1) + + d = @rselect df @astable begin + :x = 1 + y = 100 + nothing + end + + @test d == DataFrame(x = 1) + + d = @transform df @astable begin + :x = [5] + y = 100 + nothing + end + + @test d == DataFrame(a = 1, b = 2, x = 5) + + d = @select df @astable begin + :x = [5] + y = 100 + nothing + end + + @test d == DataFrame(x = 5) +end + +@testset "@astable with just assignments, mutating" begin + # After finalizing above testset +end + +@testset "@astable with strings" begin + df = DataFrame(a = 1, b = 2) + + x_str = "x" + d = @rtransform df @astable begin + $x_str = 1 + y = 100 + nothing + end + + @test d == DataFrame(a = 1, b = 2, x = 1) + + d = @rselect df @astable begin + $x_str = 1 + y = 100 + nothing + end + + @test d == DataFrame(x = 1) + + d = @transform df @astable begin + $x_str = [5] + y = 100 + nothing + end + + @test d == DataFrame(a = 1, b = 2, x = 5) + + d = @select df @astable begin + $x_str = [5] + y = 100 + nothing + end + + @test d == DataFrame(x = 5) +end + +@testset "Re-using variables" begin + df = DataFrame(a = 1, b = 2) + + d = @rtransform df @astable begin + :x = 1 + y = 5 + :z = :x + y + end + + @test d == DataFrame(a = 1, b = 2, x = 1, z = 6) + + d = @rselect df @astable begin + :x = 1 + y = 5 + :z = :x + y + end + + @test d == DataFrame(x = 1, z = 6) + + x_str = "x" + d = @rtransform df @astable begin + $x_str = 1 + y = 5 + :z = $x_str + y + end + + @test d == DataFrame(a = 1, b = 2, x = 1, z = 6) + + d = @rselect df @astable begin + $x_str = 1 + y = 5 + :z = $x_str + y end - @test d == DataFrame(a = 51, b = 2, x = 1) + @test d == DataFrame(x = 1, z = 6) end From 3cdf0d5b4f1ddc79ba6cb992c2ba567b468fe617 Mon Sep 17 00:00:00 2001 From: Peter Deffebach Date: Thu, 16 Sep 2021 05:08:59 -0400 Subject: [PATCH 05/27] more tests --- Project.toml | 3 ++- src/DataFramesMeta.jl | 2 ++ src/parsing_astable.jl | 2 +- 3 files changed, 5 
insertions(+), 2 deletions(-) diff --git a/Project.toml b/Project.toml index fbb357d4..fbebc46e 100644 --- a/Project.toml +++ b/Project.toml @@ -6,14 +6,15 @@ version = "0.9.1" Chain = "8be319e6-bccf-4806-a6f7-6fae938471bc" DataFrames = "a93c6f00-e57d-5684-b7b6-d8193f3e46c0" MacroTools = "1914dd2f-81c6-5fcd-8719-6d5c9610ff09" +OrderedCollections = "bac558e1-5e72-5ebc-8fee-abe8a469f55d" Reexport = "189a3867-3050-52da-a836-e630ba90ab69" [compat] +Chain = "0.4" DataFrames = "1" MacroTools = "0.5" Reexport = "0.2, 1" julia = "1" -Chain = "0.4" [extras] CategoricalArrays = "324d7699-5711-5eae-9e2f-1d82baa6b597" diff --git a/src/DataFramesMeta.jl b/src/DataFramesMeta.jl index 3730bf84..1c684842 100644 --- a/src/DataFramesMeta.jl +++ b/src/DataFramesMeta.jl @@ -4,6 +4,8 @@ using Reexport using MacroTools +using OrderedCollections: OrderedCollections + @reexport using DataFrames @reexport using Chain diff --git a/src/parsing_astable.jl b/src/parsing_astable.jl index c6f8d890..becbac3c 100644 --- a/src/parsing_astable.jl +++ b/src/parsing_astable.jl @@ -48,7 +48,7 @@ block(ex) = isexpr(ex, :block) ? 
ex : :($ex;) function get_source_fun_astable(ex; exprflags = deepcopy(DEFAULT_FLAGS)) inputs_to_function = Dict{Any, Symbol}() - lhs_assignments = Dict{Any, Symbol}() + lhs_assignments = OrderedCollections.OrderedDict{Any, Symbol}() # Make sure all top-level assignments are # in the args vector From b878fbb68365045b3408d4a108d45cd8d2eee3f5 Mon Sep 17 00:00:00 2001 From: Peter Deffebach Date: Thu, 16 Sep 2021 05:33:08 -0400 Subject: [PATCH 06/27] add docstring --- src/DataFramesMeta.jl | 2 +- src/macros.jl | 114 ++++++++++++++++++++++++++++++++++-------- 2 files changed, 94 insertions(+), 22 deletions(-) diff --git a/src/DataFramesMeta.jl b/src/DataFramesMeta.jl index 1c684842..56914e42 100644 --- a/src/DataFramesMeta.jl +++ b/src/DataFramesMeta.jl @@ -18,7 +18,7 @@ export @with, @transform, @select, @transform!, @select!, @rtransform, @rselect, @rtransform!, @rselect!, @eachrow, @eachrow!, - @byrow, @passmissing, + @byrow, @passmissing, @astable, @based_on, @where # deprecated const DOLLAR = raw"$" diff --git a/src/macros.jl b/src/macros.jl index e954a371..21dfab1f 100644 --- a/src/macros.jl +++ b/src/macros.jl @@ -350,6 +350,99 @@ macro passmissing(args...) throw(ArgumentError("@passmissing only works inside DataFramesMeta macros.")) end +""" + astable(args...) + +Return a `NamedTuple` from a transformation inside DataFramesMeta.jl macros. + +`@astable` acts on a single block. It works through all top-level expressions +and collects all such expressions of the form `:y = x`, i.e. assignments to a +`Symbol`, which is a syntax error outside of the macro. At the end of the +expression, all assignments are collected into a `NamedTuple` to be used +with the `AsTable` destination in the DataFrames.jl transformation +mini-language. 
+ +Concretely, the expressions + +``` +df = DataFrame(a = 1) + +@rtransform df @astable begin + :x = 1 + y = 50 + :z = :x + y + :a +end +``` + +becomes the pair + +``` +function f(a) + x_t = 1 + y = 50 + z_t = x_t + y + a + + (; x = x_t, z = z_t) +end + +transform(df, [:a] => f => AsTable) +``` + +`@astable` is useful when performing intermediate calculations +yet store their results in new columns. For example, the following fails. + +``` +@rtransform df begin + :new_col_1 = :x + :y + :new_col_2 = :new_col_1 + :z +end +``` + +This is because DataFrames.jl does not guarantee sequential evaluation of +transformations. `@astable` solves this problem + +@rtransform df @astable begin + :new_col_1 = :x + :y + :new_col_2 = :new_col_1 + :z +end + +### Examples + +``` +julia> df = DataFrame(a = [1, 2, 3], b = [4, 5, 6]); + +julia> d = @rtransform df @astable begin + :x = 1 + y = 5 + :z = :x + y + end +3×4 DataFrame + Row │ a b x z + │ Int64 Int64 Int64 Int64 +─────┼──────────────────────────── + 1 │ 1 4 1 6 + 2 │ 2 5 1 6 + 3 │ 3 6 1 6 + +julia> df = DataFrame(a = [1, 1, 2, 2], b = [5, 6, 70, 80]); + +julia> @by df :a @astable begin + $(DOLLAR)"Mean of b" = mean(:b) + $(DOLLAR)"Standard deviation of b" = std(:b) + end +2×3 DataFrame + Row │ a Mean of b Standard deviation of b + │ Int64 Float64 Float64 +─────┼─────────────────────────────────────────── + 1 │ 1 5.5 0.707107 + 2 │ 2 75.0 7.07107 +``` + +""" +macro astable(args...) + throw(ArgumentError("@astable only works inside DataFramesMeta macros.")) +end + ############################################################################## ## ## @with @@ -1546,17 +1639,6 @@ function combine_helper(x, args...; deprecation_warning = false) exprs, outer_flags = create_args_vector(args...) - fe = first(exprs) - if length(exprs) == 1 && - get_column_expr(fe) === nothing && - !(fe.head == :(=) || fe.head == :kw) - - @warn "Returning a Table object from @by and @combine now requires `$(DOLLAR)AsTable` on the LHS." 
- - lhs = Expr(:$, :AsTable) - exprs = ((:($lhs = $fe)),) - end - t = (fun_to_vec(ex; gensym_names = false, outer_flags = outer_flags) for ex in exprs) quote @@ -1666,16 +1748,6 @@ end function by_helper(x, what, args...) # Only allow one argument when returning a Table object exprs, outer_flags = create_args_vector(args...) - fe = first(exprs) - if length(exprs) == 1 && - get_column_expr(fe) === nothing && - !(fe.head == :(=) || fe.head == :kw) - - @warn "Returning a Table object from @by and @combine now requires `\$AsTable` on the LHS." - - lhs = Expr(:$, :AsTable) - exprs = ((:($lhs = $fe)),) - end t = (fun_to_vec(ex; gensym_names = false, outer_flags = outer_flags) for ex in exprs) From 2344a2e459d379d2994a8e905982a3eaa165068e Mon Sep 17 00:00:00 2001 From: Peter Deffebach Date: Thu, 16 Sep 2021 05:53:42 -0400 Subject: [PATCH 07/27] tests pass --- test/deprecated.jl | 3 --- test/function_compilation.jl | 4 ++-- test/grouping.jl | 6 ------ test/runtests.jl | 1 + 4 files changed, 3 insertions(+), 11 deletions(-) diff --git a/test/deprecated.jl b/test/deprecated.jl index b76c8cbc..126ec441 100644 --- a/test/deprecated.jl +++ b/test/deprecated.jl @@ -42,7 +42,6 @@ const ≅ = isequal @test @based_on(gd, n = first(Symbol.(:y, ^(:body)))).n == [:vbody, :ybody] @test @based_on(gd, body = :i).body == df.i @test @based_on(gd, transform = :i).transform == df.i - @test @based_on(gd, (n1 = [first(:i)], n2 = [first(:y)])).n1 == [1, 4] @test @based_on(gd, n = mean(cols(iq))).n == [2.0, 4.5] @test @based_on(gd, n = mean(cols(iq)) + mean(cols(gq))).n == [3.0, 6.5] @@ -51,7 +50,6 @@ const ≅ = isequal @test @based_on(gd, n = first(Symbol.(cols(yq), ^(:body)))).n == [:vbody, :ybody] @test @based_on(gd, body = cols(iq)).body == df.i @test @based_on(gd, transform = cols(iq)).transform == df.i - @test @based_on(gd, (n1 = [first(cols(iq))], n2 = [first(cols(yq))])).n1 == [1, 4] @test @based_on(gd, n = mean(cols(ir))).n == [2.0, 4.5] @test @based_on(gd, n = mean(cols(ir)) + 
mean(cols(gr))).n == [3.0, 6.5] @@ -60,7 +58,6 @@ const ≅ = isequal @test @based_on(gd, n = first(Symbol.(cols(yr), ^(:body)))).n == [:vbody, :ybody] @test @based_on(gd, body = cols(ir)).body == df.i @test @based_on(gd, transform = cols(ir)).transform == df.i - @test @based_on(gd, (n1 = [first(cols(ir))], n2 = [first(cols(yr))])).n1 == [1, 4] @test @based_on(gd, n = mean(cols("i")) + 0 * first(cols(:g))).n == [2.0, 4.5] @test @based_on(gd, n = mean(cols(2)) + first(cols(1))).n == [3.0, 6.5] diff --git a/test/function_compilation.jl b/test/function_compilation.jl index 4c411f61..5921942d 100644 --- a/test/function_compilation.jl +++ b/test/function_compilation.jl @@ -154,9 +154,9 @@ using DataFramesMeta gd = groupby(df, :a) - @test @combine(gd, testnt(:b)) == DataFrame(a = [1], c = [2]) + @test @combine(gd, cols(AsTable) = testnt(:b)) == DataFrame(a = [1], c = [2]) - fasttime = @timed @combine(gd, testnt(:b)) + fasttime = @timed @combine(gd, cols(AsTable) = testnt(:b)) slowtime = @timed combine(gd, :b => (b -> testnt(b)) => AsTable) (slowtime[2] > fasttime[2]) || @warn("Slow compilation") diff --git a/test/grouping.jl b/test/grouping.jl index a998c8a0..e4ea60b0 100644 --- a/test/grouping.jl +++ b/test/grouping.jl @@ -49,7 +49,6 @@ g = groupby(d, :x, sort=true) @test @combine(gd, :n = first(Symbol.(:y, ^(:body)))).n == [:vbody, :ybody] @test @combine(gd, :body = :i).body == df.i @test @combine(gd, :transform = :i).transform == df.i - @test @combine(gd, (n1 = [first(:i)], n2 = [first(:y)])).n1 == [1, 4] @test @combine(gd, :n = mean($iq)).n == [2.0, 4.5] @test @combine(gd, :n = mean($iq) + mean($gq)).n == [3.0, 6.5] @@ -59,7 +58,6 @@ g = groupby(d, :x, sort=true) @test @combine(gd, $:n = mean($:i)).n == [2.0, 4.5] @test @combine(gd, :body = $iq).body == df.i @test @combine(gd, :transform = $iq).transform == df.i - @test @combine(gd, (n1 = [first($iq)], n2 = [first($yq)])).n1 == [1, 4] @test @combine(gd, :n = mean($ir)).n == [2.0, 4.5] @test @combine(gd, :n = mean($ir) 
+ mean($gr)).n == [3.0, 6.5] @@ -68,7 +66,6 @@ g = groupby(d, :x, sort=true) @test @combine(gd, :n = first(Symbol.($yr, ^(:body)))).n == [:vbody, :ybody] @test @combine(gd, :body = $ir).body == df.i @test @combine(gd, :transform = $ir).transform == df.i - @test @combine(gd, (n1 = [first($ir)], n2 = [first($yr)])).n1 == [1, 4] @test @combine(gd, :n = mean($"i") + 0 * first($:g)).n == [2.0, 4.5] @test @combine(gd, :n = mean($2) + first($1)).n == [3.0, 6.5] @@ -192,7 +189,6 @@ end @test @by(df, :g, :n = first(Symbol.(:y, ^(:body)))).n == [:vbody, :ybody] @test @by(df, :g, :body = :i).body == df.i @test @by(df, :g, :transform = :i).transform == df.i - @test @by(df, :g, (n1 = [first(:i)], n2 = [first(:y)])).n1 == [1, 4] @test @by(df, :g, :n = mean($iq)).n == [2.0, 4.5] @test @by(df, :g, :n = mean($iq) + mean($gq)).n == [3.0, 6.5] @@ -202,7 +198,6 @@ end @test @by(df, :g, $:n = mean($:i)).n == [2.0, 4.5] @test @by(df, :g, :body = $iq).body == df.i @test @by(df, :g, :transform = $iq).transform == df.i - @test @by(df, :g, (n1 = [first($iq)], n2 = [first($yq)])).n1 == [1, 4] @test @by(df, "g", :n = mean($ir)).n == [2.0, 4.5] @test @by(df, "g", :n = mean($ir) + mean($gr)).n == [3.0, 6.5] @@ -211,7 +206,6 @@ end @test @by(df, "g", :n = first(Symbol.($yr, ^(:body)))).n == [:vbody, :ybody] @test @by(df, "g", :body = $ir).body == df.i @test @by(df, "g", :transform = $ir).transform == df.i - @test @by(df, "g", (n1 = [first($ir)], n2 = [first($yr)])).n1 == [1, 4] @test @by(df, "g", :n = mean($"i") + 0 * first($:g)).n == [2.0, 4.5] @test @by(df, "g", :n = mean($2) + first($1)).n == [3.0, 6.5] diff --git a/test/runtests.jl b/test/runtests.jl index 3218556f..5ab6f363 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -12,6 +12,7 @@ my_tests = ["dataframes.jl", "deprecated.jl", "byrow.jl", "astable.jl", + "astable_flag.jl", "passmissing.jl"] println("Running tests:") From 6557def332c47069753fe84e2871ae3d558d571b Mon Sep 17 00:00:00 2001 From: Peter Deffebach Date: Thu, 16 Sep 
2021 06:19:07 -0400 Subject: [PATCH 08/27] add ByRow in docstring --- src/macros.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/macros.jl b/src/macros.jl index 21dfab1f..d876cae2 100644 --- a/src/macros.jl +++ b/src/macros.jl @@ -385,7 +385,7 @@ function f(a) (; x = x_t, z = z_t) end -transform(df, [:a] => f => AsTable) +transform(df, [:a] => ByRow(f) => AsTable) ``` `@astable` is useful when performing intermediate calculations From 6002defe9c6d284f78155dd14506f93e1c136ce4 Mon Sep 17 00:00:00 2001 From: Peter Deffebach Date: Tue, 21 Sep 2021 05:33:32 -0400 Subject: [PATCH 09/27] add type annotation --- src/macros.jl | 15 ++++++++------- src/parsing_astable.jl | 23 +++++++++++++++-------- 2 files changed, 23 insertions(+), 15 deletions(-) diff --git a/src/macros.jl b/src/macros.jl index d876cae2..074f8571 100644 --- a/src/macros.jl +++ b/src/macros.jl @@ -427,15 +427,16 @@ julia> d = @rtransform df @astable begin julia> df = DataFrame(a = [1, 1, 2, 2], b = [5, 6, 70, 80]); julia> @by df :a @astable begin - $(DOLLAR)"Mean of b" = mean(:b) - $(DOLLAR)"Standard deviation of b" = std(:b) + ex = extrema(:b) + :min_b = first(ex) + :max_b = last(ex) end 2×3 DataFrame - Row │ a Mean of b Standard deviation of b - │ Int64 Float64 Float64 -─────┼─────────────────────────────────────────── - 1 │ 1 5.5 0.707107 - 2 │ 2 75.0 7.07107 + Row │ a min_b max_b + │ Int64 Int64 Int64 +─────┼───────────────────── + 1 │ 1 5 6 + 2 │ 2 70 80 ``` """ diff --git a/src/parsing_astable.jl b/src/parsing_astable.jl index becbac3c..09d46995 100644 --- a/src/parsing_astable.jl +++ b/src/parsing_astable.jl @@ -1,4 +1,5 @@ -function conditionally_add_symbols!(inputs_to_function, lhs_assignments, col) +function conditionally_add_symbols!(inputs_to_function::AbstractDict, + lhs_assignments::OrderedCollections.OrderedDict, col) # if it's already been assigned at top-level, # don't add it to the inputs if haskey(lhs_assignments, col) @@ -8,11 +9,14 @@ function 
conditionally_add_symbols!(inputs_to_function, lhs_assignments, col) end end -replace_syms_astable!(inputs_to_function, lhs_assignments, x) = x -replace_syms_astable!(inputs_to_function, lhs_assignments, q::QuoteNode) = +replace_syms_astable!(inputs_to_function::AbstractDict, + lhs_assignments::OrderedCollections.OrderedDict, x) = x +replace_syms_astable!(inputs_to_function::AbstractDict, + lhs_assignments::OrderedCollections.OrderedDict, q::QuoteNode) = conditionally_add_symbols!(inputs_to_function, lhs_assignments, q) -function replace_syms_astable!(inputs_to_function, lhs_assignments, e::Expr) +function replace_syms_astable!(inputs_to_function::AbstractDict, + lhs_assignments::OrderedCollections.OrderedDict, e::Expr) if onearg(e, :^) return e.args[2] end @@ -27,11 +31,14 @@ function replace_syms_astable!(inputs_to_function, lhs_assignments, e::Expr) end end -protect_replace_syms_astable!(inputs_to_function, lhs_assignments, e) = e -protect_replace_syms_astable!(inputs_to_function, lhs_assignments, e::Expr) = +protect_replace_syms_astable!(inputs_to_function::AbstractDict, + lhs_assignments::OrderedCollections.OrderedDict, e) = e +protect_replace_syms_astable!(inputs_to_function::AbstractDict, + lhs_assignments::OrderedCollections.OrderedDict, e::Expr) = replace_syms!(inputs_to_function, lhs_assignments, e) -function replace_dotted_astable!(inputs_to_function, lhs_assignments, e) +function replace_dotted_astable!(inputs_to_function::AbstractDict, + lhs_assignments::OrderedCollections.OrderedDict, e) x_new = replace_syms_astable!(inputs_to_function, lhs_assignments, e.args[1]) y_new = protect_replace_syms_astable!(inputs_to_function, lhs_assignments, e.args[2]) Expr(:., x_new, y_new) @@ -43,7 +50,7 @@ function is_column_assigment(ex::Expr) end # Taken from MacroTools.jl -# No docstring so assumed untable +# No docstring so assumed unstable block(ex) = isexpr(ex, :block) ? 
ex : :($ex;) function get_source_fun_astable(ex; exprflags = deepcopy(DEFAULT_FLAGS)) From 0eca67d1ba2cfd703f101a3f198c64506d7a2ec0 Mon Sep 17 00:00:00 2001 From: pdeffebach <23196228+pdeffebach@users.noreply.github.com> Date: Tue, 21 Sep 2021 09:38:20 -0400 Subject: [PATCH 10/27] Apply suggestions from code review Co-authored-by: Milan Bouchet-Valat --- src/macros.jl | 4 ++-- src/parsing.jl | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/src/macros.jl b/src/macros.jl index d876cae2..3a1bc282 100644 --- a/src/macros.jl +++ b/src/macros.jl @@ -356,7 +356,7 @@ end Return a `NamedTuple` from a transformation inside DataFramesMeta.jl macros. `@astable` acts on a single block. It works through all top-level expressions -and collects all such expressions of the form `:y = x`, i.e. assignments to a +and collects all such expressions of the form `:y = ...`, i.e. assignments to a `Symbol`, which is a syntax error outside of the macro. At the end of the expression, all assignments are collected into a `NamedTuple` to be used with the `AsTable` destination in the DataFrames.jl transformation @@ -374,7 +374,7 @@ df = DataFrame(a = 1) end ``` -becomes the pair +become the pair ``` function f(a) diff --git a/src/parsing.jl b/src/parsing.jl index c2a25252..a4ced7fd 100644 --- a/src/parsing.jl +++ b/src/parsing.jl @@ -276,7 +276,7 @@ function fun_to_vec(ex::Expr; return :($src => $fun => AsTable) end - if no_dest # subet and with + if no_dest # subset and with src, fun = get_source_fun(ex, exprflags = final_flags) return quote $src => $fun From 08a1c4bcc9e9655c0cf18141acd4b6d29397834e Mon Sep 17 00:00:00 2001 From: Peter Deffebach Date: Tue, 21 Sep 2021 05:42:54 -0400 Subject: [PATCH 11/27] better docs --- src/macros.jl | 22 +++++++++++++++++++--- src/parsing_astable.jl | 9 ++++----- 2 files changed, 23 insertions(+), 8 deletions(-) diff --git a/src/macros.jl b/src/macros.jl index 82f56c29..21d3fca8 100644 --- a/src/macros.jl +++ b/src/macros.jl @@ -353,7 
+353,7 @@ end """ astable(args...) -Return a `NamedTuple` from a transformation inside DataFramesMeta.jl macros. +Return a `NamedTuple` from a single transformation inside DataFramesMeta.jl macros. `@astable` acts on a single block. It works through all top-level expressions and collects all such expressions of the form `:y = ...`, i.e. assignments to a @@ -388,8 +388,24 @@ end transform(df, [:a] => ByRow(f) => AsTable) ``` -`@astable` is useful when performing intermediate calculations -yet store their results in new columns. For example, the following fails. +`@astable` has two major advantages at the cost of increasing complexity. +First, `@astable` makes it easy to create multiple columns from a single +transformation, which share a scope. For example, `@astable` allows +for the following + +``` +@transform df @astable begin + m = mean(:x) + :x_demeaned = :x .- m + :x2_demeaned = :x2 .- m +end +``` + +The creation of `:x_demeaned` and `:x2_demeaned` both share the variable `m`, +which does not need to be calculated twice. + +Second, `@astable` is useful when performing intermediate calculations +and storing their results in new columns. For example, the following fails. 
``` @rtransform df begin diff --git a/src/parsing_astable.jl b/src/parsing_astable.jl index 09d46995..6149dc9f 100644 --- a/src/parsing_astable.jl +++ b/src/parsing_astable.jl @@ -2,10 +2,8 @@ function conditionally_add_symbols!(inputs_to_function::AbstractDict, lhs_assignments::OrderedCollections.OrderedDict, col) # if it's already been assigned at top-level, # don't add it to the inputs - if haskey(lhs_assignments, col) - return lhs_assignments[col] - else - return addkey!(inputs_to_function, col) + return get!(lhs_assignments, col) do + gensym() end end @@ -69,7 +67,8 @@ function get_source_fun_astable(ex; exprflags = deepcopy(DEFAULT_FLAGS)) new_lhs = inputs_to_function[lhs] lhs_assignments[lhs] = new_lhs else - new_lhs = addkey!(lhs_assignments, lhs) + new_lhs = gensym() + lhs_assignments[lhs] = new_lhs end Expr(:(=), new_lhs, new_ex) From 581b2cfcf2762782de0797fd6674d8185b8b4d37 Mon Sep 17 00:00:00 2001 From: Peter Deffebach Date: Tue, 21 Sep 2021 05:43:42 -0400 Subject: [PATCH 12/27] more docs fixes --- src/macros.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/macros.jl b/src/macros.jl index 21d3fca8..41f2bb46 100644 --- a/src/macros.jl +++ b/src/macros.jl @@ -357,7 +357,7 @@ Return a `NamedTuple` from a single transformation inside DataFramesMeta.jl macr `@astable` acts on a single block. It works through all top-level expressions and collects all such expressions of the form `:y = ...`, i.e. assignments to a -`Symbol`, which is a syntax error outside of the macro. At the end of the +`Symbol`, which is a syntax error outside of DataFramesMeta.jl macros. At the end of the expression, all assignments are collected into a `NamedTuple` to be used with the `AsTable` destination in the DataFrames.jl transformation mini-language. 
From 7cc8947b38aabaeda9f6be6615fadf7f2285036e Mon Sep 17 00:00:00 2001 From: Peter Deffebach Date: Tue, 21 Sep 2021 06:58:53 -0400 Subject: [PATCH 13/27] update index.md --- docs/src/index.md | 31 +++++++++++++++++++++++++++++-- src/macros.jl | 4 ++++ src/parsing_astable.jl | 9 +++++---- 3 files changed, 38 insertions(+), 6 deletions(-) diff --git a/docs/src/index.md b/docs/src/index.md index b110d01e..e643c638 100644 --- a/docs/src/index.md +++ b/docs/src/index.md @@ -22,6 +22,7 @@ In addition, DataFramesMeta provides convenient syntax. * `@byrow` for applying functions to each row of a data frame (only supported inside other macros). * `@passmissing` for propagating missing values inside row-wise DataFramesMeta.jl transformations. +* `@astable` to create multiple columns within a single transformation. * `@chain`, from [Chain.jl](https://github.com/jkrumbiegel/Chain.jl) for piping the above macros together, similar to [magrittr](https://cran.r-project.org/web/packages/magrittr/vignettes/magrittr.html)'s `%>%` in R. @@ -396,11 +397,37 @@ julia> @rtransform df @passmissing x = parse(Int, :x_str) 3 │ missing missing ``` +## Creating multiple columns at once with `@astable` + +Often new variables may depend on the same intermediate calculations. `@astable` makes it easy to create multiple +new variables in the same operation, yet have them share +information. + +In a single block, all assignments of the form `:y = f(:x)` +or `$y = f(:x)` at the top-level are generate new columns. 
+ +``` +julia> df = DataFrame(a = [1, 2, 3], b = [400, 500, 600]); + +julia> @transform df @astable begin + ex = extrema(:b) + :b_first = :b .- first(ex) + :b_last = :b .- last(ex) + end +3×4 DataFrame + Row │ a b b_first b_last + │ Int64 Int64 Int64 Int64 +─────┼─────────────────────────────── + 1 │ 1 400 0 -200 + 2 │ 2 500 100 -100 + 3 │ 3 600 200 0 +``` + + ## [Working with column names programmatically with `$`](@id dollar) DataFramesMeta provides the special syntax `$` for referring to -columns in a data frame via a `Symbol`, string, or column position as either -a literal or a variable. +columns in a data frame via a `Symbol`, string, or column position as either a literal or a variable. ```julia df = DataFrame(A = 1:3, B = [2, 1, 2]) diff --git a/src/macros.jl b/src/macros.jl index 41f2bb46..9780f620 100644 --- a/src/macros.jl +++ b/src/macros.jl @@ -422,6 +422,10 @@ transformations. `@astable` solves this problem :new_col_2 = :new_col_1 + :z end +Column assignment in `@astable` follows the same rules as +column assignment more generally. Construct a new column +from a string by escaping it with `$DOLLAR`. 
+ ### Examples ``` diff --git a/src/parsing_astable.jl b/src/parsing_astable.jl index 6149dc9f..09d46995 100644 --- a/src/parsing_astable.jl +++ b/src/parsing_astable.jl @@ -2,8 +2,10 @@ function conditionally_add_symbols!(inputs_to_function::AbstractDict, lhs_assignments::OrderedCollections.OrderedDict, col) # if it's already been assigned at top-level, # don't add it to the inputs - return get!(lhs_assignments, col) do - gensym() + if haskey(lhs_assignments, col) + return lhs_assignments[col] + else + return addkey!(inputs_to_function, col) end end @@ -67,8 +69,7 @@ function get_source_fun_astable(ex; exprflags = deepcopy(DEFAULT_FLAGS)) new_lhs = inputs_to_function[lhs] lhs_assignments[lhs] = new_lhs else - new_lhs = gensym() - lhs_assignments[lhs] = new_lhs + new_lhs = addkey!(lhs_assignments, lhs) end Expr(:(=), new_lhs, new_ex) From ab9bae47ae3a17d7070724f5c2a8f5f248f595f5 Mon Sep 17 00:00:00 2001 From: Peter Deffebach Date: Wed, 22 Sep 2021 06:29:25 -0400 Subject: [PATCH 14/27] clean named tuple creation --- docs/src/index.md | 3 ++- src/parsing_astable.jl | 8 +++++++- 2 files changed, 9 insertions(+), 2 deletions(-) diff --git a/docs/src/index.md b/docs/src/index.md index e643c638..69d6db28 100644 --- a/docs/src/index.md +++ b/docs/src/index.md @@ -404,7 +404,8 @@ new variables in the same operation, yet have them share information. In a single block, all assignments of the form `:y = f(:x)` -or `$y = f(:x)` at the top-level are generate new columns. +or `$y = f(:x)` at the top-level generate new columns. In the 2nd example, `y` +must be a string, `Symbol`. ``` julia> df = DataFrame(a = [1, 2, 3], b = [400, 500, 600]); diff --git a/src/parsing_astable.jl b/src/parsing_astable.jl index 09d46995..d6f62035 100644 --- a/src/parsing_astable.jl +++ b/src/parsing_astable.jl @@ -53,6 +53,12 @@ end # No docstring so assumed unstable block(ex) = isexpr(ex, :block) ? 
ex : :($ex;) +sym_or_str_to_sym(x::Union{AbstractString, Symbol}) = Symbol(x) +function sym_or_str_to_sym(x) + e = "New columns created inside @astable must be Symbols or AbstractStrings" + throw(ArgumentError(e)) +end + function get_source_fun_astable(ex; exprflags = deepcopy(DEFAULT_FLAGS)) inputs_to_function = Dict{Any, Symbol}() lhs_assignments = OrderedCollections.OrderedDict{Any, Symbol}() @@ -80,7 +86,7 @@ function get_source_fun_astable(ex; exprflags = deepcopy(DEFAULT_FLAGS)) source = :(DataFramesMeta.make_source_concrete($(Expr(:vect, keys(inputs_to_function)...)))) inputargs = Expr(:tuple, values(inputs_to_function)...) - nt_iterator = (:(Symbol($k) => $v) for (k, v) in lhs_assignments) + nt_iterator = (:(DataFramesMeta.sym_or_str_to_sym($k) => $v) for (k, v) in lhs_assignments) nt_expr = Expr(:tuple, Expr(:parameters, nt_iterator...)) body = Expr(:block, Expr(:block, exprs...), nt_expr) From 495f08ac763e41f7fc8e2ee18761e94cf0b54635 Mon Sep 17 00:00:00 2001 From: Peter Deffebach Date: Wed, 22 Sep 2021 07:19:53 -0400 Subject: [PATCH 15/27] add example with string --- docs/src/index.md | 2 +- src/macros.jl | 18 ++++++++++++++++-- test/astable_flag.jl | 7 +++++++ 3 files changed, 24 insertions(+), 3 deletions(-) diff --git a/docs/src/index.md b/docs/src/index.md index 69d6db28..6e6dda6e 100644 --- a/docs/src/index.md +++ b/docs/src/index.md @@ -405,7 +405,7 @@ information. In a single block, all assignments of the form `:y = f(:x)` or `$y = f(:x)` at the top-level generate new columns. In the 2nd example, `y` -must be a string, `Symbol`. +must be a string or `Symbol`. ``` julia> df = DataFrame(a = [1, 2, 3], b = [400, 500, 600]); diff --git a/src/macros.jl b/src/macros.jl index 9780f620..56a68291 100644 --- a/src/macros.jl +++ b/src/macros.jl @@ -351,7 +351,7 @@ macro passmissing(args...) end """ - astable(args...) + @astable(args...) Return a `NamedTuple` from a single transformation inside DataFramesMeta.jl macros. 
@@ -391,7 +391,7 @@ transform(df, [:a] => ByRow(f) => AsTable) `@astable` has two major advantages at the cost of increasing complexity. First, `@astable` makes it easy to create multiple columns from a single transformation, which share a scope. For example, `@astable` allows -for the following +for the following (where `:x` and `:x_2` exist in the `DataFrame` already). ``` @transform df @astable begin @@ -457,6 +457,20 @@ julia> @by df :a @astable begin ─────┼───────────────────── 1 │ 1 5 6 2 │ 2 70 80 + +julia> @rtransform df @astable begin + f_a = first(:a) + $(DOLLAR)new_col = :a + :b + f_a + :y = :a * :b + end +4×4 DataFrame + Row │ a b New Column y + │ Int64 Int64 Int64 Int64 +─────┼───────────────────────────────── + 1 │ 1 5 7 5 + 2 │ 1 6 8 6 + 3 │ 2 70 74 140 + 4 │ 2 80 84 160 ``` """ diff --git a/test/astable_flag.jl b/test/astable_flag.jl index fcdc88b3..1ea1d2b4 100644 --- a/test/astable_flag.jl +++ b/test/astable_flag.jl @@ -121,4 +121,11 @@ end end + +@testset "bad assignments" begin + @eval df = DataFrame(y = 1) + @test_throws ArgumentError @eval @transform df @astable cols(1) = :y + @test_throws ArgumentError @eval @transform df @astable cols(AsTable) = :y +end + end # module \ No newline at end of file From 01cb5e7b884eeb283e8c816c8a822277f566c9d8 Mon Sep 17 00:00:00 2001 From: Peter Deffebach Date: Wed, 22 Sep 2021 10:43:48 -0400 Subject: [PATCH 16/27] grouping tests --- src/macros.jl | 10 +++++++--- test/astable_flag.jl | 38 ++++++++++++++++++++++++++++++++++++++ 2 files changed, 45 insertions(+), 3 deletions(-) diff --git a/src/macros.jl b/src/macros.jl index 56a68291..5462032f 100644 --- a/src/macros.jl +++ b/src/macros.jl @@ -284,7 +284,7 @@ end """ - passmissing(args...) + @passmissing(args...) Propograte missing values inside DataFramesMeta.jl macros. @@ -353,7 +353,8 @@ end """ @astable(args...) -Return a `NamedTuple` from a single transformation inside DataFramesMeta.jl macros. 
+Return a `NamedTuple` from a single transformation inside the DataFramesMeta.jl +macros, `@select`, `@transform`, and their mutating and row-wise equivalents. `@astable` acts on a single block. It works through all top-level expressions and collects all such expressions of the form `:y = ...`, i.e. assignments to a @@ -424,7 +425,10 @@ end Column assignment in `@astable` follows the same rules as column assignment more generally. Construct a new column -from a string by escaping it with `$DOLLAR`. +from a string by escaping it with `$DOLLAR`, which can be a +`Symbol` or an `AbstractString`. References to existing +columns may be a `Symbol`, `AbstractString`, or an +integer. ### Examples diff --git a/test/astable_flag.jl b/test/astable_flag.jl index 1ea1d2b4..1ca9b6fd 100644 --- a/test/astable_flag.jl +++ b/test/astable_flag.jl @@ -120,6 +120,44 @@ end @test d == DataFrame(x = 1, z = 6) end +@testset "grouping astable flag" begin + df = DataFrame(a = [1, 1, 2, 2], b = [5, 6, 7, 8]) + + gd = groupby(df, :a) + + d = @combine gd @astable begin + ex = extrema(:b) + :b_min = ex[1] + :b_max = ex[2] + end + + @test sort(d.b_min) == [5, 7] + + d = @combine gd @astable begin + ex = extrema(:b) + $"b_min" = ex[1] + $"b_max" = ex[2] + end + + @test sort(d.b_min) == [5, 7] + + d = @by df :a @astable begin + ex = extrema(:b) + :b_min = ex[1] + :b_max = ex[2] + end + + @test sort(d.b_min) == [5, 7] + + d = @by df :a @astable begin + ex = extrema(:b) + $"b_min" = ex[1] + $"b_max" = ex[2] + end + + @test sort(d.b_min) == [5, 7] +end + @testset "bad assignments" begin From 01fb3b72f5d390cc1d36445549e9999f59baf696 Mon Sep 17 00:00:00 2001 From: pdeffebach <23196228+pdeffebach@users.noreply.github.com> Date: Wed, 22 Sep 2021 16:23:13 -0400 Subject: [PATCH 17/27] Update src/macros.jl MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Co-authored-by: Bogumił Kamiński --- src/macros.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git 
a/src/macros.jl b/src/macros.jl index 5462032f..02f86211 100644 --- a/src/macros.jl +++ b/src/macros.jl @@ -392,7 +392,7 @@ transform(df, [:a] => ByRow(f) => AsTable) `@astable` has two major advantages at the cost of increasing complexity. First, `@astable` makes it easy to create multiple columns from a single transformation, which share a scope. For example, `@astable` allows -for the following (where `:x` and `:x_2` exist in the `DataFrame` already). +for the following (where `:x` and `:x_2` exist in the data frame already). ``` @transform df @astable begin From 915191c20d4de25b7efcf1deca030bb1fd7e8372 Mon Sep 17 00:00:00 2001 From: Peter Deffebach Date: Thu, 23 Sep 2021 06:08:40 -0400 Subject: [PATCH 18/27] changes --- src/macros.jl | 34 +++++++++++++++++++++++----------- 1 file changed, 23 insertions(+), 11 deletions(-) diff --git a/src/macros.jl b/src/macros.jl index 5462032f..cbee5d3c 100644 --- a/src/macros.jl +++ b/src/macros.jl @@ -357,11 +357,11 @@ Return a `NamedTuple` from a single transformation inside the DataFramesMeta.jl macros, `@select`, `@transform`, and their mutating and row-wise equivalents. `@astable` acts on a single block. It works through all top-level expressions -and collects all such expressions of the form `:y = ...`, i.e. assignments to a -`Symbol`, which is a syntax error outside of DataFramesMeta.jl macros. At the end of the -expression, all assignments are collected into a `NamedTuple` to be used -with the `AsTable` destination in the DataFrames.jl transformation -mini-language. +and collects all such expressions of the form `:y = ...` or `$y = ...`, i.e. assignments to a +`Symbol` or an escaped column identifier, which is a syntax error outside of +DataFramesMeta.jl macros. At the end of the expression, all assignments are collected +into a `NamedTuple` to be used with the `AsTable` destination in the DataFrames.jl +transformation mini-language. Concretely, the expressions @@ -423,12 +423,22 @@ transformations. 
`@astable` solves this problem :new_col_2 = :new_col_1 + :z end -Column assignment in `@astable` follows the same rules as -column assignment more generally. Construct a new column -from a string by escaping it with `$DOLLAR`, which can be a -`Symbol` or an `AbstractString`. References to existing -columns may be a `Symbol`, `AbstractString`, or an -integer. +Column assignment in `@astable` follows similar rules as +column assignment in other DataFramesMeta.jl macros. The left- +-hand-side of a column assignment can be either a `Symbol` or any +expression which evaluates to a `Symbol` or `AbstractString`. For example +`:y = ...`, and `$y = ...` are both valid ways of assigning a new column. +However unlike other DataFramesMeta.jl macros, multi-column assignments via +`AsTable` are disallowed. The following will fail. + +``` +@transform df @astable begin + $AsTable = :x +end +``` + +References to existing columns also follow the same +rules as other DataFramesMeta.jl macros. ### Examples @@ -462,6 +472,8 @@ julia> @by df :a @astable begin 1 │ 1 5 6 2 │ 2 70 80 +julia> new_col = "New Column"; + julia> @rtransform df @astable begin f_a = first(:a) $(DOLLAR)new_col = :a + :b + f_a From 2ce4d9ed803398746b58887c083193d04eb6fe0b Mon Sep 17 00:00:00 2001 From: Peter Deffebach Date: Thu, 23 Sep 2021 06:17:22 -0400 Subject: [PATCH 19/27] fix some errors --- src/macros.jl | 7 +++---- src/parsing_astable.jl | 1 + 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/src/macros.jl b/src/macros.jl index e38c24d9..6198349c 100644 --- a/src/macros.jl +++ b/src/macros.jl @@ -282,11 +282,10 @@ macro byrow(args...) throw(ArgumentError("@byrow is deprecated outside of DataFramesMeta macros.")) end - """ @passmissing(args...) -Propograte missing values inside DataFramesMeta.jl macros. +Propagrate missing values inside DataFramesMeta.jl macros. 
`@passmissing` is not a "real" Julia macro but rather serves as a "flag" @@ -357,7 +356,7 @@ Return a `NamedTuple` from a single transformation inside the DataFramesMeta.jl macros, `@select`, `@transform`, and their mutating and row-wise equivalents. `@astable` acts on a single block. It works through all top-level expressions -and collects all such expressions of the form `:y = ...` or `$y = ...`, i.e. assignments to a +and collects all such expressions of the form `:y = ...` or `$(DOLLAR)y = ...`, i.e. assignments to a `Symbol` or an escaped column identifier, which is a syntax error outside of DataFramesMeta.jl macros. At the end of the expression, all assignments are collected into a `NamedTuple` to be used with the `AsTable` destination in the DataFrames.jl @@ -427,7 +426,7 @@ Column assignment in `@astable` follows similar rules as column assignment in other DataFramesMeta.jl macros. The left- -hand-side of a column assignment can be either a `Symbol` or any expression which evaluates to a `Symbol` or `AbstractString`. For example -`:y = ...`, and `$y = ...` are both valid ways of assigning a new column. +`:y = ...`, and `$(DOLLAR)y = ...` are both valid ways of assigning a new column. However unlike other DataFramesMeta.jl macros, multi-column assignments via `AsTable` are disallowed. The following will fail. diff --git a/src/parsing_astable.jl b/src/parsing_astable.jl index d6f62035..d78138d4 100644 --- a/src/parsing_astable.jl +++ b/src/parsing_astable.jl @@ -88,6 +88,7 @@ function get_source_fun_astable(ex; exprflags = deepcopy(DEFAULT_FLAGS)) inputargs = Expr(:tuple, values(inputs_to_function)...) 
nt_iterator = (:(DataFramesMeta.sym_or_str_to_sym($k) => $v) for (k, v) in lhs_assignments) nt_expr = Expr(:tuple, Expr(:parameters, nt_iterator...)) + body = Expr(:block, Expr(:block, exprs...), nt_expr) fun = quote From 713eaf08608afcd6dccc9228a7a0c53defa9ed93 Mon Sep 17 00:00:00 2001 From: pdeffebach <23196228+pdeffebach@users.noreply.github.com> Date: Thu, 23 Sep 2021 10:18:05 -0400 Subject: [PATCH 20/27] Update src/parsing_astable.jl Co-authored-by: Milan Bouchet-Valat --- src/parsing_astable.jl | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/src/parsing_astable.jl b/src/parsing_astable.jl index d78138d4..16987d69 100644 --- a/src/parsing_astable.jl +++ b/src/parsing_astable.jl @@ -54,10 +54,8 @@ end block(ex) = isexpr(ex, :block) ? ex : :($ex;) sym_or_str_to_sym(x::Union{AbstractString, Symbol}) = Symbol(x) -function sym_or_str_to_sym(x) - e = "New columns created inside @astable must be Symbols or AbstractStrings" - throw(ArgumentError(e)) -end +sym_or_str_to_sym(x) = + throw(ArgumentError("New columns created inside @astable must be Symbols or AbstractStrings")) function get_source_fun_astable(ex; exprflags = deepcopy(DEFAULT_FLAGS)) inputs_to_function = Dict{Any, Symbol}() From 4e01c4ac92889bd35b823bf145a4c724dd8806e3 Mon Sep 17 00:00:00 2001 From: Peter Deffebach Date: Thu, 23 Sep 2021 06:30:24 -0400 Subject: [PATCH 21/27] add snippet to transform, select, combine, by --- src/macros.jl | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/src/macros.jl b/src/macros.jl index 6198349c..41e6b728 100644 --- a/src/macros.jl +++ b/src/macros.jl @@ -349,6 +349,12 @@ macro passmissing(args...) throw(ArgumentError("@passmissing only works inside DataFramesMeta macros.")) end +global astable_docstring_snippet = """ + Transformations can also use the macro-flag `@astable` for creating multiple + new columns at once and letting transformations share the same name-space. + See `? @astable` for more details. 
+ """ + """ @astable(args...) @@ -1240,6 +1246,8 @@ transformations by row, `@transform` allows `@byrow` at the beginning of a block of transformations (i.e. `@byrow begin... end`). All transformations in the block will operate by row. +$astable_docstring_snippet + ### Examples ```jldoctest @@ -1376,6 +1384,8 @@ transform!ations by row, `@transform!` allows `@byrow` at the beginning of a block of transform!ations (i.e. `@byrow begin... end`). All transform!ations in the block will operate by row. +$astable_docstring_snippet + ### Examples ```jldoctest @@ -1488,6 +1498,8 @@ transformations by row, `@select` allows `@byrow` at the beginning of a block of selectations (i.e. `@byrow begin... end`). All transformations in the block will operate by row. +$astable_docstring_snippet + ### Examples ```jldoctest @@ -1608,6 +1620,8 @@ transformations by row, `@select!` allows `@byrow` at the beginning of a block of select!ations (i.e. `@byrow begin... end`). All transformations in the block will operate by row. 
+$astable_docstring_snippet + ### Examples ```jldoctest @@ -1724,6 +1738,8 @@ and @combine(df, :mx = mean(:x), :sx = std(:x)) ``` +$astable_docstring_snippet + ### Examples ```julia @@ -1840,6 +1856,8 @@ and @by(df, :g, mx = mean(:x), sx = std(:x)) ``` +$astable_docstring_snippet + ### Examples ```julia From 57b40512c519d893d717c68a2d5c6809491cee25 Mon Sep 17 00:00:00 2001 From: Peter Deffebach Date: Thu, 23 Sep 2021 06:34:29 -0400 Subject: [PATCH 22/27] add macro check --- src/parsing.jl | 3 +++ 1 file changed, 3 insertions(+) diff --git a/src/parsing.jl b/src/parsing.jl index a4ced7fd..a8a263f0 100644 --- a/src/parsing.jl +++ b/src/parsing.jl @@ -127,6 +127,9 @@ function check_macro_flags_consistency(exprflags) if !exprflags[BYROW_SYM][] s = "The `@passmissing` flag is currently only allowed with the `@byrow` flag" throw(ArgumentError(s)) + elseif exprflags[ASTABLE_SYM][] + s = "The `@passmissing` flag is currently not allowed with the `@astable` flag" + throw(ArgumentError(s)) end end end From da7674d9b281b7c28be9eb981daa76aac3e26e38 Mon Sep 17 00:00:00 2001 From: Peter Deffebach Date: Thu, 23 Sep 2021 06:41:43 -0400 Subject: [PATCH 23/27] add errors for bad flag combo --- src/parsing.jl | 1 - test/astable_flag.jl | 9 ++++++++- 2 files changed, 8 insertions(+), 2 deletions(-) diff --git a/src/parsing.jl b/src/parsing.jl index a8a263f0..c83df4c2 100644 --- a/src/parsing.jl +++ b/src/parsing.jl @@ -109,7 +109,6 @@ function extract_macro_flags(ex::Expr, exprflags = deepcopy(DEFAULT_FLAGS)) return (ex, exprflags) end end - return (ex, exprflags) end diff --git a/test/astable_flag.jl b/test/astable_flag.jl index 1ca9b6fd..012b7a4f 100644 --- a/test/astable_flag.jl +++ b/test/astable_flag.jl @@ -158,7 +158,14 @@ end @test sort(d.b_min) == [5, 7] end - +@testset "errors with passmissing" begin + @eval df = DataFrame(y = 1) + @test_throws LoadError @eval @transform df @passmissing @byrow @astable :x = 2 + @test_throws LoadError @eval @transform df @byrow @astable 
@passmissing :x = 2 + @test_throws LoadError @eval @transform df @astable @passmissing @byrow :x = 2 + @test_throws LoadError @eval @rtransform df @astable @passmissing :x = 2 + @test_throws LoadError @eval @rtransform df @passmissing @astable :x = 2 +end @testset "bad assignments" begin @eval df = DataFrame(y = 1) From 285e3acf17e20c15394540e5afac2bfdaa8ec8a7 Mon Sep 17 00:00:00 2001 From: Peter Deffebach Date: Thu, 23 Sep 2021 09:58:25 -0400 Subject: [PATCH 24/27] better grouping tests --- test/astable_flag.jl | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/test/astable_flag.jl b/test/astable_flag.jl index 012b7a4f..6c063840 100644 --- a/test/astable_flag.jl +++ b/test/astable_flag.jl @@ -131,7 +131,9 @@ end :b_max = ex[2] end - @test sort(d.b_min) == [5, 7] + res_sorted = DataFrame(a = [1, 2], b_min = [5, 7], b_max = [6, 8]) + + @test sort(d, :b_min) == res_sorted d = @combine gd @astable begin ex = extrema(:b) @@ -139,7 +141,7 @@ end $"b_max" = ex[2] end - @test sort(d.b_min) == [5, 7] + @test sort(d, :b_min) == res_sorted d = @by df :a @astable begin ex = extrema(:b) @@ -147,7 +149,7 @@ end :b_max = ex[2] end - @test sort(d.b_min) == [5, 7] + @test sort(d, :b_min) == res_sorted d = @by df :a @astable begin ex = extrema(:b) @@ -155,7 +157,7 @@ end $"b_max" = ex[2] end - @test sort(d.b_min) == [5, 7] + @test sort(d, :b_min) == res_sorted end @testset "errors with passmissing" begin From 09c692a3e2467ebab86cca9bb89986745c998e36 Mon Sep 17 00:00:00 2001 From: Peter Deffebach Date: Thu, 23 Sep 2021 12:56:37 -0400 Subject: [PATCH 25/27] add mutating tests --- src/parsing.jl | 2 ++ test/astable_flag.jl | 48 +++++++++++++++++++++++++++++++++++++++----- 2 files changed, 45 insertions(+), 5 deletions(-) diff --git a/src/parsing.jl b/src/parsing.jl index c83df4c2..4708011f 100644 --- a/src/parsing.jl +++ b/src/parsing.jl @@ -226,6 +226,8 @@ function get_source_fun(function_expr; exprflags = deepcopy(DEFAULT_FLAGS)) end end + 
println(MacroTools.prettify(fun)) + return source, fun end diff --git a/test/astable_flag.jl b/test/astable_flag.jl index 6c063840..01a2b32b 100644 --- a/test/astable_flag.jl +++ b/test/astable_flag.jl @@ -11,7 +11,6 @@ const ≅ = isequal d = @rtransform df @astable begin :x = 1 - nothing end @@ -42,10 +41,6 @@ const ≅ = isequal @test d == DataFrame(x = 5) end -@testset "@astable with just assignments, mutating" begin - # After finalizing above testset -end - @testset "@astable with strings" begin df = DataFrame(a = 1, b = 2) @@ -120,6 +115,49 @@ end @test d == DataFrame(x = 1, z = 6) end +@testset "@astable with mutation" begin + df = DataFrame(a = 1, b = 2) + + df2 = copy(df) + d = @rtransform! df2 @astable begin + :x = 1 + nothing + end + + @test d == DataFrame(a = 1, b = 2, x = 1) + @test d === df2 + + df2 = copy(df) + d = @rselect! df2 @astable begin + :x = 1 + y = 100 + nothing + end + + @test d == DataFrame(x = 1) + @test d === df2 + + df2 = copy(df) + d = @transform! df2 @astable begin + :x = [5] + y = 100 + nothing + end + + @test d == DataFrame(a = 1, b = 2, x = 5) + @test d === df2 + + df2 = copy(df) + d = @select! 
df2 @astable begin + :x = [5] + y = 100 + nothing + end + + @test d == DataFrame(x = 5) + @test d === df2 +end + @testset "grouping astable flag" begin df = DataFrame(a = [1, 1, 2, 2], b = [5, 6, 7, 8]) From ae26da89baa20392bad764bb0601ea3bac44e97f Mon Sep 17 00:00:00 2001 From: Peter Deffebach Date: Fri, 24 Sep 2021 05:03:59 -0400 Subject: [PATCH 26/27] get rid of debugging println --- src/parsing.jl | 2 -- 1 file changed, 2 deletions(-) diff --git a/src/parsing.jl b/src/parsing.jl index 4708011f..c83df4c2 100644 --- a/src/parsing.jl +++ b/src/parsing.jl @@ -226,8 +226,6 @@ function get_source_fun(function_expr; exprflags = deepcopy(DEFAULT_FLAGS)) end end - println(MacroTools.prettify(fun)) - return source, fun end From a7fd1a29badf3ae305c117217b38f03993deedb1 Mon Sep 17 00:00:00 2001 From: pdeffebach <23196228+pdeffebach@users.noreply.github.com> Date: Fri, 24 Sep 2021 09:11:04 -0400 Subject: [PATCH 27/27] Apply suggestions from code review Co-authored-by: Milan Bouchet-Valat --- docs/src/index.md | 2 +- src/macros.jl | 6 +++--- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/docs/src/index.md b/docs/src/index.md index 6e6dda6e..2ac92866 100644 --- a/docs/src/index.md +++ b/docs/src/index.md @@ -404,7 +404,7 @@ new variables in the same operation, yet have them share information. In a single block, all assignments of the form `:y = f(:x)` -or `$y = f(:x)` at the top-level generate new columns. In the 2nd example, `y` +or `$y = f(:x)` at the top-level generate new columns. In the second example, `y` must be a string or `Symbol`. ``` diff --git a/src/macros.jl b/src/macros.jl index 41e6b728..7fbfad8f 100644 --- a/src/macros.jl +++ b/src/macros.jl @@ -285,7 +285,7 @@ end """ @passmissing(args...) -Propagrate missing values inside DataFramesMeta.jl macros. +Propagate missing values inside DataFramesMeta.jl macros. `@passmissing` is not a "real" Julia macro but rather serves as a "flag" @@ -349,8 +349,8 @@ macro passmissing(args...) 
throw(ArgumentError("@passmissing only works inside DataFramesMeta macros.")) end -global astable_docstring_snippet = """ - Transformations can also use the macro-flag `@astable` for creating multiple +const astable_docstring_snippet = """ + Transformations can also use the macro-flag [`@astable`](@ref) for creating multiple new columns at once and letting transformations share the same name-space. See `? @astable` for more details. """