diff --git a/README.md b/README.md index 60186a4..dfbf48e 100644 --- a/README.md +++ b/README.md @@ -19,6 +19,7 @@ the dataframe is mature, we can easily integrate it with an SQL engine. package simple import ( + "fmt" "github.com/stretchr/testify/assert" "testing" "tiny_dataframe/pkg/a_engine" @@ -28,28 +29,27 @@ import ( func TestParquetFile(t *testing.T) { ctx := engine.NewContext() df, err := ctx.Parquet("../../test/data/c1_c2_c3_int64.parquet", nil) - - _ = df.Show() - /* - +-----+-----+-----+ - | C1 | C2 | C3 | - +-----+-----+-----+ - | 100 | 101 | 102 | - | 100 | 201 | 202 | - | 100 | 301 | 302 | - | 200 | 401 | 402 | - | 200 | 501 | 502 | - | 300 | 601 | 602 | - +-----+-----+-----+ - */ if err != nil { t.Error(err) } + _ = df.Show() + /* + +-----+-----+-----+ + | C1 | C2 | C3 | + +-----+-----+-----+ + | 100 | 101 | 102 | + | 100 | 201 | 202 | + | 100 | 301 | 302 | + | 200 | 401 | 402 | + | 200 | 501 | 502 | + | 300 | 601 | 602 | + +-----+-----+-----+ + */ df = df. - Filter(logicalplan.Eq( + Filter(logicalplan.Lt( logicalplan.ColumnExpr{Name: "c1"}, - logicalplan.LiteralInt64Expr{Val: 200}, + logicalplan.LiteralInt64Expr{Val: 300}, )). Project( logicalplan.ColumnExpr{Name: "c1"}, @@ -67,32 +67,34 @@ func TestParquetFile(t *testing.T) { logicalPlan, _ := df.LogicalPlan() fmt.Println(logicalplan.PrettyPrint(logicalPlan, 0)) + assert.Equal(t, "Aggregate: groupExpr=[#c1], aggregateExpr=[sum(#c2)]\n\tProjection: #c1, #c2\n\t\tFilter: #c1 < 300\n\t\t\tInput: ../../test/data/c1_c2_c3_int64.parquet; projExpr=None\n", logicalplan.PrettyPrint(logicalPlan, 0)) /* Aggregate: groupExpr=[#c1], aggregateExpr=[sum(#c2)] Projection: #c1, #c2 - Filter: #c1 = 200 + Filter: #c1 < 300 Input: ../../test/data/c1_c2_c3_int64.parquet; projExpr=None */ logicalPlan, _ = df.OptimizedLogicalPlan() fmt.Println(logicalplan.PrettyPrint(logicalPlan, 0)) + assert.Equal(t, "Aggregate: groupExpr=[#c1], aggregateExpr=[sum(#c2)]\n\tProjection: #c1, #c2\n\t\tFilter: #c1 < 300\n\t\t\tInput: ../../test/data/c1_c2_c3_int64.parquet; projExpr=[c1 c2]\n", logicalplan.PrettyPrint(logicalPlan, 0)) /* - Aggregate: groupExpr=[#c1], aggregateExpr=[sum(#c2)] - Projection: #c1, #c2 - Filter: #c1 = 200 - Input: ../../test/data/c1_c2_c3_int64.parquet; projExpr=[c1 c2] + Aggregate: groupExpr=[#c1], aggregateExpr=[sum(#c2)] + Projection: #c1, #c2 + Filter: #c1 < 300 + Input: ../../test/data/c1_c2_c3_int64.parquet; projExpr=[c1 c2] */ - err = df.Show() if err != nil { t.Error(err) } /* - +-----+---------+ - | #0 | SUM(#1) | - +-----+---------+ - | 200 | 902 | - +-----+---------+ + +-----+---------+ + | #0 | SUM(#1) | + +-----+---------+ + | 100 | 603 | + | 200 | 902 | + +-----+---------+ */ } ``` diff --git a/cmd/simple/engine_test.go b/cmd/simple/engine_test.go index dbbaab9..f7943cf 100644 --- a/cmd/simple/engine_test.go +++ b/cmd/simple/engine_test.go @@ -16,7 +16,18 @@ func TestParquetFile(t *testing.T) { } _ = df.Show() - + /* + +-----+-----+-----+ + | C1 | C2 | C3 | + +-----+-----+-----+ + | 100 | 101 | 102 | + | 100 | 201 | 202 | + | 100 | 301 | 302 | + | 200 | 401 | 402 | + | 200 | 501 | 502 | + | 300 | 601 | 602 | + +-----+-----+-----+ + */ df = df. Filter(logicalplan.Lt( logicalplan.ColumnExpr{Name: "c1"}, @@ -39,13 +50,32 @@ func TestParquetFile(t *testing.T) { logicalPlan, _ := df.LogicalPlan() fmt.Println(logicalplan.PrettyPrint(logicalPlan, 0)) assert.Equal(t, "Aggregate: groupExpr=[#c1], aggregateExpr=[sum(#c2)]\n\tProjection: #c1, #c2\n\t\tFilter: #c1 < 300\n\t\t\tInput: ../../test/data/c1_c2_c3_int64.parquet; projExpr=None\n", logicalplan.PrettyPrint(logicalPlan, 0)) + /* + Aggregate: groupExpr=[#c1], aggregateExpr=[sum(#c2)] + Projection: #c1, #c2 + Filter: #c1 < 300 + Input: ../../test/data/c1_c2_c3_int64.parquet; projExpr=None + */ logicalPlan, _ = df.OptimizedLogicalPlan() fmt.Println(logicalplan.PrettyPrint(logicalPlan, 0)) assert.Equal(t, "Aggregate: groupExpr=[#c1], aggregateExpr=[sum(#c2)]\n\tProjection: #c1, #c2\n\t\tFilter: #c1 < 300\n\t\t\tInput: ../../test/data/c1_c2_c3_int64.parquet; projExpr=[c1 c2]\n", logicalplan.PrettyPrint(logicalPlan, 0)) - + /* + Aggregate: groupExpr=[#c1], aggregateExpr=[sum(#c2)] + Projection: #c1, #c2 + Filter: #c1 < 300 + Input: ../../test/data/c1_c2_c3_int64.parquet; projExpr=[c1 c2] + */ err = df.Show() if err != nil { t.Error(err) } + /* + +-----+---------+ + | #0 | SUM(#1) | + +-----+---------+ + | 100 | 603 | + | 200 | 902 | + +-----+---------+ + */ }