diff --git a/collapse/collapse.go b/collapse/collapse.go index 0716282..c4db739 100644 --- a/collapse/collapse.go +++ b/collapse/collapse.go @@ -3,7 +3,6 @@ // There are nested tables: // - monthly. These are values that change every month. // - qa. The qa table. -// package collapse import ( @@ -54,6 +53,8 @@ func GroupBy(sourceTable string, table string, harpTable string, create bool, co } // added details for new fields switch f.Name { + case "bucket": + f.Description = "loan bucket" case "ageFpDt": f.Description = "age based on fdDt, missing=-1000" case "harpLnId": @@ -205,6 +206,7 @@ FROM GROUP BY lnId) select r.*, + toInt32(modulo(arraySum(bitPositionsToArray(reinterpretAsUInt64(substr(r.lnId, 5, 8)))), 20)) AS bucket, v.harpLnId, x.oldLnId AS preHarpId, q.qa AS field, diff --git a/fannie.go b/fannie.go index 139038a..de35792 100644 --- a/fannie.go +++ b/fannie.go @@ -4,33 +4,34 @@ // The final result is a single table with nested arrays for time-varying fields. // Key features of this package: // - New fields created are: -// - vintage (e.g. 2010Q2) -// - standard - Y/N flag, Y=standard process loan -// - loan age based on first pay date -// - numeric dq field -// - property value at origination -// - harp - Y/N flag, Y=HARP loan. -// - file name from which the loan was loaded -// - QA results. There are three sets of fields: -// - The nested table qa that has two arrays: -// - field. The name of a field that has validation issues. -// - cntFail. The number of months for which this field failed qa. For static fields, this value will -// be 1. -// - allFail. An array of field names which failed for qa. For monthly fields, this means the field failed for all months. +// - vintage (e.g. 2010Q2) +// - standard - Y/N flag, Y=standard process loan +// - loan age based on first pay date +// - numeric dq field +// - property value at origination +// - harp - Y/N flag, Y=HARP loan. +// - file name from which the loan was loaded +// - QA results. 
There are three sets of fields: +// - The nested table qa that has two arrays: +// - field. The name of a field that has validation issues. +// - cntFail. The number of months for which this field failed qa. For static fields, this value will +// be 1. +// - allFail. An array of field names which failed for qa. For monthly fields, this means the field failed for all months. // - A "DESCRIBE" of the output table provides info on each field. // // The command-line parameters are: -// -host ClickHouse IP address. Default: 127.0.0.1. -// -user ClickHouse user. Default: default -// -password ClickHouse password for user. Default: . -// -table ClickHouse table in which to insert the data. -// -maptable. Clickhouse table that maps pre-HARP loan ids to HARP ids. This table is both created and used by the package. -// -create if Y, then the table is created/reset. Default: Y. -// -dir directory with Fannie Mae text files. -// -tmp ClickHouse database to use for temporary tables. -// -concur # of concurrent processes to use in loading monthly files. Default: 1. -// -memory max memory usage by ClickHouse. Default: 40000000000. -// -groupby max_bytes_before_external_groupby ClickHouse paramter. Default: 20000000000. +// +// -host ClickHouse IP address. Default: 127.0.0.1. +// -user ClickHouse user. Default: default +// -password ClickHouse password for user. Default: . +// -table ClickHouse table in which to insert the data. +// -maptable. ClickHouse table that maps pre-HARP loan ids to HARP ids. This table is both created and used by the package. +// -create if Y, then the table is created/reset. Default: Y. +// -dir directory with Fannie Mae text files. +// -tmp ClickHouse database to use for temporary tables. +// -concur # of concurrent processes to use in loading monthly files. Default: 1. +// -memory max memory usage by ClickHouse. Default: 40000000000. +// -groupby max_bytes_before_external_groupby ClickHouse parameter. Default: 20000000000. 
// // The non-standard loans have four additional fields. This package recognizes whether the file is standard or not. // A combined table can be built by running the app twice pointing to the same -table. @@ -139,11 +140,10 @@ func main() { fmt.Printf("Done with %s. %d out of %d ,times: %0.2f, %0.2f minutes\n", fileName, ind+1, len(fileList), step1, step2) step1Time += step1 step2Time += step2 - } step1Time /= 60.0 step2Time /= 60.0 - fmt.Printf("step1 time: %0.2f step2 time: %0.2f hours, total: %0.2f", step1Time, step2Time, step1Time+step2Time) + fmt.Printf("step1 time: %0.2f step2 time: %0.2f hours, total: %0.2f\n", step1Time, step2Time, step1Time+step2Time) // clean up _, _ = con.Exec(fmt.Sprintf("DROP TABLE %s.source", *tmp)) } diff --git a/go.mod b/go.mod index c4ab0cc..bca99ab 100644 --- a/go.mod +++ b/go.mod @@ -5,7 +5,7 @@ go 1.18 require ( github.com/ClickHouse/clickhouse-go/v2 v2.0.14 // indirect github.com/google/uuid v1.3.0 // indirect - github.com/invertedv/chutils v0.2.2 // indirect + github.com/invertedv/chutils v1.1.10 // indirect github.com/paulmach/orb v0.7.1 // indirect github.com/pierrec/lz4/v4 v4.1.14 // indirect github.com/shopspring/decimal v1.3.1 // indirect diff --git a/raw/raw.go b/raw/raw.go index 87374d7..03cde65 100644 --- a/raw/raw.go +++ b/raw/raw.go @@ -1,12 +1,12 @@ // Package raw reads in the raw data -- either the standard or non-standard files. // The package also adds a handful of new fields. // -// - qa. Results of QA. The string field lists every field that failed QA separated by colons. -// - file. Name of the source file. -// - dq. Numeric delinquency level. -// - vintage. Vintage of the loan based on the first pay date. The string format is CCYY"Q"q, for example 2020Q1. -// - propVal. Property value at origination calculated from original balance and LTV. -// - standard. Flag that is Y if the loan is a standard loan. +// - qa. Results of QA. The string field lists every field that failed QA separated by colons. +// - file. 
Name of the source file. +// - dq. Numeric delinquency level. +// - vintage. Vintage of the loan based on the first pay date. The string format is CCYY"Q"q, for example 2020Q1. +// - propVal. Property value at origination calculated from original balance and LTV. +// - standard. Flag that is Y if the loan is a standard loan. // // The output table is .source where tmp is the tmp DB specified on the command line package raw @@ -1855,7 +1855,7 @@ func LoadHarpMap(sourceFile string, table string, con *chutils.Connect) (err err return e } wrtr := s.NewWriter(table, con) - if e := chutils.Export(rdr, wrtr, 0); e != nil { + if e := chutils.Export(rdr, wrtr, 0, false); e != nil { return e } return nil