Skip to content

Commit

Permalink
Updated for chutils 1.1.10
Browse files Browse the repository at this point in the history
Added bucket field
  • Loading branch information
invertedv committed Aug 30, 2022
1 parent b0eb3c9 commit c6dd643
Show file tree
Hide file tree
Showing 4 changed files with 37 additions and 35 deletions.
4 changes: 3 additions & 1 deletion collapse/collapse.go
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,6 @@
// There are nested tables:
// - monthly. These are values that change every month.
// - qa. The qa table.
//
package collapse

import (
Expand Down Expand Up @@ -54,6 +53,8 @@ func GroupBy(sourceTable string, table string, harpTable string, create bool, co
}
// added details for new fields
switch f.Name {
case "bucket":
f.Description = "loan bucket"
case "ageFpDt":
f.Description = "age based on fdDt, missing=-1000"
case "harpLnId":
Expand Down Expand Up @@ -205,6 +206,7 @@ FROM
GROUP BY lnId)
select
r.*,
toInt32(modulo(arraySum(bitPositionsToArray(reinterpretAsUInt64(substr(r.lnId, 5, 8)))), 20)) AS bucket,
v.harpLnId,
x.oldLnId AS preHarpId,
q.qa AS field,
Expand Down
52 changes: 26 additions & 26 deletions fannie.go
Original file line number Diff line number Diff line change
Expand Up @@ -4,33 +4,34 @@
// The final result is a single table with nested arrays for time-varying fields.
// Key features of this package:
// - New fields created are:
// - vintage (e.g. 2010Q2)
// - standard - Y/N flag, Y=standard process loan
// - loan age based on first pay date
// - numeric dq field
// - property value at origination
// - harp - Y/N flag, Y=HARP loan.
// - file name from which the loan was loaded
// - QA results. There are three sets of fields:
// - The nested table qa that has two arrays:
// - field. The name of a field that has validation issues.
// - cntFail. The number of months for which this field failed qa. For static fields, this value will
// be 1.
// - allFail. An array of field names which failed for qa. For monthly fields, this means the field failed for all months.
// - vintage (e.g. 2010Q2)
// - standard - Y/N flag, Y=standard process loan
// - loan age based on first pay date
// - numeric dq field
// - property value at origination
// - harp - Y/N flag, Y=HARP loan.
// - file name from which the loan was loaded
// - QA results. There are three sets of fields:
// - The nested table qa that has two arrays:
// - field. The name of a field that has validation issues.
// - cntFail. The number of months for which this field failed qa. For static fields, this value will
// be 1.
// - allFail. An array of field names which failed for qa. For monthly fields, this means the field failed for all months.
// - A "DESCRIBE" of the output table provides info on each field.
//
// The command-line parameters are:
// -host ClickHouse IP address. Default: 127.0.0.1.
// -user ClickHouse user. Default: default
// -password ClickHouse password for user. Default: <empty>.
// -table ClickHouse table in which to insert the data.
// -maptable ClickHouse table that maps pre-HARP loan ids to HARP ids. This table is both created and used by the package.
// -create if Y, then the table is created/reset. Default: Y.
// -dir directory with Fannie Mae text files.
// -tmp ClickHouse database to use for temporary tables.
// -concur # of concurrent processes to use in loading monthly files. Default: 1.
// -memory max memory usage by ClickHouse. Default: 40000000000.
// -groupby max_bytes_before_external_groupby ClickHouse parameter. Default: 20000000000.
//
// -host ClickHouse IP address. Default: 127.0.0.1.
// -user ClickHouse user. Default: default
// -password ClickHouse password for user. Default: <empty>.
// -table ClickHouse table in which to insert the data.
// -maptable ClickHouse table that maps pre-HARP loan ids to HARP ids. This table is both created and used by the package.
// -create if Y, then the table is created/reset. Default: Y.
// -dir directory with Fannie Mae text files.
// -tmp ClickHouse database to use for temporary tables.
// -concur # of concurrent processes to use in loading monthly files. Default: 1.
// -memory max memory usage by ClickHouse. Default: 40000000000.
// -groupby max_bytes_before_external_groupby ClickHouse parameter. Default: 20000000000.
//
// The non-standard loans have four additional fields. This package recognizes whether the file is standard or not.
// A combined table can be built by running the app twice pointing to the same -table.
Expand Down Expand Up @@ -139,11 +140,10 @@ func main() {
fmt.Printf("Done with %s. %d out of %d ,times: %0.2f, %0.2f minutes\n", fileName, ind+1, len(fileList), step1, step2)
step1Time += step1
step2Time += step2

}
step1Time /= 60.0
step2Time /= 60.0
fmt.Printf("step1 time: %0.2f step2 time: %0.2f hours, total: %0.2f", step1Time, step2Time, step1Time+step2Time)
fmt.Printf("step1 time: %0.2f step2 time: %0.2f hours, total: %0.2f\n", step1Time, step2Time, step1Time+step2Time)
// clean up
_, _ = con.Exec(fmt.Sprintf("DROP TABLE %s.source", *tmp))
}
2 changes: 1 addition & 1 deletion go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ go 1.18
require (
github.com/ClickHouse/clickhouse-go/v2 v2.0.14 // indirect
github.com/google/uuid v1.3.0 // indirect
github.com/invertedv/chutils v0.2.2 // indirect
github.com/invertedv/chutils v1.1.10 // indirect
github.com/paulmach/orb v0.7.1 // indirect
github.com/pierrec/lz4/v4 v4.1.14 // indirect
github.com/shopspring/decimal v1.3.1 // indirect
Expand Down
14 changes: 7 additions & 7 deletions raw/raw.go
Original file line number Diff line number Diff line change
@@ -1,12 +1,12 @@
// Package raw reads in the raw data -- either the standard or non-standard files.
// The package also adds a handful of new fields.
//
// - qa. Results of QA. The string field lists every field that failed QA separated by colons.
// - file. Name of the source file.
// - dq. Numeric delinquency level.
// - vintage. Vintage of the loan based on the first pay date. The string format is CCYY"Q"q, for example 2020Q1.
// - propVal. Property value at origination calculated from original balance and LTV.
// - standard. Flag that is Y if the loan is a standard loan.
// - qa. Results of QA. The string field lists every field that failed QA separated by colons.
// - file. Name of the source file.
// - dq. Numeric delinquency level.
// - vintage. Vintage of the loan based on the first pay date. The string format is CCYY"Q"q, for example 2020Q1.
// - propVal. Property value at origination calculated from original balance and LTV.
// - standard. Flag that is Y if the loan is a standard loan.
//
// The output table is <tmp>.source where tmp is the tmp DB specified on the command line
package raw
Expand Down Expand Up @@ -1855,7 +1855,7 @@ func LoadHarpMap(sourceFile string, table string, con *chutils.Connect) (err err
return e
}
wrtr := s.NewWriter(table, con)
if e := chutils.Export(rdr, wrtr, 0); e != nil {
if e := chutils.Export(rdr, wrtr, 0, false); e != nil {
return e
}
return nil
Expand Down

0 comments on commit c6dd643

Please sign in to comment.