Skip to content

Commit

Permalink
Merge pull request #4923 from cfpb/census-file-header
Browse files (browse the repository at this point in the history)
Reverting census parser output csv header
  • Loading branch information
PatrickGoRaft authored Nov 5, 2024
2 parents 70f7b05 + d2b8c1e commit c8f3e27
Show file tree
Hide file tree
Showing 2 changed files with 14 additions and 15 deletions.
27 changes: 13 additions & 14 deletions common/src/main/pyhmda/parse_census_file.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -24,15 +24,14 @@ def conv_scf(val: str) -> str:
if val not in ["T", "S", "I"]: raise ValueError(f"invalid scf: \"{val}\"")
return val


census_file_columns = {
0: ["CollectionYear", conv_num], 1: ["CBSACode", conv_dgstr], 2: ["FIPSStateCode", conv_dgstr],
3: ["FIPSCountyCode", conv_dgstr], 4: ["CensusTract", conv_dgstr],
13: ["FFIECMedianFamilyIncome", conv_num], 22: ["Population", conv_optnum],
28: ["MinorityPopulationPct", conv_optpct], 879: ["NumOwnerOccupiedUnits", conv_optnum],
899: ["Num1To4FamilyUnits", conv_optnum], 585: ["TractMFI", conv_optnum],
12: ["TractToMSAIncomePct", conv_optpct], 1057: ["MedianAge", conv_optnum],
6: ["SmallCounty", conv_scf]
0: ["Collection Year", conv_num], 1: ["MSA/MD", conv_dgstr], 2: ["State", conv_dgstr],
3: ["County", conv_dgstr], 4: ["Census Tract", conv_dgstr],
13: ["FFIEC Median Family Income", conv_num], 22: ["Population", conv_optnum],
28: ["Minority Population %", conv_optpct], 879: ["Number of Owner Occupied Units", conv_optnum],
899: ["Number of 1 to 4 Family Units", conv_optnum], 585: ["Tract MFI", conv_optnum],
12: ["Tract to MSA Income %", conv_optpct], 1057: ["Median Age", conv_optnum],
6: ["Small County", conv_scf]
}
cfkeys = census_file_columns.keys()
cfcolnames = {k: v[0] for k, v in census_file_columns.items()}
Expand All @@ -50,7 +49,7 @@ def conv_scf(val: str) -> str:

delineation_file_columns = {
3: ["CBSATitle", conv_str], 5: ["MDTitle", conv_optstr],
9: ["FIPSStateCode", conv_dgstr], 10: ["FIPSCountyCode", conv_dgstr]
9: ["State", conv_dgstr], 10: ["County", conv_dgstr]
}
dfkeys = delineation_file_columns.keys()
dfcolnames = {k: v[0] for k, v in delineation_file_columns.items()}
Expand All @@ -59,17 +58,17 @@ def conv_scf(val: str) -> str:
converters=dfconverters).rename(dfcolnames, axis=1)
logging.info(f"Parsed {prepared_file}")

parsed_delin_df["MSAOrMDTitle"] = parsed_delin_df.apply(lambda row:
parsed_delin_df["MSA/MD Name"] = parsed_delin_df.apply(lambda row:
row.MDTitle if pd.notna(row.MDTitle) else row.CBSATitle, axis=1)
parsed_delin_df.drop(columns=["CBSATitle", "MDTitle"], inplace=True)
logging.info("Calculated MSAOrMDTitles")
logging.info("Calculated MSA/MD Names")

output_file = args.output_file if args.output_file \
else f"{os.path.splitext(args.censusfile)[0]}-parsed.txt"
output_df = parsed_census_df.merge(parsed_delin_df,
how="left", on=["FIPSStateCode", "FIPSCountyCode"])
output_df["MSAOrMDTitle"] = output_df.apply(lambda row:
"" if row.CBSACode == "99999" else row.MSAOrMDTitle, axis=1)
how="left", on=["State", "County"])
output_df["MSA/MD Name"] = output_df.apply(lambda row:
"" if row["MSA/MD"] == "99999" else row["MSA/MD Name"], axis=1)
output_df.to_csv(output_file, sep='|', index=False)
logging.info(f"Wrote output file {output_file}")
os.remove(prepared_file)
2 changes: 1 addition & 1 deletion common/src/main/pyhmda/utils.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -87,7 +87,7 @@ def apply_authorized_modifications(modmap: dict, df: pd.DataFrame) -> pd.DataFra
# Census Flat File Modifications

def replace_MedianAge_2002_values(df: pd.DataFrame) -> pd.DataFrame:
df.loc[df["MedianAge"] == 2002, "MedianAge"] = 6
df.loc[df["Median Age"] == 2002, "Median Age"] = 6
return df


Expand Down

0 comments on commit c8f3e27

Please sign in to comment.