diff --git a/common/src/main/pyhmda/parse_census_file.py b/common/src/main/pyhmda/parse_census_file.py index 9be80f8acd..fc3b0c4a36 100755 --- a/common/src/main/pyhmda/parse_census_file.py +++ b/common/src/main/pyhmda/parse_census_file.py @@ -24,15 +24,14 @@ def conv_scf(val: str) -> str: if val not in ["T", "S", "I"]: raise ValueError(f"invalid scf: \"{val}\"") return val - census_file_columns = { - 0: ["CollectionYear", conv_num], 1: ["CBSACode", conv_dgstr], 2: ["FIPSStateCode", conv_dgstr], - 3: ["FIPSCountyCode", conv_dgstr], 4: ["CensusTract", conv_dgstr], - 13: ["FFIECMedianFamilyIncome", conv_num], 22: ["Population", conv_optnum], - 28: ["MinorityPopulationPct", conv_optpct], 879: ["NumOwnerOccupiedUnits", conv_optnum], - 899: ["Num1To4FamilyUnits", conv_optnum], 585: ["TractMFI", conv_optnum], - 12: ["TractToMSAIncomePct", conv_optpct], 1057: ["MedianAge", conv_optnum], - 6: ["SmallCounty", conv_scf] + 0: ["Collection Year", conv_num], 1: ["MSA/MD", conv_dgstr], 2: ["State", conv_dgstr], + 3: ["County", conv_dgstr], 4: ["Census Tract", conv_dgstr], + 13: ["FFIEC Median Family Income", conv_num], 22: ["Population", conv_optnum], + 28: ["Minority Population %", conv_optpct], 879: ["Number of Owner Occupied Units", conv_optnum], + 899: ["Number of 1 to 4 Family Units", conv_optnum], 585: ["Tract MFI", conv_optnum], + 12: ["Tract to MSA Income %", conv_optpct], 1057: ["Median Age", conv_optnum], + 6: ["Small County", conv_scf] } cfkeys = census_file_columns.keys() cfcolnames = {k: v[0] for k, v in census_file_columns.items()} @@ -50,7 +49,7 @@ def conv_scf(val: str) -> str: delineation_file_columns = { 3: ["CBSATitle", conv_str], 5: ["MDTitle", conv_optstr], - 9: ["FIPSStateCode", conv_dgstr], 10: ["FIPSCountyCode", conv_dgstr] + 9: ["State", conv_dgstr], 10: ["County", conv_dgstr] } dfkeys = delineation_file_columns.keys() dfcolnames = {k: v[0] for k, v in delineation_file_columns.items()} @@ -59,17 +58,17 @@ def conv_scf(val: str) -> str: converters=dfconverters).rename(dfcolnames, axis=1) logging.info(f"Parsed {prepared_file}") -parsed_delin_df["MSAOrMDTitle"] = parsed_delin_df.apply(lambda row: +parsed_delin_df["MSA/MD Name"] = parsed_delin_df.apply(lambda row: row.MDTitle if pd.notna(row.MDTitle) else row.CBSATitle, axis=1) parsed_delin_df.drop(columns=["CBSATitle", "MDTitle"], inplace=True) -logging.info("Calculated MSAOrMDTitles") +logging.info("Calculated MSA/MD Names") output_file = args.output_file if args.output_file \ else f"{os.path.splitext(args.censusfile)[0]}-parsed.txt" output_df = parsed_census_df.merge(parsed_delin_df, - how="left", on=["FIPSStateCode", "FIPSCountyCode"]) -output_df["MSAOrMDTitle"] = output_df.apply(lambda row: - "" if row.CBSACode == "99999" else row.MSAOrMDTitle, axis=1) + how="left", on=["State", "County"]) +output_df["MSA/MD Name"] = output_df.apply(lambda row: + "" if row["MSA/MD"] == "99999" else row["MSA/MD Name"], axis=1) output_df.to_csv(output_file, sep='|', index=False) logging.info(f"Wrote output file {output_file}") os.remove(prepared_file) diff --git a/common/src/main/pyhmda/utils.py b/common/src/main/pyhmda/utils.py index 58e3e44a92..d71ac214f4 100644 --- a/common/src/main/pyhmda/utils.py +++ b/common/src/main/pyhmda/utils.py @@ -87,7 +87,7 @@ def apply_authorized_modifications(modmap: dict, df: pd.DataFrame) -> pd.DataFra # Census Flat File Modifications def replace_MedianAge_2002_values(df: pd.DataFrame) -> pd.DataFrame: - df.loc[df["MedianAge"] == 2002, "MedianAge"] = 6 + df.loc[df["Median Age"] == 2002, "Median Age"] = 6 return df