From 403cdc5d9114e4c152af731ef5f1fb46ca1c02e6 Mon Sep 17 00:00:00 2001 From: Trevor Spreadbury Date: Mon, 30 Sep 2024 15:15:40 +0000 Subject: [PATCH 1/3] fix table.yaml typo --- src/utils/table.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/utils/table.yaml b/src/utils/table.yaml index 294e603..21494b9 100644 --- a/src/utils/table.yaml +++ b/src/utils/table.yaml @@ -142,7 +142,7 @@ Address: transactor: Transactor Membership: - required_attribute: + required_attributes: - member_id - organization_id attributes: From 74bbd33c5132e9a878535b37c629ef8acc524f50 Mon Sep 17 00:00:00 2001 From: Trevor Spreadbury Date: Mon, 30 Sep 2024 15:16:33 +0000 Subject: [PATCH 2/3] fix table id vs index bug and dealing with child tables --- src/utils/yamltable.py | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/src/utils/yamltable.py b/src/utils/yamltable.py index b0199c4..105c8e9 100644 --- a/src/utils/yamltable.py +++ b/src/utils/yamltable.py @@ -59,6 +59,7 @@ def __init__(self, data_schema: dict, table_type: str): # noqa ANN204 self.table_type = table_type self.data_schema = data_schema self._child_types_are_separate = None + self._child_types = None self._parent_type = None self._attributes = None self._repeating_columns = None @@ -206,6 +207,8 @@ def child_types_are_separate(self) -> bool: @property def child_types(self) -> list: """Types that inherit attributes from the current type""" + if self._child_types is None: + self._child_types = self.data_schema[self.table_type].get("child_types", []) return self._child_types @property @@ -436,6 +439,7 @@ def _add_forward_relation_to_foreign_table( ] == forward_relation and table_type in [ forward_relation_type, schema.schema[forward_relation_type].parent_type, + *schema.schema[forward_relation_type].child_types, ]: # this means the derivative table requires a column linking back to # the current table @@ -449,7 +453,7 @@ def _add_forward_relation_to_foreign_table( table.loc[ relevant_rows_mask, backlink_column, - ] = table.loc[relevant_rows_mask].index + ] = table.loc[relevant_rows_mask]["id"] foreign_columns_in_base_table.append(backlink_column) foreign_columns_in_foreign_table.append(required_attribute) return table, foreign_columns_in_base_table, foreign_columns_in_foreign_table @@ -553,6 +557,8 @@ def _normalize_table_completely( ].items() if not column.startswith(forward_relation_column) } + # this is where the heavy lifting is done and a new foreign table + # is created derived from the columns that did not belong in base table active_table, foreign_table = _split_prefixed_columns( active_table, table_type, From 29a5d79bd25876bbbb3c01b3c10fe2025aca97d3 Mon Sep 17 00:00:00 2001 From: Trevor Spreadbury Date: Mon, 30 Sep 2024 15:18:28 +0000 Subject: [PATCH 3/3] fix id handling for PA finance data source --- src/utils/finance/states/pennsylvania.py | 19 +++++++++++-------- 1 file changed, 11 insertions(+), 8 deletions(-) diff --git a/src/utils/finance/states/pennsylvania.py b/src/utils/finance/states/pennsylvania.py index e20b01f..d8aaf5d 100644 --- a/src/utils/finance/states/pennsylvania.py +++ b/src/utils/finance/states/pennsylvania.py @@ -120,9 +120,11 @@ def id_columns_to_standardize(self) -> dict: # noqa D102 def _get_additional_columns(self) -> None: super()._get_additional_columns() - # add PA as election state to rows that have election info + # add PA as election state to rows that have election info other than year election_columns = [ - col for col in self.table.columns if col.startswith("election_result--") + col + for col in self.table.columns + if col.startswith("election_result--") and not col.endswith("year") ] election_info_mask = self.table[election_columns].notna().any(axis=1) self.table.loc[election_info_mask, "election_result--election--state"] = "PA" @@ -203,7 +205,7 @@ def id_columns_to_standardize(self) -> dict: # noqa D102 return { "donor_id": [], "recipient_id": ["recipient--election_result--candidate_id"], - "reported_election_id": ["recipient--election_result--election--id"], + "reported_election--id": ["recipient--election_result--election--id"], } @property @@ -219,9 +221,10 @@ def column_details(self) -> pd.DataFrame: # noqa D102 def _get_additional_columns(self) -> None: super()._get_additional_columns() - self.table.loc[:, "reported_election--year"] = self.table.loc[ - :, "recipient--election_result--election--year" - ] + self.table.loc[:, "recipient--election_result--election--year"] = ( + self.table.loc[:, "reported_election--year"] + ) + self.table.loc[:, "reported_election--id"] = None self.table.loc[:, "recipient--election_result--candidate_id"] = None self.table.loc[:, "recipient--election_result--election--id"] = None @@ -308,8 +311,8 @@ def id_columns_to_standardize(self) -> dict: # noqa D102 def _get_additional_columns(self) -> None: super()._get_additional_columns() - self.table.loc[:, "reported_election--year"] = self.table.loc[ - :, "donor--election_result--election--year" + self.table.loc[:, "donor--election_result--election--year"] = self.table.loc[ + :, "reported_election--year" ] self.table.loc[:, "donor--election_result--candidate_id"] = None self.table.loc[:, "donor--election_result--election_id"] = None