diff --git a/.github/workflows/publish-docs.yml b/.github/workflows/publish-docs.yml index 09ae0976b6..323fc12e43 100644 --- a/.github/workflows/publish-docs.yml +++ b/.github/workflows/publish-docs.yml @@ -29,7 +29,7 @@ jobs: - name: Build jupyter book run: jb build docs --warningiserror --keep-going # set doc to fail on any sphinx warning - - uses: actions/upload-artifact@v2 + - uses: actions/upload-artifact@v3 if: always() with: name: docs-build diff --git a/airflow/dags/create_external_tables/ntd_data_products/annual_database_agency_information.yml b/airflow/dags/create_external_tables/ntd_data_products/annual_database_agency_information.yml index 7aaf092756..e4054fc24c 100644 --- a/airflow/dags/create_external_tables/ntd_data_products/annual_database_agency_information.yml +++ b/airflow/dags/create_external_tables/ntd_data_products/annual_database_agency_information.yml @@ -16,125 +16,88 @@ hive_options: source_uri_prefix: "annual-database-agency-information/{dt:DATE}/{ts:TIMESTAMP}/{year:INTEGER}/" schema_fields: - name: number_of_state_counties - type: FLOAT - mode: NULLABLE + type: NUMERIC - name: tam_tier type: STRING - mode: NULLABLE - name: personal_vehicles - type: FLOAT - mode: NULLABLE + type: NUMERIC - name: density type: FLOAT - mode: NULLABLE - name: uza_name type: STRING - mode: NULLABLE - name: tribal_area_name type: STRING - mode: NULLABLE - name: service_area_sq_miles - type: FLOAT - mode: NULLABLE + type: NUMERIC - name: total_voms - type: FLOAT - mode: NULLABLE + type: NUMERIC - name: city type: STRING - mode: NULLABLE - name: fta_recipient_id - type: FLOAT - mode: NULLABLE + type: NUMERIC - name: region - type: FLOAT - mode: NULLABLE + type: NUMERIC - name: state_admin_funds_expended - type: FLOAT - mode: NULLABLE + type: NUMERIC - name: zip_code_ext - type: FLOAT - mode: NULLABLE + type: STRING - name: zip_code - type: FLOAT - mode: NULLABLE + type: STRING - name: ueid type: STRING - mode: NULLABLE - name: address_line_2 type: STRING - 
mode: NULLABLE - name: number_of_counties_with_service - type: FLOAT - mode: NULLABLE + type: NUMERIC - name: reporter_acronym type: STRING - mode: NULLABLE - name: original_due_date - type: INTEGER - mode: NULLABLE + type: STRING - name: sq_miles - type: FLOAT - mode: NULLABLE + type: NUMERIC - name: address_line_1 type: STRING - mode: NULLABLE - name: p_o__box type: STRING - mode: NULLABLE - name: fy_end_date - type: INTEGER - mode: NULLABLE + type: STRING - name: reported_by_ntd_id type: STRING - mode: NULLABLE - name: population - type: FLOAT - mode: NULLABLE + type: NUMERIC - name: reporting_module type: STRING - mode: NULLABLE - name: service_area_pop - type: FLOAT - mode: NULLABLE + type: NUMERIC - name: subrecipient_type type: STRING - mode: NULLABLE - name: state type: STRING - mode: NULLABLE - name: volunteer_drivers - type: FLOAT - mode: NULLABLE + type: NUMERIC - name: primary_uza - type: FLOAT - mode: NULLABLE + type: NUMERIC - name: doing_business_as type: STRING - mode: NULLABLE - name: reporter_type type: STRING - mode: NULLABLE - name: legacy_ntd_id type: STRING - mode: NULLABLE - name: voms_do - type: FLOAT - mode: NULLABLE + type: NUMERIC - name: url type: STRING - mode: NULLABLE - name: reported_by_name type: STRING - mode: NULLABLE - name: voms_pt - type: FLOAT - mode: NULLABLE + type: NUMERIC - name: organization_type type: STRING - mode: NULLABLE - name: agency_name type: STRING - mode: NULLABLE - name: ntd_id type: STRING - mode: NULLABLE + - name: division_department + type: STRING + - name: state_parent_ntd_id + type: STRING diff --git a/docs/warehouse/warehouse_starter_kit.md b/docs/warehouse/warehouse_starter_kit.md index 893b089c77..6ac226eb0f 100644 --- a/docs/warehouse/warehouse_starter_kit.md +++ b/docs/warehouse/warehouse_starter_kit.md @@ -65,7 +65,7 @@ For a given day: ### Other -- [dim_annual_ntd_agency_information](https://dbt-docs.calitp.org/#!/model/model.calitp_warehouse.dim_annual_database_agency_information) +- 
[dim_annual_agency_information](https://dbt-docs.calitp.org/#!/model/model.calitp_warehouse.dim_annual_agency_information) - View some of the data produced by the [US Department of Transportation](https://www.transit.dot.gov/ntd) for the National Transit Database. - Information from 2018-2021 are available. diff --git a/warehouse/models/docs/_docs_ntd.md b/warehouse/models/docs/_docs_ntd.md index 4ca3df7fc6..6ce0a129e6 100644 --- a/warehouse/models/docs/_docs_ntd.md +++ b/warehouse/models/docs/_docs_ntd.md @@ -2,6 +2,10 @@ Docs for NTD models; {% docs ntd_id %} A five-digit identifying number for each agency used in the current NTD system. +FTA assigns each reporter a unique five-digit NTD Identification Number. +The first digit of the NTD ID corresponds to the FTA Region where the reporter is located (e.g., 9#### indicates Region IX). +The code will have a four-to-five digit prefix for any entity submitting the report on behalf of the reporter. +For example, State Departments of Transportation (usually indicated as #R##) submit on behalf of their subrecipients. {% enddocs %} {% docs ntd_legacy_id %} @@ -40,6 +44,7 @@ The state in which the agency is headquartered. {% enddocs %} {% docs ntd_primary_uza_code %} +The primary urbanized area served by the transit agency. UACE Code remains consistent across census years. {% enddocs %} diff --git a/warehouse/models/docs/_docs_transit_database.md b/warehouse/models/docs/_docs_transit_database.md index 2fa85cefcb..34cb0e36d3 100644 --- a/warehouse/models/docs/_docs_transit_database.md +++ b/warehouse/models/docs/_docs_transit_database.md @@ -200,7 +200,7 @@ are implemented for future schema consistency, but historical data has not yet b {% docs ntd_agency_info_table %} -DEPRECATED: Please use mart_ntd.dim_annual_ntd_agency_information going forward. +DEPRECATED: Please use mart_ntd.dim_annual_agency_information going forward. 
2018 NTD Agency Info Table Imported 10/6/2021 from fta.gov diff --git a/warehouse/models/mart/ntd/_mart_ntd.yml b/warehouse/models/mart/ntd/_mart_ntd.yml index 9d5040e991..9b7c360f99 100644 --- a/warehouse/models/mart/ntd/_mart_ntd.yml +++ b/warehouse/models/mart/ntd/_mart_ntd.yml @@ -102,33 +102,35 @@ x-common-fields: description: '{{ doc("ntd_xlsx_execution_ts") }}' models: - - name: dim_annual_ntd_agency_information + - name: dim_annual_agency_information description: > - Versioned extracts of the NTD Annual Database Agency Information. + Contains basic contact and agency information for each NTD reporter. - The versioning is bitemporal, so records are versioned at the year + The dataset can be found at: + https://www.transit.dot.gov/ntd/data-product/2023-annual-database-agency-information + * For other years, just replace 2023 by the desired year. - and ntd_id level. This means you must join based on - _valid_from/_valid_from + The versioning is bitemporal, so records are versioned at the year, ntd_id, and state_parent_ntd_id level. + This means you must join based on _valid_from/_valid_to to get the records for a given ntd_id and state_parent_ntd_id, + and then choose which year to look up. - to get the records for a given ntd_id, and then choose which year to - - look up. + Use _is_current to find the latest version for each set of year, ntd_id, and state_parent_ntd_id. 
tests: - dbt_utils.mutually_exclusive_ranges: lower_bound_column: _valid_from upper_bound_column: _valid_to - partition_by: CONCAT(year, '_', ntd_id) + partition_by: CONCAT(year, '_', ntd_id, '_', COALESCE(state_parent_ntd_id, '')) gaps: required columns: - name: key tests: - not_null - unique - - name: year + - <<: *report_year + name: year tests: - not_null - - name: ntd_id + - <<: *ntd_id tests: - not_null - name: _valid_from @@ -138,8 +140,120 @@ models: tests: - not_null - name: _is_current + description: Indicates the latest report version for each year, ntd_id, and state_parent_ntd_id. tests: - not_null + - name: state_parent_ntd_id + description: | + Indicates the ID number of the transit agency reporting to the database on behalf of the transit agency. + - name: agency_name + description: | + The agency name is the full legal name of the agency. + If reporting is required under an FTA grant program, this must reflect the legal name of the funding recipient. + - name: doing_business_as + description: The name under which the reporting agency is doing business. + - name: address_line_1 + description: First line of the agency's mailing address. + - name: address_line_2 + description: Second line of the agency's mailing address (if applicable). + - name: p_o__box + description: The PO Box of the agency (if applicable). + - name: city + description: City of the agency's mailing address. + - name: state + description: State of the agency's mailing address. + - name: zip_code + description: Zip Code of the agency's mailing address. + - name: zip_code_ext + description: Zip Code Extension of the agency's mailing address. + - name: region + description: The FTA region in which the reporter is located. + - name: density + description: The population density of the Primary UZA of the agency, if one exists. + - name: ueid + description: | + The UEID is a number or other identifier used to identify a specific commercial, nonprofit, or Government entity. 
+ This is now reported in place of DUNS number for each unique transit agency reporting to the NTD. + See the U.S. General Services Administration UEID web page for more information. + - name: fta_recipient_id + description: | + The four-digit number assigned to a transit agency for the Federal Transit Administration (FTA) electronic grant making system — TrAMS (Transportation Award Management System). + - name: original_due_date + description: The date on which the NTD Report for the given report year was due to FTA. + - name: fy_end_date + description: Calendar selection for the last day of an agency's fiscal year. + - name: number_of_counties_with_service + description: | + States report the total number of counties in the state that are currently served, in whole or in part, by Formula Grants for Rural Areas (§5311)-funded operators. + - name: number_of_state_counties + description: The number of Counties in the given State (for State Departments of Transportation). + - *organization_type + - name: personal_vehicles + description: | + Vehicles that are used by the transit provider to transport passengers in revenue service but are owned by private individuals, typically an employee of the agency or a volunteer driver. + - name: population + description: The population of the Primary UZA of the agency, if one exists. + - *primary_uza_code + - *primary_uza_name + - name: reported_by_name + description: The name of the entity reporting on behalf of another entity. + - name: reported_by_ntd_id + description: | + The NTD ID of the entity, usually a State, submitting an NTD report on behalf of another entity, usually a subrecipient of the State. + - name: reporter_acronym + description: The acronym used by the reporting agency. + - name: reporter_type + description: | + Reporter Type will be based on where they operate and the reporting requirements associated with their agency. 
+ Agencies that receive Chapter 53 funds and own, operate, or manage capital assets in public transportation are also required to file an annual report, even if they do not receive §5307 or §5311 funds. + Agencies that do not receive or benefit from FTA funding may elect to submit their data to the NTD as a Voluntary Reporter but are still assigned a reporter type. + Current types are: + `Building Reporter`, + `Full Reporter`, + `Group Plan Sponsor`, + `Planning Reporter`, + `Reduced Asset Reporter`, + `Reduced Reporter`, + `Rural Reporter`, + `Separate Service`, + `State Reporter`. + - name: reporting_module + description: | + A general classification that will determine which, if any, FTA formula programs will use the NTD data. + For example, Tribes and Native Villages will have data included in the §5311j Tribal Transit Program. + Reporters receiving Chapter 53 funds but not receiving or benefiting from §5307 and §5311 + AND not electing to report service data are classified as Asset due to the requirement to report asset inventory data. + These agencies are not presently included in formula program datasets. + - name: service_area_pop + description: | + A measure of access to transit service in terms of population served and area coverage (square miles). + The reporting transit agency determines the service area boundaries and population for most transit services using the definitions contained in the Americans with Disabilities Act of 1990 (ADA), + i.e. a corridor surrounding the routes 3/4 of a mile on either side, or for rail, a series of circles of radius 3/4 mile centered on each station. Transit agency reporters are required to submit service area information. + - name: sq_miles + description: The square miles of the Primary UZA of the agency, if one exists. + - *service_area_sq_miles + - name: state_admin_funds_expended + description: | + States report the §5311 revenues they expended as a result of administering the program. 
+ Since the §5311 program operates on a reimbursement basis, revenues expended during the report year will be expended during the same year. + Report the operating revenue expended during the report year from FTA §5311 Formula Grants for Rural Areas funds. + - name: subrecipient_type + description: Reflects the type of Rural Formula Grant funding received by the subrecipient. + - name: tam_tier + description: | + Defines whether the agency is a Tier I agency required to produce their own Transit Asset Management plan (and, in parentheses, on what basis) + or a Tier II operator eligible to be in a group TAM Plan. + N/A indicates that the requirement does not apply. + - name: total_voms + description: | + The Vehicles Operated in Maximum Service ("peak service level") across the entire fiscal year for the given agency. + - name: tribal_area_name + description: The tribal land, determined by US Census data, on which tribes operate. + - name: url + description: Agency's transit website. + - name: volunteer_drivers + description: | + Individuals who drive vehicles in revenue service to transport passengers for the transit provider but are not employees of the transit provider and are not compensated for their labor. 
- name: dim_annual_funding_sources description: >- diff --git a/warehouse/models/mart/ntd/dim_annual_ntd_agency_information.sql b/warehouse/models/mart/ntd/dim_annual_agency_information.sql similarity index 77% rename from warehouse/models/mart/ntd/dim_annual_ntd_agency_information.sql rename to warehouse/models/mart/ntd/dim_annual_agency_information.sql index 250667c5d4..772aa18532 100644 --- a/warehouse/models/mart/ntd/dim_annual_ntd_agency_information.sql +++ b/warehouse/models/mart/ntd/dim_annual_agency_information.sql @@ -2,59 +2,61 @@ WITH stg_ntd__annual_database_agency_information AS ( SELECT *, -- TODO: this does not handle deletes - LEAD(ts) OVER (PARTITION BY year, ntd_id ORDER BY ts ASC) AS next_ts, + LEAD(ts) OVER (PARTITION BY year, ntd_id, state_parent_ntd_id ORDER BY ts ASC) AS next_ts, FROM {{ ref('stg_ntd__annual_database_agency_information') }} ), -dim_annual_ntd_agency_information AS ( +dim_annual_agency_information AS ( SELECT - {{ dbt_utils.generate_surrogate_key(['year', 'ntd_id', 'ts']) }} as key, + {{ dbt_utils.generate_surrogate_key(['year', 'ntd_id', 'state_parent_ntd_id', 'ts']) }} AS key, year, ntd_id, - number_of_state_counties, - tam_tier, - personal_vehicles, - density, - uza_name, - tribal_area_name, - service_area_sq_miles, - total_voms, - city, - fta_recipient_id, - region, - state_admin_funds_expended, - zip_code_ext, - zip_code, - ueid, - address_line_2, - number_of_counties_with_service, + state_parent_ntd_id, + agency_name, reporter_acronym, - original_due_date, - sq_miles, - address_line_1, - p_o__box, - fy_end_date, + doing_business_as, + division_department, + legacy_ntd_id, reported_by_ntd_id, - population, + reported_by_name, + reporter_type, reporting_module, - service_area_pop, + organization_type, subrecipient_type, + fy_end_date, + original_due_date, + address_line_1, + address_line_2, + p_o__box, + city, state, - volunteer_drivers, - primary_uza, - doing_business_as, - reporter_type, - legacy_ntd_id, - voms_do, + 
zip_code, + zip_code_ext, + region, url, - reported_by_name, + fta_recipient_id, + ueid, + service_area_sq_miles, + service_area_pop, + primary_uza_code, + primary_uza_name, + tribal_area_name, + population, + density, + sq_miles, + voms_do, voms_pt, - organization_type, - agency_name, + total_voms, + volunteer_drivers, + personal_vehicles, + tam_tier, + number_of_state_counties, + number_of_counties_with_service, + state_admin_funds_expended, ts AS _valid_from, {{ make_end_of_valid_range('COALESCE(next_ts, CAST("2099-01-01" AS TIMESTAMP))') }} AS _valid_to, next_ts IS NULL AS _is_current, FROM stg_ntd__annual_database_agency_information ) -SELECT * FROM dim_annual_ntd_agency_information +SELECT * FROM dim_annual_agency_information diff --git a/warehouse/models/mart/transit_database/dim_mobility_mart_providers.sql b/warehouse/models/mart/transit_database/dim_mobility_mart_providers.sql index 5c5b7e8947..600331d1d1 100644 --- a/warehouse/models/mart/transit_database/dim_mobility_mart_providers.sql +++ b/warehouse/models/mart/transit_database/dim_mobility_mart_providers.sql @@ -77,7 +77,7 @@ funding_by_org AS ( -- We cannot use `_is_current` here because every year is marked as "current" -- since it's the "current" record for the respective year. annual_ntd AS ( - SELECT * FROM {{ ref('dim_annual_ntd_agency_information') }} + SELECT * FROM {{ ref('dim_annual_agency_information') }} WHERE state = "CA" -- We only want data from the latest data from NTD. In the rare edge case diff --git a/warehouse/models/staging/ntd/_src.yml b/warehouse/models/staging/ntd/_src.yml index 099e5fa62f..24737376d7 100644 --- a/warehouse/models/staging/ntd/_src.yml +++ b/warehouse/models/staging/ntd/_src.yml @@ -7,3 +7,9 @@ sources: schema: external_ntd_data_products tables: - name: annual_database_agency_information + description: | + Contains basic contact and agency information for each NTD reporter. 
+ + The dataset can be found at: + https://www.transit.dot.gov/ntd/data-product/2023-annual-database-agency-information + * For other years, just replace 2023 by the desired year. diff --git a/warehouse/models/staging/ntd/_stg_ntd.yml b/warehouse/models/staging/ntd/_stg_ntd.yml index 87ff1d007e..61c9194dfa 100644 --- a/warehouse/models/staging/ntd/_stg_ntd.yml +++ b/warehouse/models/staging/ntd/_stg_ntd.yml @@ -2,12 +2,19 @@ version: 2 models: - name: stg_ntd__annual_database_agency_information + description: | + Contains basic contact and agency information for each NTD reporter. + + The dataset can be found at: + https://www.transit.dot.gov/ntd/data-product/2023-annual-database-agency-information + * For other years, just replace 2023 by the desired year. tests: - dbt_utils.unique_combination_of_columns: combination_of_columns: - ts - year - ntd_id + - state_parent_ntd_id columns: - name: ntd_id tests: diff --git a/warehouse/models/staging/ntd/stg_ntd__annual_database_agency_information.sql b/warehouse/models/staging/ntd/stg_ntd__annual_database_agency_information.sql index 61c3eed407..0112e9bee2 100644 --- a/warehouse/models/staging/ntd/stg_ntd__annual_database_agency_information.sql +++ b/warehouse/models/staging/ntd/stg_ntd__annual_database_agency_information.sql @@ -4,50 +4,52 @@ WITH source AS ( stg_ntd__annual_database_agency_information AS ( SELECT - number_of_state_counties, - tam_tier, - personal_vehicles, - density, - uza_name, - tribal_area_name, - service_area_sq_miles, - total_voms, - city, - fta_recipient_id, - region, - state_admin_funds_expended, - zip_code_ext, - zip_code, - ueid, - address_line_2, - number_of_counties_with_service, + year, + ntd_id, + state_parent_ntd_id, + agency_name, reporter_acronym, - original_due_date, - sq_miles, - address_line_1, - p_o__box, - fy_end_date, + doing_business_as, + division_department, + legacy_ntd_id, reported_by_ntd_id, - population, + reported_by_name, + reporter_type, reporting_module, - 
service_area_pop, + organization_type, subrecipient_type, + fy_end_date, + original_due_date, + address_line_1, + address_line_2, + p_o__box, + city, state, - volunteer_drivers, - primary_uza, - doing_business_as, - reporter_type, - legacy_ntd_id, - voms_do, + zip_code, + zip_code_ext, + region, url, - reported_by_name, + fta_recipient_id, + ueid, + service_area_sq_miles, + service_area_pop, + primary_uza AS primary_uza_code, + uza_name AS primary_uza_name, + tribal_area_name, + population, + density, + sq_miles, + voms_do, voms_pt, - organization_type, - agency_name, - ntd_id, + total_voms, + volunteer_drivers, + personal_vehicles, + tam_tier, + number_of_state_counties, + number_of_counties_with_service, + state_admin_funds_expended, dt, - ts, - year, + ts FROM source )