diff --git a/README.md b/README.md
index 34bc45de3..1c3ba1416 100644
--- a/README.md
+++ b/README.md
@@ -140,7 +140,7 @@ createdb cfdm_test
 
 Set the environment variable SQLA_SAMPLE_DB_CONN to point to this database, using:
 ```
-export SQLA_SAMPLE_DB_CONN="postgresql://:@localhost:/cfdm_test"
+export SQLA_SAMPLE_DB_CONN="postgresql+psycopg://:@localhost:/cfdm_test"
 ```
 
 Load our sample data into the development database (`cfdm_test`) by running:
diff --git a/requirements.txt b/requirements.txt
index 16df0a35d..7740c9066 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -17,7 +17,7 @@ kombu==5.5.3 # Starting from celery 5.x release, the minimum required version is
 networkx==2.6.2
 prance[osv]==0.22.11.4.0
 pre-commit==2.21.0 #client-side hook triggered by operations like git commit
-psycopg2-binary==2.9.1
+psycopg==3.2.10
 python-dateutil==2.8.1
 python-dotenv>=0.20.0 # Sets variable defaults in .flaskenv file
 requests==2.32.4
diff --git a/tasks.py b/tasks.py
index 714cc4c3d..0cc35f88d 100644
--- a/tasks.py
+++ b/tasks.py
@@ -262,9 +262,12 @@ def create_sample_db(ctx):
     """
 
     print("Loading schema...")
-    db_conn = os.getenv('SQLA_SAMPLE_DB_CONN')
+    # db_conn feeds to_jdbc_url() for migrations; db_conn2 is exported as SQLA_CONN.
+    # Fall back so an unset variable is never assigned into os.environ (TypeError).
+    db_conn = os.getenv('SQLA_SAMPLE_DB_CONN1') or os.getenv('SQLA_SAMPLE_DB_CONN')
+    db_conn2 = os.getenv('SQLA_SAMPLE_DB_CONN2') or db_conn
     if not db_conn:
-        print("Error: SQLA_SAMPLE_DB_CONN env var must be set")
+        print("Error: SQLA_SAMPLE_DB_CONN1 (or SQLA_SAMPLE_DB_CONN) env var must be set")
         return
     jdbc_url = to_jdbc_url(db_conn)
     result = run_migrations(ctx, jdbc_url)
@@ -281,7 +284,7 @@ def create_sample_db(ctx):
     print("Sample data loaded")
 
     print("Refreshing materialized views...")
-    os.environ["SQLA_CONN"] = db_conn  # SQLA_CONN is used by manage.py tasks
+    os.environ["SQLA_CONN"] = db_conn2  # SQLA_CONN is used by manage.py tasks
     subprocess.check_call(['python', 'cli.py', 'refresh_materialized'])
     print("Materialized views refreshed")
 
diff --git a/tests/test_filings.py b/tests/test_filings.py
index ad7c73c27..d32980a5c 100644
--- a/tests/test_filings.py
+++ b/tests/test_filings.py
@@ -83,7 +83,7 @@ def test_filings_filters(self):
             factories.FilingsFactory(committee_id='C00000005'),
             factories.FilingsFactory(candidate_id='H00000001'),
             factories.FilingsFactory(amendment_indicator='A'),
-            factories.FilingsFactory(beginning_image_number=123456789021234567),
+            factories.FilingsFactory(beginning_image_number='123456789021234567'),
             factories.FilingsFactory(committee_type='P'),
             factories.FilingsFactory(cycle=2000),
             factories.FilingsFactory(document_type='X'),
@@ -104,7 +104,7 @@ def test_filings_filters(self):
 
         filter_fields = (
             ('amendment_indicator', 'A'),
-            ('beginning_image_number', 123456789021234567),
+            ('beginning_image_number', '123456789021234567'),
             ('committee_type', 'P'),
             ('cycle', 2000),
             ('document_type', 'X'),
diff --git a/tests/test_legal/test_ao_load.py b/tests/test_legal/test_ao_load.py
index 2ce97a3d6..5991159e3 100644
--- a/tests/test_legal/test_ao_load.py
+++ b/tests/test_legal/test_ao_load.py
@@ -458,12 +458,13 @@ def create_document(self, ao_id, document, filename='201801_C.pdf'):
                 text("""
                     INSERT INTO aouser.document
                     (document_id, ao_id, category, ocrtext, fileimage, description, document_date, filename)
-                    VALUES (:docid, :id, :category, :text, :text, :descr, :date, :filename)"""),
+                    VALUES (:docid, :id, :category, :text, :fileimage, :descr, :date, :filename)"""),
                 {
                     "docid": document["document_id"],
                     "id": ao_id,
                     "category": document["category"],
                     "text": document["text"],
+                    "fileimage": document.get("fileimage", document["text"].encode("utf-8")),
                     "descr": document["description"],
                     "date": document["date"],
                     "filename": filename
diff --git a/webservices/args.py b/webservices/args.py
index 2b02e6de1..a965faa4a 100644
--- a/webservices/args.py
+++ b/webservices/args.py
@@ -83,42 +83,61 @@ def _deserialize(self, value, attr, data, **kwargs):
         return '{0:0>2}'.format(value)
 
 
-class Date(fields.Str):
+class Date(fields.Date):
+    """Accepts both 'YYYY-MM-DD' and 'MM/DD/YYYY' formats."""
+
+    def _deserialize(self, value, attr, data, **kwargs):
+        if isinstance(value, datetime.date):
+            return value
+
+        if not isinstance(value, str):
+            raise exceptions.ApiError(
+                exceptions.DATE_ERROR,
+                status_code=422)
 
-    def _validate(self, value):
         value = value.strip()
-        super()._validate(value)
-        try:
-            datetime.datetime.strptime(value, '%Y-%m-%d')
-        except (TypeError, ValueError):
+
+        for fmt in ("%Y-%m-%d", "%m/%d/%Y"):
             try:
-                datetime.datetime.strptime(value, '%m/%d/%Y')
-            except (TypeError, ValueError):
-                raise exceptions.ApiError(
-                    exceptions.DATE_ERROR,
-                    status_code=422)
+                return datetime.datetime.strptime(value, fmt).date()
+            except ValueError:
+                # Not this format; try the next one before rejecting the value.
+                continue
+
+        raise exceptions.ApiError(
+            exceptions.DATE_ERROR,
+            status_code=422)
 
 
-class FullDate(fields.Str):
+class FullDate(fields.DateTime):
+    """Accepts ISO 8601, 'YYYY-MM-DD', or 'MM/DD/YYYY' and returns datetime."""
+
+    def _deserialize(self, value, attr, data, **kwargs):
+        if isinstance(value, datetime.datetime):
+            return value
+
+        if not isinstance(value, str):
+            raise exceptions.ApiError(
+                exceptions.FULLDATE_ERROR,
+                status_code=422)
 
-    def _validate(self, value):
         value = value.strip()
-        super()._validate(value)
+
+        if value.endswith("Z"):
+            value = value[:-1] + "+00:00"
+
         try:
-            if value.endswith("Z"):
-                value = value[:-1] + "+00:00"
-            # Try parsing ISO 8601 format
-            datetime.datetime.fromisoformat(value)
-        except (TypeError, ValueError):
+            return datetime.datetime.fromisoformat(value)
+        except ValueError:
+            pass
+
+        for fmt in ("%Y-%m-%d", "%m/%d/%Y"):
             try:
-                # Try parsing YYYY-MM-DD
-                datetime.datetime.strptime(value, '%Y-%m-%d')
-            except (TypeError, ValueError):
-                try:
-                    # Try parsing MM/DD/YYYY
-                    datetime.datetime.strptime(value, '%m/%d/%Y')
-                except (TypeError, ValueError):
-                    raise exceptions.ApiError(
+                return datetime.datetime.strptime(value, fmt)
+            except ValueError:
+                continue
+
+        raise exceptions.ApiError(
                         exceptions.FULLDATE_ERROR,
                         status_code=422)
 
@@ -139,7 +158,23 @@ def _validate(self, value):
                 status_code=422)
 
 
-class FileNumber(fields.Str):
+class ImageNumberInt(fields.Int):
+
+    def _validate(self, value):
+        super()._validate(value)
+        try:
+            value = int(value)
+        except (TypeError, ValueError):
+            raise exceptions.ApiError(
+                exceptions.IMAGE_NUMBER_ERROR,
+                status_code=422)
+        if value < 0:
+            raise exceptions.ApiError(
+                exceptions.IMAGE_NUMBER_ERROR,
+                status_code=422)
+
+
+class FileNumber(fields.Int):
 
     def _validate(self, value):
         super()._validate(value)
@@ -572,7 +607,7 @@ def make_seek_args(field=fields.Int, description=None):
     'report_type': fields.List(IStr, metadata={'description': docs.REPORT_TYPE}),
     'request_type': fields.List(IStr, metadata={'description': docs.REQUEST_TYPE}),
     'document_type': fields.List(IStr, metadata={'description': docs.DOC_TYPE}),
-    'beginning_image_number': fields.List(ImageNumber, metadata={'description': docs.BEGINNING_IMAGE_NUMBER}),
+    'beginning_image_number': fields.List(ImageNumberInt, metadata={'description': docs.BEGINNING_IMAGE_NUMBER}),
     'report_year': fields.List(fields.Int, metadata={'description': docs.REPORT_YEAR}),
     'min_receipt_date': Date(metadata={'description': docs.MIN_RECEIPT_DATE}),
     'max_receipt_date': Date(metadata={'description': docs.MAX_RECEIPT_DATE}),
diff --git a/webservices/common/models/costs.py b/webservices/common/models/costs.py
index 8f1b7f8f0..05fbb0dd8 100644
--- a/webservices/common/models/costs.py
+++ b/webservices/common/models/costs.py
@@ -7,8 +7,8 @@ class CommunicationCost(db.Model):
 
     __tablename__ = 'ofec_communication_cost_mv'
 
-    sub_id = db.Column(db.Integer, primary_key=True)
-    original_sub_id = db.Column('orig_sub_id', db.Integer, index=True)
+    sub_id = db.Column(db.BigInteger, primary_key=True)
+    original_sub_id = db.Column('orig_sub_id', db.BigInteger, index=True)
     candidate_id = db.Column('cand_id', db.String, index=True)
     committee_id = db.Column('cmte_id', db.String, index=True)
     committee_name = db.Column(db.String)
@@ -61,7 +61,7 @@ class Electioneering(db.Model):
     candidate_state = db.Column('cand_office_st', db.String, index=True)
     beginning_image_number = db.Column('f9_begin_image_num', db.String, index=True)
     sb_image_num = db.Column(db.String, index=True)
-    sub_id = db.Column(db.Integer, doc=docs.EC_SUB_ID)
+    sub_id = db.Column(db.BigInteger, doc=docs.EC_SUB_ID)
     link_id = db.Column(db.Integer)
     sb_link_id = db.Column(db.String)
     number_of_candidates = db.Column(db.Numeric)
diff --git a/webservices/common/models/itemized.py b/webservices/common/models/itemized.py
index 0c6695ea8..2c346bf0b 100644
--- a/webservices/common/models/itemized.py
+++ b/webservices/common/models/itemized.py
@@ -168,8 +168,8 @@ class ScheduleA(BaseItemized):
     schedule_type_full = db.Column('schedule_type_desc', db.String)
     increased_limit = db.Column(db.String)
     load_date = db.Column('pg_date', db.DateTime)
-    sub_id = db.Column(db.Integer, primary_key=True)
-    original_sub_id = db.Column('orig_sub_id', db.Integer)
+    sub_id = db.Column(db.BigInteger, primary_key=True)
+    original_sub_id = db.Column('orig_sub_id', db.BigInteger)
     back_reference_transaction_id = db.Column('back_ref_tran_id', db.String)
     back_reference_schedule_name = db.Column('back_ref_sched_nm', db.String)
     pdf_url = db.Column(db.String)
@@ -365,8 +365,8 @@ class ScheduleB(BaseItemized):
     schedule_type = db.Column('schedule_type', db.String)
     schedule_type_full = db.Column('schedule_type_desc', db.String)
     load_date = db.Column('pg_date', db.DateTime)
-    sub_id = db.Column(db.Integer, primary_key=True)
-    original_sub_id = db.Column('orig_sub_id', db.Integer)
+    sub_id = db.Column(db.BigInteger, primary_key=True)
+    original_sub_id = db.Column('orig_sub_id', db.BigInteger)
     back_reference_transaction_id = db.Column('back_ref_tran_id', db.String)
     back_reference_schedule_id = db.Column('back_ref_sched_id', db.String)
     semi_annual_bundled_refund = db.Column('semi_an_bundled_refund', db.Numeric(30, 2))
@@ -464,8 +464,8 @@ class ScheduleC(PdfMixin, BaseItemized):
         )''',
         lazy='joined',
     )
-    sub_id = db.Column(db.Integer, primary_key=True)
-    original_sub_id = db.Column('orig_sub_id', db.Integer)
+    sub_id = db.Column(db.BigInteger, primary_key=True)
+    original_sub_id = db.Column('orig_sub_id', db.BigInteger)
     incurred_date = db.Column('incurred_dt', db.Date)
     loan_source_prefix = db.Column('loan_src_prefix', db.String)
     loan_source_first_name = db.Column('loan_src_f_nm', db.String)
@@ -532,8 +532,8 @@ def form_line_number(self):
 class ScheduleD(PdfMixin, BaseItemized):
     __tablename__ = 'ofec_sched_d_mv'
 
-    sub_id = db.Column(db.Integer, primary_key=True)
-    original_sub_id = db.Column('orig_sub_id', db.Integer)
+    sub_id = db.Column(db.BigInteger, primary_key=True)
+    original_sub_id = db.Column('orig_sub_id', db.BigInteger)
     committee_name = db.Column('cmte_nm', db.String, doc=docs.COMMITTEE_NAME)
     creditor_debtor_name = db.Column('cred_dbtr_nm', db.String)
     creditor_debtor_last_name = db.Column('cred_dbtr_l_nm', db.String)
@@ -588,7 +588,7 @@ def form_line_number(self):
 class ScheduleE(PdfMixin, BaseItemized):
     __tablename__ = 'ofec_sched_e_mv'
 
-    sub_id = db.Column(db.String, primary_key=True)
+    sub_id = db.Column(db.BigInteger, primary_key=True)
     # Payee info
     payee_prefix = db.Column(db.String)
     payee_name = db.Column('pye_nm', db.String)
@@ -657,7 +657,7 @@ class ScheduleE(PdfMixin, BaseItemized):
     filer_last_name = db.Column('filer_l_nm', db.String)
     filer_suffix = db.Column(db.String)
     transaction_id = db.Column('tran_id', db.String)
-    original_sub_id = db.Column('orig_sub_id', db.Integer)
+    original_sub_id = db.Column('orig_sub_id', db.BigInteger)
     action_code = db.Column('action_cd', db.String)
     action_code_full = db.Column('action_cd_desc', db.String)
     # Auxiliary fields
@@ -772,8 +772,8 @@ class ScheduleF(PdfMixin, BaseItemized):
         lazy='joined',
     )
 
-    sub_id = db.Column(db.Integer, primary_key=True)
-    original_sub_id = db.Column('orig_sub_id', db.Integer)
+    sub_id = db.Column(db.BigInteger, primary_key=True)
+    original_sub_id = db.Column('orig_sub_id', db.BigInteger)
     committee_designated_coordinated_expenditure_indicator = db.Column('cmte_desg_coord_exp_ind', db.String)
     committee_name = db.Column('cmte_nm', db.String)
     entity_type = db.Column('entity_tp', db.String)
@@ -888,8 +888,8 @@ class ScheduleH4(BaseItemized):
     disbursement_amount = db.Column('disbursement_amount', db.Numeric(30, 2), index=True)
     schedule_type = db.Column('schedule_type', db.String)
     schedule_type_full = db.Column('schedule_type_desc', db.String)
-    sub_id = db.Column(db.Integer, primary_key=True)
-    original_sub_id = db.Column('orig_sub_id', db.Integer)
+    sub_id = db.Column(db.BigInteger, primary_key=True)
+    original_sub_id = db.Column('orig_sub_id', db.BigInteger)
     federal_share = db.Column('fed_share', db.Numeric(14, 2))
     nonfederal_share = db.Column('nonfed_share', db.Numeric(14, 2))
     administrative_voter_drive_activity_indicator = db.Column('admin_voter_drive_acty_ind', db.String)
diff --git a/webservices/common/models/links.py b/webservices/common/models/links.py
index ef8e88d85..ab8e3ce11 100644
--- a/webservices/common/models/links.py
+++ b/webservices/common/models/links.py
@@ -30,7 +30,7 @@ class CandidateCommitteeAlternateLink(db.Model):
     __table_args__ = {"schema": "disclosure"}
     __tablename__ = "cand_cmte_linkage_alternate"
 
-    sub_id = db.Column(db.Integer, primary_key=True, doc=docs.SUB_ID)
+    sub_id = db.Column(db.BigInteger, primary_key=True, doc=docs.SUB_ID)
     candidate_id = db.Column(
         "cand_id",
         db.String,
diff --git a/webservices/common/models/national_party.py b/webservices/common/models/national_party.py
index 3c0034455..709de8e4c 100644
--- a/webservices/common/models/national_party.py
+++ b/webservices/common/models/national_party.py
@@ -82,7 +82,7 @@ class NationalParty_ScheduleA(db.Model):
     memo_cd_desc = db.Column(db.String)
     memo_text = db.Column(db.String)
     national_cmte_nonfed_acct = db.Column(db.String)
-    orig_sub_id = db.Column(db.Integer)
+    orig_sub_id = db.Column(db.BigInteger)
     party = db.Column(db.String)
     party_account_type = db.Column('party_account', db.String)
     party_full = db.Column(db.String)
@@ -96,7 +96,7 @@ class NationalParty_ScheduleA(db.Model):
     schedule_type_desc = db.Column(db.String)
     state = db.Column(db.String)
     state_full = db.Column(db.String)
-    sub_id = db.Column(db.Integer, primary_key=True, index=True)
+    sub_id = db.Column(db.BigInteger, primary_key=True, index=True)
     tran_id = db.Column(db.String)
     treasurer_name = db.Column(db.String)
     two_year_transaction_period = db.Column(db.Integer)
@@ -152,7 +152,7 @@ class NationalParty_ScheduleB(db.Model):
     memo_cd_desc = db.Column(db.String)
     memo_text = db.Column(db.String)
     national_cmte_nonfed_acct = db.Column('national_cmte_nonfed_acct', db.String)
-    orig_sub_id = db.Column(db.Integer)
+    orig_sub_id = db.Column(db.BigInteger)
     party = db.Column(db.String)
     party_account = db.Column('party_account', db.String)
     party_full = db.Column('party_full', db.String)
@@ -195,7 +195,7 @@ class NationalParty_ScheduleB(db.Model):
     spender_committee_type_full = db.Column('committee_type_full', db.String)
     state = db.Column(db.String)
     state_full = db.Column(db.String)
-    sub_id = db.Column(db.Integer, primary_key=True, index=True)
+    sub_id = db.Column(db.BigInteger, primary_key=True, index=True)
     tran_id = db.Column(db.String)
     treasurer_name = db.Column(db.String)
     two_year_transaction_period = db.Column(db.Integer)
diff --git a/webservices/common/models/operations_log.py b/webservices/common/models/operations_log.py
index 324acc006..5534ebff2 100644
--- a/webservices/common/models/operations_log.py
+++ b/webservices/common/models/operations_log.py
@@ -5,8 +5,8 @@ class OperationsLog(db.Model):
 
     __tablename__ = 'fec_operations_log_vw'
 
-    sub_id = db.Column(db.Integer, primary_key=True, doc=docs.SUB_ID)
-    status_num = db.Column(db.Integer, doc=docs.STATUS_NUM)
+    sub_id = db.Column(db.BigInteger, primary_key=True, doc=docs.SUB_ID)
+    status_num = db.Column(db.String, doc=docs.STATUS_NUM)
     form_type = db.Column('form_tp', db.String, doc=docs.FORM_TYPE)
     report_year = db.Column('rpt_yr', db.Integer, doc=docs.REPORT_YEAR)
     candidate_committee_id = db.Column('cand_cmte_id', db.String, doc=docs.CAND_CMTE_ID)
diff --git a/webservices/common/models/reports.py b/webservices/common/models/reports.py
index 7ab11f133..c06e29d1c 100644
--- a/webservices/common/models/reports.py
+++ b/webservices/common/models/reports.py
@@ -112,7 +112,7 @@ class CommitteeReports(FecFileNumberMixin, PdfMixin, CsvMixin, BaseModel):
     file_number = mapped_column(db.Integer, sort_order=-490)
     amendment_indicator = mapped_column('amendment_indicator', db.String, sort_order=-480)
     amendment_indicator_full = mapped_column(db.String, sort_order=-470)
-    beginning_image_number = mapped_column(db.BigInteger, doc=docs.BEGINNING_IMAGE_NUMBER, sort_order=-460)
+    beginning_image_number = mapped_column(db.String, doc=docs.BEGINNING_IMAGE_NUMBER, sort_order=-460)
     cash_on_hand_beginning_period = mapped_column(
         db.Numeric(30, 2), doc=docs.CASH_ON_HAND_BEGIN_PERIOD,
         sort_order=-450
@@ -483,7 +483,7 @@ class CommitteeReportsPresidential(CommitteeReports):
 class CommitteeReportsIEOnly(PdfMixin, BaseModel):
     __tablename__ = 'ofec_reports_ie_only_mv'
 
-    beginning_image_number = db.Column(db.BigInteger)
+    beginning_image_number = db.Column(db.String)
     committee_id = db.Column(db.String)
     committee_name = db.Column(db.String)
     cycle = db.Column(db.Integer)
@@ -521,7 +521,7 @@ class BaseFiling(FecFileNumberMixin, AmendmentChainMixin, PdfMixin, FecMixin, db
     coverage_end_date = db.Column('through_date', db.Date)
     rpt_pgi = db.Column('rptpgi', db.String, doc=docs.ELECTION_TYPE)
     report_type = db.Column('rptcode', db.String)
-    beginning_image_number = db.Column('imageno', db.BigInteger)
+    beginning_image_number = db.Column('imageno', db.String)
     street_1 = db.Column('str1', db.String)
     street_2 = db.Column('str2', db.String)
     city = db.Column(db.String)
diff --git a/webservices/resources/candidate_aggregates.py b/webservices/resources/candidate_aggregates.py
index c96f3666b..acf6309bc 100644
--- a/webservices/resources/candidate_aggregates.py
+++ b/webservices/resources/candidate_aggregates.py
@@ -480,24 +480,20 @@ def build_query(self, **kwargs):
             query = query.add_columns(
                 total.office.label("office")
             )
-            query = query.add_columns(
-                sa.case(
-                    (total.party == "DFL", "DEM"),
-                    (total.party == "DEM", "DEM"),
-                    (total.party == "REP", "REP"),
-                    else_="Other",
-                ).label("party")
-            )
+            # Group by the unlabeled CASE expression itself: a bare "party" name in
+            # GROUP BY would be ambiguous with the input column total.party.
+            party_case = sa.case(
+                (total.party == "DFL", "DEM"),
+                (total.party == "DEM", "DEM"),
+                (total.party == "REP", "REP"),
+                else_="Other",
+            )
+            query = query.add_columns(party_case.label("party"))
 
             query = query.group_by(
                 total.election_year,
                 total.office,
-                sa.case(
-                    (total.party == "DFL", "DEM"),
-                    (total.party == "DEM", "DEM"),
-                    (total.party == "REP", "REP"),
-                    else_="Other",
-                ),
+                party_case,
             )
 
             # without `aggregate_by`, aggregate by election_year only