Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

2015 PUF compatibility #437

Open
wants to merge 4 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@ __pycache__/

# IRS-SOI PUF and related CPS matching data files
puf*.csv
demographics*.csv
*puf.csv
puf.csv*
cps-matched-puf.csv
Expand Down
4 changes: 3 additions & 1 deletion docs/book/content/data/puf_file_doc.md
Original file line number Diff line number Diff line change
Expand Up @@ -17,4 +17,6 @@ Coming soon...

## Final Prep

Coming soon...
### Imputing Pension Contributions

Target data source: [IRS SOI W-2 statistics](https://www.irs.gov/statistics/soi-tax-stats-individual-information-return-form-w2-statistics)
3 changes: 3 additions & 0 deletions taxdata/cps/cpsmar.py
Original file line number Diff line number Diff line change
Expand Up @@ -162,6 +162,9 @@ def create_cps(
# person record
elif rec_type == "3":
person = parse(record, parsing_dict["person"])
# add housing subsidy to person record because it's needed in person_details
if year < 2016:
person['fhoussub'] = family['fhoussub']
person = person_details(
person,
benefits,
Expand Down
2 changes: 1 addition & 1 deletion taxdata/cps/create.py
Original file line number Diff line number Diff line change
Expand Up @@ -106,7 +106,7 @@ def create(
_units = []
for year in cps_files:
print(f"Creating Tax Units for {year}")
_yr_units = pycps(cps_dfs[year], year, verbose)
_yr_units = pycps(cps_dfs[year], year, benefits, verbose)
if validate:
validate_cps_units(cps_dfs[year], _yr_units, year)
_units.append(_yr_units)
Expand Down
12 changes: 7 additions & 5 deletions taxdata/cps/pycps.py
Original file line number Diff line number Diff line change
Expand Up @@ -321,19 +321,21 @@ def _create_units(data, year, verbose=False, ctam_benefits=False):
return [unit.output() for unit in units.values()]


def pycps(cps: list, year: int, verbose: bool) -> pd.DataFrame:
def pycps(cps: list, year: int, ctam_benefits: bool, verbose: bool) -> pd.DataFrame:
"""
Core code for iterating through the households
Parameters
----------
cps: List where each element is a household in the CPS
year: CPS year to use
ctam_benefits: If true, attach C-TAM benefits to the CPS
verbose
"""
tax_units = []
ctam_benefits = True
if year not in C_TAM_YEARS:
ctam_benefits = False
if year not in C_TAM_YEARS and ctam_benefits:
raise ValueError(f'C-TAM Benefits not available for year {year}')
for hh in tqdm(cps):
tax_units += create_units(hh, year - 1, ctam_benefits=ctam_benefits)
tax_units += create_units(hh, year - 1, ctam_benefits=ctam_benefits, verbose=verbose)
# create a DataFrame of tax units with the new
tax_units_df = pd.DataFrame(tax_units)

Expand Down
2 changes: 2 additions & 0 deletions taxdata/puf/finalprep.py
Original file line number Diff line number Diff line change
Expand Up @@ -241,6 +241,8 @@ def split_earnings_variables(data, data_year):
mte = 106800
elif data_year == 2011:
mte = 106800
elif data_year == 2015:
mte = 118500
else:
raise ValueError("illegal SOI PUF data year {}".format(data_year))
# total self-employment earnings subject to SECA taxation
Expand Down
23 changes: 9 additions & 14 deletions taxdata/puf/impute_pencon.py
Original file line number Diff line number Diff line change
Expand Up @@ -39,16 +39,10 @@
for details.
"""
from __future__ import print_function
import sys
import numpy as np
import pandas as pd
from pathlib import Path

if sys.version_info[0] < 3:
from StringIO import StringIO
else:
from io import StringIO


CURPATH = Path(__file__).resolve().parent
DUMP0 = False
Expand Down Expand Up @@ -96,7 +90,7 @@ def targets(year):
1e6,
2e6,
5e6,
30e6,
124e6,
]


Expand Down Expand Up @@ -124,7 +118,7 @@ def wage_group(row):
for grp, underwage in enumerate(UNDER_WAGE):
if row["wage"] < underwage:
return grp
raise ValueError("illegal value of wage")
raise ValueError(f"illegal value of wage: {row['wage']}")


# end of wage_group() function
Expand Down Expand Up @@ -156,11 +150,12 @@ def wage_group(row):
# several times each with a different value of HIWAGE_PROB_SF.


# specify maximum legal elective deferral amount for DC pensions in 2011
MAX_PENCON_AMT = 16500
# Maximum legal elective deferral amount (in dollars) for DC pensions,
# keyed by each PUF data year that is supported.  impute() uses the entry
# for the requested year to cap every imputed pension contribution.
# 2011 limit: $16,500; 2015 limit: $18,000.
MAX_PENCON_AMT = {2011: 16500, 2015: 18000}


def impute(idata, target_cnt, target_amt):
def impute(idata, target_cnt, target_amt, year):
"""
Impute idata[pencon] given other idata variables and targets.
"""
Expand Down Expand Up @@ -202,7 +197,7 @@ def impute(idata, target_cnt, target_amt):
num_iterations = 10
for itr in range(0, num_iterations):
uncapped_amt = np.where(pos_pc, np.round(wage * rate0).astype(int), 0)
capped_amt = np.minimum(uncapped_amt, MAX_PENCON_AMT)
capped_amt = np.minimum(uncapped_amt, MAX_PENCON_AMT[year])
over_amt = uncapped_amt - capped_amt
over_tot = (over_amt * wgt).sum() * 1e-9
rate1 = min(1.0, (cell_target_amt + over_tot) / wgt_pos_pc_wages)
Expand Down Expand Up @@ -298,12 +293,12 @@ def impute_pension_contributions(alldata, year):
# do two imputations to construct gross wages for PUF records
idata["wage"] = idata["e00200"]
idata["wagegrp"] = idata.apply(wage_group, axis=1)
impute(idata, target_cnt, target_amt)
impute(idata, target_cnt, target_amt, year)
idata["wage"] = np.where(
idata["filer"] == 1, idata["e00200"] + idata["pencon"], idata["e00200"]
)
idata["wagegrp"] = idata.apply(wage_group, axis=1) # gross wage group
impute(idata, target_cnt, target_amt)
impute(idata, target_cnt, target_amt, year)
if DUMP0:
cnt = (idata["weight"] * (idata["pencon"] > 0)).sum() * 1e-6
print("wgt_pencon_cnt(#M)= {:.3f}".format(cnt))
Expand Down
1 change: 1 addition & 0 deletions taxdata/puf/preppuf.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
2009: [999999],
2010: [999998, 999999],
2011: [999996, 999997, 999998, 999999],
2015: [999996, 999997, 999998, 999999],
}


Expand Down