-
Notifications
You must be signed in to change notification settings - Fork 0
Data Preparation 2016
In the source code below, the data supplied by the Hawaii Department of Education are two pipe-delimited files named Hawaii_Data_LONG_2016_Prep.txt and Hawaii_Data_LONG_2016_Courtesy_Tested Prep.txt. The files are located in the relative folder Data/Base_Files. After the text data are read into R and combined into a single data table, the data are cleaned up variable by variable to ensure that they match previously established data naming conventions.
###########################################################################
###
### R Syntax for construction of 2016 Hawaii LONG data file
###
###########################################################################
### Load SGP Package
require(SGP)
require(data.table)
### Load pipe delimited data (no sep= is given, so fread detects the separator itself)
### NOTE: the files are passed through file= rather than positionally. The second
### file name contains a space, and fread() treats a positional input that contains
### a space and is not an existing file as a system command instead of reporting
### a missing file.
### Every column is read as character; 34 is the hard-coded number of columns.
Hawaii_Data_LONG_2016_1 <- fread(file="Data/Base_Files/Hawaii_Data_LONG_2016_Prep.txt",
    colClasses=rep("character", 34))
Hawaii_Data_LONG_2016_2 <- fread(file="Data/Base_Files/Hawaii_Data_LONG_2016_Courtesy_Tested Prep.txt",
    colClasses=rep("character", 34))
### Stack the two files into a single data.table
Hawaii_Data_LONG_2016 <- rbindlist(list(Hawaii_Data_LONG_2016_1, Hawaii_Data_LONG_2016_2))
### Tidy up data
### Bring the supplied column names in line with the established naming conventions
setnames(Hawaii_Data_LONG_2016,
    old=c("Valid_Case", "year", "grade", "lastName", "firstName", "EMH Level", "ELL Status", "Complex Area"),
    new=c("VALID_CASE", "Year", "Gr", "LName", "FName", "EMH.Level", "ELL_STATUS_MULTILEVEL", "Complex.Area"))
### Pass grade through numeric and back to character, then flag every record as
### valid except those in grades 1, 2 and 9
Hawaii_Data_LONG_2016[,Gr := as.character(as.numeric(Gr))][
    ,VALID_CASE := "VALID_CASE"][
    Gr %in% c("1", "2", "9"), VALID_CASE := "INVALID_CASE"]
### Set variable classes: DOE_Ethnic as character, Scale_Score as numeric and
### the remaining ethnicity/demographic indicators as factors
Hawaii_Data_LONG_2016[,DOE_Ethnic := as.character(DOE_Ethnic)]
Hawaii_Data_LONG_2016[,Scale_Score := as.numeric(Scale_Score)]
tmp.factor.variables <- c("Fed7_Ethnic", "Fed5_Ethnic", "Disadv", "ELL", "SpEd", "Migrant", "FSY")
Hawaii_Data_LONG_2016[,(tmp.factor.variables) := lapply(.SD, as.factor), .SDcols=tmp.factor.variables]
### Create ETHNICITY from Fed7_Ethnic, overriding it with "Native Hawaiian" for
### students whose DOE_Ethnic is Native Hawaiian or Part-Hawaiian
Hawaii_Data_LONG_2016[,ETHNICITY := as.character(Fed7_Ethnic)][
    DOE_Ethnic %in% c("Native Hawaiian", "Part-Hawaiian"), ETHNICITY := "Native Hawaiian"]
### Convert to a factor and relabel its 3rd and 4th levels
### NOTE(review): the relabelling is positional and so depends on the level order
### produced by as.factor -- confirm against the data
Hawaii_Data_LONG_2016[,ETHNICITY := {
    tmp.ethnicity <- as.factor(ETHNICITY)
    levels(tmp.ethnicity)[3:4] <- c("Black or African American", "Hispanic or Latino")
    tmp.ethnicity
}]
### Relabel the "Charter" district as "Charter Schools"
Hawaii_Data_LONG_2016[District=="Charter", District := "Charter Schools"]
### Convert Complex to a factor; its labels are then cleaned by editing the factor levels
Hawaii_Data_LONG_2016[,Complex := as.factor(Hawaii_Data_LONG_2016$Complex)]
### Apply capwords() to every level
### (capwords is not defined in this script; presumably supplied by the SGP package -- confirm)
levels(Hawaii_Data_LONG_2016$Complex) <- as.vector(sapply(levels(Hawaii_Data_LONG_2016$Complex), capwords))
### Append the word "Complex" to levels 21, 24 and 38 before the next step drops the last word of every level
### NOTE(review): indices are positional; presumably these are the levels supplied without a trailing word to drop -- verify against the data
levels(Hawaii_Data_LONG_2016$Complex)[c(21,24,38)] <- paste(levels(Hawaii_Data_LONG_2016$Complex)[c(21,24,38)], "Complex")
### Re-apply capwords(), split each level on spaces, drop its final word and paste the remaining words back together
levels(Hawaii_Data_LONG_2016$Complex) <- as.vector(sapply(sapply(strsplit(sapply(levels(Hawaii_Data_LONG_2016$Complex), capwords), " "), head, -1), paste, collapse=" "))
### Hard-code level 29 as "McKinley"
### NOTE(review): positional; presumably restores capitalization changed by capwords -- confirm
levels(Hawaii_Data_LONG_2016$Complex)[29] <- "McKinley"
### Convert the remaining descriptive and enrollment status variables to factors
tmp.factor.variables <- c("Complex.Area", "Sex", "ELL_STATUS_MULTILEVEL", "School_Admin_Rollup", "District",
    "STATE_ENROLLMENT_STATUS", "SCHOOL_ENROLLMENT_STATUS", "DISTRICT_ENROLLMENT_STATUS",
    "COMPLEX_ENROLLMENT_STATUS", "COMPLEX_AREA_ENROLLMENT_STATUS")
Hawaii_Data_LONG_2016[,(tmp.factor.variables) := lapply(.SD, as.factor), .SDcols=tmp.factor.variables]
### Records without full school year status are set to "No" for district,
### complex and complex area enrollment
Hawaii_Data_LONG_2016[FSY=="Full School Year Status: No",
    c("DISTRICT_ENROLLMENT_STATUS", "COMPLEX_ENROLLMENT_STATUS", "COMPLEX_AREA_ENROLLMENT_STATUS") :=
    list("Enrolled District: No", "Enrolled Complex: No", "Enrolled Complex Area: No")]
### FSY school code is stored as an integer
Hawaii_Data_LONG_2016[,FSY_SchCode := as.integer(FSY_SchCode)]
### Create HIGH_NEED_STATUS_DEMOGRAPHIC
### Every record starts at the second level (Non-ELL, Non-Special Education and Non-Disadvantaged)
Hawaii_Data_LONG_2016[,HIGH_NEED_STATUS_DEMOGRAPHIC :=
    factor(2, levels=1:2, labels=c("High Need Status: ELL, Special Education, or Disadvantaged Student",
        "High Need Status: Non-ELL, Non-Special Education, and Non-Disadvantaged Student"))]
### Records that are disadvantaged, ELL or special education are switched to the first level.
### The update is made by reference with := on an i subset (the same idiom used for the
### enrollment status variables) instead of sub-assigning through `$<-`; rows where the
### condition evaluates to NA are left unchanged.
Hawaii_Data_LONG_2016[
    Disadv=="Disadvantaged: Yes" | ELL=="ELL Status: Yes" | SpEd=="Special Education: Yes",
    HIGH_NEED_STATUS_DEMOGRAPHIC := "High Need Status: ELL, Special Education, or Disadvantaged Student"]
### Create SCHOOL_FSY_ENROLLMENT_STATUS
### Every record starts as "Enrolled School: Yes"
Hawaii_Data_LONG_2016[,SCHOOL_FSY_ENROLLMENT_STATUS := factor(2, levels=1:2, labels=c("Enrolled School: No", "Enrolled School: Yes"))]
### Records that are not enrolled in the school or lack full school year status are set to "No".
### Updated by reference with := on an i subset rather than sub-assigning through `$<-`;
### rows where the condition evaluates to NA are left unchanged.
Hawaii_Data_LONG_2016[
    SCHOOL_ENROLLMENT_STATUS=="Enrolled School: No" | FSY=="Full School Year Status: No",
    SCHOOL_FSY_ENROLLMENT_STATUS := "Enrolled School: No"]
### Reorder variables
my.variable.order <- c(
    ### Case flag, content area, year, grade and student identifiers
    "VALID_CASE", "Domain", "Year", "Gr", "IDNO", "LName", "FName",
    ### School
    "SCode_Admin_Rollup", "School_Admin_Rollup", "FSY_SchCode", "EMH.Level",
    ### District, complex and complex area
    "DCode", "District", "CCode", "Complex", "CACode", "Complex.Area",
    ### Demographics
    "Sex", "ETHNICITY", "HIGH_NEED_STATUS_DEMOGRAPHIC", "DOE_Ethnic",
    "Fed7_Ethnic", "Fed5_Ethnic", "Disadv", "ELL", "ELL_STATUS_MULTILEVEL",
    "SpEd", "Migrant",
    ### Scores and full school year status
    "Scale_Score", "Proficiency_Level", "FSY",
    ### Enrollment status indicators
    "SCHOOL_ENROLLMENT_STATUS", "DISTRICT_ENROLLMENT_STATUS",
    "COMPLEX_ENROLLMENT_STATUS", "COMPLEX_AREA_ENROLLMENT_STATUS",
    "STATE_ENROLLMENT_STATUS", "SCHOOL_FSY_ENROLLMENT_STATUS")
setcolorder(Hawaii_Data_LONG_2016, neworder=my.variable.order)
### Save results
### Write the prepared LONG data table to disk under its own object name
save(list="Hawaii_Data_LONG_2016", file="Data/Hawaii_Data_LONG_2016.Rdata")
Note that the naming conventions associated with Hawaii data provided by the DOE and the naming conventions used by the SGP Package are different. The SGP Package accommodates different state data naming conventions through the use of a meta-data lookup table embedded within the SGPstateData Rdata object in the package. The variable name lookup table for Hawaii is shown below, with names.provided giving the state-specific names, names.sgp the SGP Package-specific names, names.type the type associated with the variable (used within summarizeSGP to construct group summaries), names.info meta-data associated with the variable, and names.output a Boolean indicator of whether the variable should be used with summarizeSGP.
names.provided | names.sgp | names.type | names.info | names.output |
---|---|---|---|---|
Year | YEAR | time | Year (testing year) associated with record | |
IDNO | ID | individual | Unique student identifier | |
LName | LAST_NAME | label | Last name of student | |
FName | FIRST_NAME | label | First name of student | |
Gr | GRADE | institution_level | Grade level of test taken | |
Domain | CONTENT_AREA | content | Content area (Mathematics or Reading) | |
Scale_Score | SCALE_SCORE | measure | Student scale score | |
Proficiency_Level | ACHIEVEMENT_LEVEL | measure | Achievement level associated with student score | |
SCode_Admin_Rollup | SCHOOL_NUMBER | institution | School number rolled up for administration purposes possibly containing multiple SCHOOL_NUMBERs | |
School_Admin_Rollup | SCHOOL_NAME | label | School name rolled up for administration purposes possibly containing multiple SCHOOL_NUMBERs | |
FSY_SchCode | SCHOOL_NUMBER_FSY | institution | School number rolled up for FSY reporting | |
GradeRange | GRADE_RANGE | institution_type | Grade range of school | |
SchlType | SCHOOL_TYPE | institution_type | Type of School (Charter/DOE/Special) | |
EMH.Level | EMH_LEVEL | institution_type | Elementary/Middle/High School designation | |
Type | SCHOOL_LEVEL | institution_type | Elementary/Elementary-Middle/Middle/Middle-High/High School designation | |
DCode | DISTRICT_NUMBER | institution | District number | |
District | DISTRICT_NAME | label | District name | |
CCode | COMPLEX_NUMBER | institution | Complex number | |
Complex | COMPLEX_NAME | label | Complex name | |
CACode | COMPLEX_AREA_NUMBER | institution | Complex area number | |
Complex.Area | COMPLEX_AREA_NAME | label | Complex area name | |
ETHNICITY | ETHNICITY | demographic | Student ethnicity combining FED7 and DOE for Native Hawaiian | TRUE |
DOE_Ethnic | ETHNICITY_DOE | demographic | Student ethnicity using DOE categories | TRUE |
Fed5_Ethnic | ETHNICITY_FED5 | demographic | Student ethnicity using FED 5 categories | FALSE |
Fed7_Ethnic | ETHNICITY_FED7 | demographic | Student ethnicity using FED 7 categories | FALSE |
Disadv | DISADVANTAGED_STATUS | demographic | Disadvantaged status | TRUE |
ELL | ELL_STATUS | demographic | Student ELL status indicator | TRUE |
ELL_STATUS_MULTILEVEL | ELL_STATUS_MULTILEVEL | demographic | Student ELL multi-level status indicator | TRUE |
SpEd | SPECIAL_EDUCATION_STATUS | demographic | Student special education status indicator | TRUE |
Sex | GENDER | demographic | Student gender | TRUE |
Source | TEST_ADMINISTRATION | type of test | HSA/HSAA/HLIP/HAPA/Linapuni | |
Migrant | MIGRANT_STATUS | demographic | Student migrant status | TRUE |
HIGH_NEED_STATUS | HIGH_NEED_STATUS | demographic | High need status flag | TRUE |
HIGH_NEED_STATUS_DEMOGRAPHIC | HIGH_NEED_STATUS_DEMOGRAPHIC | demographic | High need status flag indicating ELL or SPED or DISADVANTAGED | TRUE |
FSY | FULL_SCHOOL_YEAR_STATUS | demographic | Full school year status indicator | TRUE |
SCHOOL_ENROLLMENT_STATUS | SCHOOL_ENROLLMENT_STATUS | institution_inclusion | School inclusion/accountability indicator | |
SCHOOL_FSY_ENROLLMENT_STATUS | SCHOOL_FSY_ENROLLMENT_STATUS | institution_inclusion | School inclusion/accountability indicator | |
DISTRICT_ENROLLMENT_STATUS | DISTRICT_ENROLLMENT_STATUS | institution_inclusion | District inclusion/accountability indicator | |
COMPLEX_ENROLLMENT_STATUS | COMPLEX_ENROLLMENT_STATUS | institution_inclusion | Complex inclusion/accountability indicator | |
COMPLEX_AREA_ENROLLMENT_STATUS | COMPLEX_AREA_ENROLLMENT_STATUS | institution_inclusion | Complex Area inclusion/accountability indicator | |
STATE_ENROLLMENT_STATUS | STATE_ENROLLMENT_STATUS | institution_inclusion | State inclusion/accountability indicator | |
VALID_CASE | VALID_CASE | individual inclusion | Valid case indicator | |
In addition to preparing the student-level file for analysis, Hawaii also utilizes the SGP Package for the summarization of instructor-level data. This requires a student-instructor lookup file so that appropriate summary-level variables can be created. In the source code below, student-instructor data provided by the Hawaii Department of Education are read into R and cleaned for use in SGP analyses.
################################################################################
###
### R Syntax for the construction of the 2016 student-instructor lookup table
###
################################################################################
### Load SGP Package
require(SGP)
require(data.table)
### Load data
### The student-instructor file is read with all 58 columns as character
Hawaii_Data_LONG_2016_INSTRUCTOR_NUMBER <- fread(
    input="Data/Base_Files/BFK_Cleaned_Spring_2016.txt",
    colClasses=rep("character", 58))
### Rename the "_ of Days" column
setnames(Hawaii_Data_LONG_2016_INSTRUCTOR_NUMBER, old="_ of Days", new="SumOfRosterableDays")
### Extract relevant variables
### Keep only the staff, school, subject, student and rosterable days columns, in this order
variables.to.use <- c(
    "StaffUniqueID", "StaffLastName", "StaffFirstName",
    "SchoolName", "SchoolCode", "SubjectName",
    "StateStudentID", "SumOfRosterableDays")
Hawaii_Data_LONG_2016_INSTRUCTOR_NUMBER <- Hawaii_Data_LONG_2016_INSTRUCTOR_NUMBER[, .SD, .SDcols=variables.to.use]
### Remove duplicates
### Key (and thereby sort) the table on every column, then keep the first row
### of each set of rows that are identical on the key
setkeyv(Hawaii_Data_LONG_2016_INSTRUCTOR_NUMBER, cols=names(Hawaii_Data_LONG_2016_INSTRUCTOR_NUMBER))
Hawaii_Data_LONG_2016_INSTRUCTOR_NUMBER <- unique(Hawaii_Data_LONG_2016_INSTRUCTOR_NUMBER,
    by=key(Hawaii_Data_LONG_2016_INSTRUCTOR_NUMBER))
### Tidy up data
### Rename every column, in its current order, to the lookup table names
setnames(Hawaii_Data_LONG_2016_INSTRUCTOR_NUMBER,
    c("INSTRUCTOR_NUMBER", "INSTRUCTOR_LAST_NAME", "INSTRUCTOR_FIRST_NAME",
        "SCHOOL_NAME_INSTRUCTOR", "SCHOOL_NUMBER_INSTRUCTOR", "CONTENT_AREA", "ID", "SUM_OF_DAYS"))
### Add the year and arrange the columns
Hawaii_Data_LONG_2016_INSTRUCTOR_NUMBER[,YEAR := "2016"]
setcolorder(Hawaii_Data_LONG_2016_INSTRUCTOR_NUMBER,
    c("ID", "CONTENT_AREA", "YEAR",
        "INSTRUCTOR_NUMBER", "INSTRUCTOR_LAST_NAME", "INSTRUCTOR_FIRST_NAME",
        "SCHOOL_NUMBER_INSTRUCTOR", "SCHOOL_NAME_INSTRUCTOR", "SUM_OF_DAYS"))
### Recode subject names to content area labels
Hawaii_Data_LONG_2016_INSTRUCTOR_NUMBER[CONTENT_AREA=="Math", CONTENT_AREA := "MATHEMATICS"][
    CONTENT_AREA=="ELA", CONTENT_AREA := "READING"]
### Every record is flagged as an enrolled instructor
Hawaii_Data_LONG_2016_INSTRUCTOR_NUMBER[,INSTRUCTOR_ENROLLMENT_STATUS :=
    factor(1, levels=0:1, labels=c("Enrolled Instructor: No", "Enrolled Instructor: Yes"))]
### Set variable classes: days numeric, school number integer, names as factors
Hawaii_Data_LONG_2016_INSTRUCTOR_NUMBER[,SUM_OF_DAYS := as.numeric(SUM_OF_DAYS)]
Hawaii_Data_LONG_2016_INSTRUCTOR_NUMBER[,SCHOOL_NUMBER_INSTRUCTOR := as.integer(SCHOOL_NUMBER_INSTRUCTOR)]
tmp.factor.variables <- c("INSTRUCTOR_LAST_NAME", "INSTRUCTOR_FIRST_NAME", "SCHOOL_NAME_INSTRUCTOR")
Hawaii_Data_LONG_2016_INSTRUCTOR_NUMBER[,(tmp.factor.variables) := lapply(.SD, as.factor), .SDcols=tmp.factor.variables]
### Every record is a valid case
Hawaii_Data_LONG_2016_INSTRUCTOR_NUMBER[,VALID_CASE := "VALID_CASE"]
### Create TERMS variable from sum of days
### Each record's days are divided by 40 and rounded, then totalled within
### student, content area and instructor; keyby also keys the table on those columns
Hawaii_Data_LONG_2016_INSTRUCTOR_NUMBER[,TERMS := sum(round(SUM_OF_DAYS/40), na.rm=TRUE),
    keyby=list(ID, CONTENT_AREA, INSTRUCTOR_NUMBER)]
### Keep the first row for each key combination
Hawaii_Data_LONG_2016_INSTRUCTOR_NUMBER <- unique(Hawaii_Data_LONG_2016_INSTRUCTOR_NUMBER,
    by=key(Hawaii_Data_LONG_2016_INSTRUCTOR_NUMBER))
### Create Weight Variable
### Each instructor's weight is their share of the total TERMS within student and
### content area, rounded to two decimals
### NOTE(review): a group whose TERMS total 0 yields NaN (0/0) -- confirm this is acceptable
Hawaii_Data_LONG_2016_INSTRUCTOR_NUMBER[,INSTRUCTOR_WEIGHT := round(TERMS/sum(TERMS, na.rm=TRUE), 2),
    by=c("ID", "CONTENT_AREA")]
### NULL out extraneous variables
### The day and term counts are dropped by reference
Hawaii_Data_LONG_2016_INSTRUCTOR_NUMBER[,SUM_OF_DAYS := NULL][,TERMS := NULL]
### Set column order
tmp.column.order <- c(
    "ID", "CONTENT_AREA", "YEAR",
    "INSTRUCTOR_NUMBER", "INSTRUCTOR_LAST_NAME", "INSTRUCTOR_FIRST_NAME",
    "SCHOOL_NUMBER_INSTRUCTOR", "SCHOOL_NAME_INSTRUCTOR",
    "INSTRUCTOR_WEIGHT", "INSTRUCTOR_ENROLLMENT_STATUS", "VALID_CASE")
setcolorder(Hawaii_Data_LONG_2016_INSTRUCTOR_NUMBER, neworder=tmp.column.order)
### Key the lookup table on student, content area and year
setkey(Hawaii_Data_LONG_2016_INSTRUCTOR_NUMBER, ID, CONTENT_AREA, YEAR)
### Save results
### Write the student-instructor lookup table to disk under its own object name
save(list="Hawaii_Data_LONG_2016_INSTRUCTOR_NUMBER", file="Data/Hawaii_Data_LONG_2016_INSTRUCTOR_NUMBER.Rdata")
SGP - Student Growth Percentiles SGP Blog | SGP GitHub Repo | SGP on CRAN