This repository has been archived by the owner on Sep 2, 2020. It is now read-only.
-
Notifications
You must be signed in to change notification settings - Fork 5
/
microsynthesis.R
97 lines (81 loc) · 4.36 KB
/
microsynthesis.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
#' microsynthesise
#'
#' This function generates an individual population from aggregate census data. Members of the population are
#' categorised by age, sex, ethnicity and geographical location.
#' This file synthesises a population in: MSOA, Sex, Age(single year), Ethnicity
#' The input data are loaded automatically and derive from 2011 census tables containing:
#' Aggregates of Persons per MSOA, Sex, Age(band) and Ethnicity
#' Aggregates of Persons per MSOA, Sex, Age(single year)
#' @return a data.table containing the synthetic population
#' @export
#' @examples
#' synpop = microsynthesise()
microsynthesise = function() {
# makes use of the following lazy-loaded package data: sexAgeEth, sexAgeYear
cat(paste("Population: ", sum(sexAgeEth$Persons), "\n"))
cat("Starting microsynthesis...")
# Generate aggregate totals for the four categories
msoa=aggregate(sexAgeEth$Persons, by=list(sexAgeEth$MSOA), FUN=sum)
sex=aggregate(sexAgeEth$Persons, by=list(sexAgeEth$Sex), FUN=sum)
age=aggregate(sexAgeEth$Persons, by=list(sexAgeEth$AgeBand), FUN=sum)
eth=aggregate(sexAgeEth$Persons, by=list(sexAgeEth$Ethnicity), FUN=sum)
# mapping from age bands to age ranges
ageLookup=data.table(Band=c("0-4","5-7","8-9","10-14","15","16-17","18-19","20-24","25-29","30-34","35-39","40-44","45-49","50-54","55-59","60-64","65-69","70-74","75-79","80-84","85+"),
LBound=c(0,5,8,10,15,16,18,20,25,30,35,40,45,50,55,60,65,70,75,80,85),
UBound=c(4,7,9,14,15,17,19,24,29,34,39,44,49,54,59,64,69,74,79,84,999))
# overall population
n = sum(sexAgeYear$Persons)
# initialise population table
synpop=data.table(MSOA=rep("",n), Sex=rep("",n), Age=rep(-1,n), Ethnicity=rep("",n))
index = 1L
# microsim to get sex-age-eth expanded to single year of age, preserving marginal totals in area, sex, age band and ethnicity
# loop over MSOAs
for (a in msoa$Group.1) {
# loop over genders
for (s in sex$Group.1) {
# loop over age bands
for (b in age$Group.1) {
# marginal labels
l1 = sexAgeEth[sexAgeEth$MSOA==a & sexAgeEth$Sex==s & sexAgeEth$AgeBand==b,]$Ethnicity
l2 = sexAgeYear[sexAgeYear$MSOA==a & sexAgeYear$Sex==s & sexAgeYear$Age >= ageLookup[Band==b]$LBound & sexAgeYear$Age <= ageLookup[Band==b]$UBound,]$Age
# marginal frequencies
m1 = sexAgeEth[sexAgeEth$MSOA==a & sexAgeEth$Sex==s & sexAgeEth$AgeBand==b,]$Persons
m2 = sexAgeYear[sexAgeYear$MSOA==a & sexAgeYear$Sex==s & sexAgeYear$Age >= ageLookup[Band==b]$LBound & sexAgeYear$Age <= ageLookup[Band==b]$UBound,]$Persons
# microsynthesis (if people exist in MSOA/sex/age combination)
if (sum(m1)>0) {
res = humanleague::synthPop(list(m1,m2))
# insert into the main population
for (i in 1:nrow(res$pop)) {
set(synpop,index,"MSOA", a)
set(synpop,index,"Sex", s)
set(synpop,index,"Age", l2[res$pop$C1[i]+1])
set(synpop,index,"Ethnicity", l1[res$pop$C0[i]+1])
index = index + 1L
}
}
}
}
}
cat("done\n")
cat("Checking consistency of microsynthesised population...")
# do some spot-checks on the population to ensure it's consistent
# ensure all rows populated
stopifnot(nrow(synpop[Age==-1]) == 0)
# check sex totals match
stopifnot(nrow(synpop[Sex=="M"]) == sum(sexAgeYear[sexAgeYear$Sex=="M",]$Persons))
stopifnot(nrow(synpop[Sex=="F"]) == sum(sexAgeYear[sexAgeYear$Sex=="F",]$Persons))
# check some age totals match
stopifnot(nrow(synpop[Age==8]) == sum(sexAgeYear[sexAgeYear$Age==8,]$Persons))
stopifnot(nrow(synpop[Age==48]) == sum(sexAgeYear[sexAgeYear$Age==48,]$Persons))
# check some ethnicity totals match
stopifnot(nrow(synpop[Ethnicity=="OTH"]) == sum(sexAgeEth[sexAgeEth$Ethnicity=="OTH",]$Persons))
stopifnot(nrow(synpop[Ethnicity=="BLC"]) == sum(sexAgeEth[sexAgeEth$Ethnicity=="BLC",]$Persons))
stopifnot(nrow(synpop[Ethnicity=="BAN"]) == sum(sexAgeEth[sexAgeEth$Ethnicity=="BAN",]$Persons))
# check some MSOA totals match
stopifnot(nrow(synpop[MSOA=="E02000864"]) == sum(sexAgeEth[sexAgeEth$MSOA=="E02000864",]$Persons))
stopifnot(nrow(synpop[MSOA=="E02000888"]) == sum(sexAgeEth[sexAgeEth$MSOA=="E02000888",]$Persons))
stopifnot(nrow(synpop[MSOA=="E02006854"]) == sum(sexAgeEth[sexAgeEth$MSOA=="E02006854",]$Persons))
cat("done\n")
# save data
return(synpop)
}