-
Notifications
You must be signed in to change notification settings - Fork 1
/
BinMatching-Function.R
249 lines (203 loc) · 16.3 KB
/
BinMatching-Function.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
# This function reads in a data frame for all groups where there are bird observations for both observers (f/r)
# It compares the records and classifies each bird observation as follows:
#
# 1. checks for perfect match of species-count-type, classifies as perfectMatch
# 1b. checks scoter and scaup for band match .... if bands match, perfectMatch_B (classified as perfect if band doesn't match)
# 2. checks species records for each observer for perfect match to a generic for other observer (perfectMatchGen)
# 3. After removing perfect matches, checks remaining species in groups and codes as follows:
# 4. i. noMatchgrp = only one bird for one observer left after perfect matches, this is similar to a "noMatch" record, but in a group
# ii. countMatchSp = if front and rear share species, check if they match when summed over all species in group
# iii. countMatchGeneric = if front/rear has species and rear/front has corresponding generic, check if sum totBirds match
# Note: if there are TWO+ species coded + generic, the matching is ambiguous and not considered
# Matching function is run after ambiguous groups are flagged, ambiguous groups have all birds except perfect matches coded misMatch
# There are 0 ambiguous groups
# iv. speciesMatch = if species match but not total bird count for species
# v. genericMatch = if front/rear has species and rear/front has generic, counts do not match
# all remaining records are "misMatch" ... including same generic coded to different species (e.g., f = BLSC and r = SUSC)
BinMatching.fn <- function(inFrame) {
outFrame <- NULL
front <- inFrame %>% filter(pos == "f") %>% select(species, count, bin, genericSp, index) %>%
mutate(check1 = paste(species,count), check2 = paste(genericSp,count), check3 = paste(species,bin), check4 = paste(genericSp, bin))
rear <- inFrame %>% filter(pos == "r") %>% select(species, count, bin, genericSp, index) %>%
mutate(check1 = paste(species,count), check2 = paste(genericSp,count), check3 = paste(species, bin), check4 = paste(genericSp, bin)) # At start, both front and rear have at least one observation in group, or function is not called
# 1. Code any perfect matches (species, count both match) ####
for (i in 1:nrow(front)) {
if (nrow(rear) > 0) { # rear is decremented by each match, so need to ensure there are still records to check
testMatch <- rear$index[front$check1[i] == rear$check1][1] # select first match
if(!is.na(testMatch)) { # if there is a match, do
matchType <- "perfectMatch"
outFrame <- bind_rows(outFrame,
inFrame %>% filter(index %in% c(front$index[i],testMatch)) %>% mutate(reconcile = matchType)
)
rear <- rear %>% filter(index != testMatch) # drop matched record
} # end reconcile code update if match exists
} # end action loop if rear still has records to check
} # end i loop
# 2. Check for perfect generic match (swap generic in for Front species, swap generic in for Rear species) ####
# Note: can't do front/rear simultaneously or could match BLSC to SUSC, e.g., only want BLSC to SCOT, etc.
# This happens after any generics are matched
# 2a. Check front species to rear generic: ####
front <- front %>% filter(!(index %in% outFrame$index)) # matched records were dropped in rear, need to be dropped from front
if (nrow(front) > 0) { # first need to check there are still front records to match ....
# This is the same code as above, using check2 for front, then repeated with check2 for rear:
for (i in 1:nrow(front)) {
if (nrow(rear) > 0) { # rear is decremented by each match, so need to ensure there are still records to check
testMatch <- rear$index[front$check2[i] == rear$check1][1] # select first match
if(!is.na(testMatch)) { # if there is a match, do
matchType <- "perfectMatchGen"
outFrame <- bind_rows(outFrame,
inFrame %>% filter(index %in% c(front$index[i],testMatch)) %>% mutate(reconcile = matchType)
)
rear <- rear %>% filter(index != testMatch) # drop matched record
} # end reconcile code update if match exists
} # end action loop if rear still has records to check
} # end i loop
} # end action loop if front still has records to check
# 2b. Check rear species to front generic: ####
front <- front %>% filter(!(index %in% outFrame$index)) # matched records were dropped in rear, need to be dropped from front
if (nrow(rear) > 0) { # first need to check there are still rear records to match ....
# This is the same code as above, using check2 for rear, then repeated with check2 for front:
for (i in 1:nrow(rear)) {
if (nrow(front) > 0) { # front is decremented by each match, so need to ensure there are still records to check
testMatch <- front$index[rear$check2[i] == front$check1][1] # select first match
if(!is.na(testMatch)) { # if there is a match, do
matchType <- "perfectMatchGen"
outFrame <- bind_rows(outFrame,
inFrame %>% filter(index %in% c(rear$index[i],testMatch)) %>% mutate(reconcile = matchType)
)
front <- front %>% filter(index != testMatch) # drop matched record
} # end reconcile code update if match exists
} # end action loop if front still has records to check
} # end i loop
} # end action loop if rear still has records to check
rear <- rear %>% filter(!(index %in% outFrame$index)) # matched records were dropped in front, need to be dropped from rear
# 3. consider case of count matches ... ####
# This occurs if observers have "broken up" observations into different social pairings
if (nrow(front) == 0 | nrow(rear) == 0) { # are there observations left to compare?
if (nrow(front) > 0) { # one or both could be empty after perfect matches accounted for ....
testMatch <- front$index
outFrame <- bind_rows(outFrame,
inFrame %>% filter(index %in% testMatch) %>% mutate(reconcile = "noMatchgrp"))
}
if (nrow(rear) > 0) {
testMatch <- rear$index
outFrame <- bind_rows(outFrame,
inFrame %>% filter(index %in% testMatch) %>% mutate(reconcile = "noMatchgrp"))
}
} else {
# determine if there are species matches in other observer's records:
sp.match <- intersect(front$species, rear$species)
if(length(sp.match) > 0) { # check counts for matched species
compareCounts <- full_join(front %>% filter(species %in% sp.match) %>%
select(species, tBf = count) %>% group_by(species) %>%
summarize(tBf = sum(tBf)),
rear %>% filter(species %in% sp.match) %>%
select(species, tBr = count) %>% group_by(species) %>%
summarize(tBr = sum(tBr))) %>%
ungroup() %>%
mutate(countMatch = (tBf == tBr))
sp.matchCount <- compareCounts$species[compareCounts$countMatch] # species with count matches
testMatch <- c(front$index[front$species %in% sp.matchCount],
rear$index[rear$species %in% sp.matchCount])
outFrame <- bind_rows(outFrame,
inFrame %>% filter(index %in% testMatch) %>% mutate(reconcile = "countMatchSp"))
# Remove matched species-count
front <- front %>% filter(!(species %in% sp.matchCount)) # drop species with count matches
rear <- rear %>% filter(!(species %in% sp.matchCount))
}
# check for generic matches ....
generic.match <- intersect(front$genericSp, rear$genericSp)
if(length(generic.match) > 0 &
(unique(inFrame$ambigGrp) == "no")) { # check counts for generics, not including ambiguous groups
compareCounts <- full_join(front %>% filter(genericSp %in% generic.match) %>%
select(genericSp, tBf = count) %>% group_by(genericSp) %>%
summarize(tBf = sum(tBf)),
rear %>% filter(genericSp %in% generic.match) %>%
select(genericSp, tBr = count) %>% group_by(genericSp) %>%
summarize(tBr = sum(tBr))) %>%
ungroup() %>%
mutate(countMatch = (tBf == tBr))
generic.matchCount <- compareCounts$genericSp[compareCounts$countMatch] # generics with count matches
testMatch <- c(front$index[front$genericSp %in% generic.matchCount],
rear$index[rear$genericSp %in% generic.matchCount])
outFrame <- bind_rows(outFrame,
inFrame %>% filter(index %in% testMatch) %>% mutate(reconcile = "countMatchGeneric"))
front <- front %>% filter(!(genericSp %in% generic.matchCount)) # drop species with generic count matches
rear <- rear %>% filter(!(genericSp %in% generic.matchCount))
}
#Code any perfect bin matches (species, count bin both match) ####
for (i in 1:nrow(front)) {
if (nrow(rear) > 0) { # rear is decremented by each match, so need to ensure there are still records to check
testMatch <- rear$index[front$check3[i] == rear$check3][1] # select first match
if(!is.na(testMatch)) { # if there is a match, do
matchType <- "perfectBinMatch"
outFrame <- bind_rows(outFrame,
inFrame %>% filter(index %in% c(front$index[i],testMatch)) %>% mutate(reconcile = matchType)
)
rear <- rear %>% filter(index != testMatch) # drop matched record
} # end reconcile code update if match exists
} # end action loop if rear still has records to check
} # end i loop
# 2a. Check front species to rear generic: ####
front <- front %>% filter(!(index %in% outFrame$index)) # matched records were dropped in rear, need to be dropped from front
if (nrow(front) > 0) { # first need to check there are still front records to match ....
# This is the same code as above, using check2 for front, then repeated with check2 for rear:
for (i in 1:nrow(front)) {
if (nrow(rear) > 0) { # rear is decremented by each match, so need to ensure there are still records to check
testMatch <- rear$index[front$check4[i] == rear$check4][1] # select first match
if(!is.na(testMatch)) { # if there is a match, do
matchType <- "perfectBinMatchGen"
outFrame <- bind_rows(outFrame,
inFrame %>% filter(index %in% c(front$index[i],testMatch)) %>% mutate(reconcile = matchType)
)
rear <- rear %>% filter(index != testMatch) # drop matched record
} # end reconcile code update if match exists
} # end action loop if rear still has records to check
} # end i loop
} # end action loop if front still has records to check
# 2b. Check rear species to front generic: ####
front <- front %>% filter(!(index %in% outFrame$index)) # matched records were dropped in rear, need to be dropped from front
if (nrow(rear) > 0) { # first need to check there are still rear records to match ....
# This is the same code as above, using check2 for rear, then repeated with check2 for front:
for (i in 1:nrow(rear)) {
if (nrow(front) > 0) { # front is decremented by each match, so need to ensure there are still records to check
testMatch <- front$index[rear$check4[i] == front$check4][1] # select first match
if(!is.na(testMatch)) { # if there is a match, do
matchType <- "perfectBinMatchGen"
outFrame <- bind_rows(outFrame,
inFrame %>% filter(index %in% c(rear$index[i],testMatch)) %>% mutate(reconcile = matchType)
)
front <- front %>% filter(index != testMatch) # drop matched record
} # end reconcile code update if match exists
} # end action loop if front still has records to check
} # end i loop
} # end action loop if rear still has records to check
rear <- rear %>% filter(!(index %in% outFrame$index)) # matched records were dropped in front, need to be dropped from rear
# Code species matches where counts and bins do not match
sp.match <- intersect(front$species, rear$species) # species without count matches ....
testMatch <- c(front$index[front$species %in% sp.match],
rear$index[rear$species %in% sp.match])
outFrame <- bind_rows(outFrame,
inFrame %>% filter(index %in% testMatch) %>% mutate(reconcile = "speciesMatch"))
front <- front %>% filter(!(species %in% sp.match)) # drop species with matches
rear <- rear %>% filter(!(species %in% sp.match))
# Code generic matches where counts and bins do not match
generic.match <- intersect(front$genericSp, rear$genericSp) # generic without count matches .... include ambiguous groups
testMatch <- c(front$index[front$genericSp %in% generic.match],
rear$index[rear$genericSp %in% generic.match])
outFrame <- bind_rows(outFrame,
inFrame %>% filter(index %in% testMatch) %>% mutate(reconcile = "genericMatch"))
front <- front %>% filter(!(genericSp %in% generic.match)) # drop species with generic matches
rear <- rear %>% filter(!(genericSp %in% generic.match))
sp.mismatch <- setdiff(union(front$species,rear$species),intersect(front$species,rear$species))
if(length(sp.mismatch) > 0) { # start code mismatched species
testMatch <- c(front$index[front$species %in% sp.mismatch],
rear$index[rear$species %in% sp.mismatch])
outFrame <- bind_rows(outFrame,
inFrame %>% filter(index %in% testMatch) %>% mutate(reconcile = "misMatch"))
} # end coding misMatched species
} # end accounting for recording and count differences
outFrame <- arrange(outFrame, index)
# print(outFrame)
outFrame$reconcile # inFrame index should be smallest to largest within grp
} # End of matching function