Skip to content

Commit

Permalink
feat(unit_types): add OrphanedUnitTypeDeclassifier
Browse files Browse the repository at this point in the history
  • Loading branch information
missinglink authored and Joxit committed Jun 2, 2020
1 parent f7da2b4 commit 6afdd7b
Show file tree
Hide file tree
Showing 5 changed files with 115 additions and 0 deletions.
2 changes: 2 additions & 0 deletions parser/AddressParser.js
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,7 @@ const LeadingAreaDeclassifier = require('../solver/LeadingAreaDeclassifier')
const MultiStreetSolver = require('../solver/MultiStreetSolver')
const InvalidSolutionFilter = require('../solver/InvalidSolutionFilter')
const TokenDistanceFilter = require('../solver/TokenDistanceFilter')
const OrphanedUnitTypeDeclassifier = require('../solver/OrphanedUnitTypeDeclassifier')
const MustNotPreceedFilter = require('../solver/MustNotPreceedFilter')
const MustNotFollowFilter = require('../solver/MustNotFollowFilter')
const SubsetFilter = require('../solver/SubsetFilter')
Expand Down Expand Up @@ -125,6 +126,7 @@ class AddressParser extends Parser {
new MustNotFollowFilter('LocalityClassification', 'CountryClassification'),
new HouseNumberPositionPenalty(),
new TokenDistanceFilter(),
new OrphanedUnitTypeDeclassifier(),
new SubsetFilter()
],
options
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@ sf
!us
!germany
!empire
!unit
# remove any localities which share a name with a US state
!alabama
!alaska
Expand Down
25 changes: 25 additions & 0 deletions solver/OrphanedUnitTypeDeclassifier.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
// enforce that any solution containing a UnitTypeClassification
// MUST also include a UnitClassification

/**
 * Solver step which strips 'orphaned' unit types from solutions.
 *
 * A solution that contains a UnitTypeClassification but no
 * UnitClassification has its UnitTypeClassification pairs removed.
 * Solutions left with no pairs at all are discarded entirely.
 */
class OrphanedUnitTypeDeclassifier {
  /**
   * Rewrites `tokenizer.solution` in place (the array is replaced and the
   * `pair` list of affected solutions is reassigned).
   *
   * @param {object} tokenizer - object exposing a `solution` array whose
   *   entries each carry a `pair` array of `{ classification }` items.
   * @returns {undefined}
   */
  solve (tokenizer) {
    // classifications are matched by their constructor name
    const classifiedAs = (name) => (pair) => pair.classification.constructor.name === name
    const isUnitType = classifiedAs('UnitTypeClassification')
    const isUnit = classifiedAs('UnitClassification')

    tokenizer.solution = tokenizer.solution.filter((solution) => {
      // a unit type is orphaned when no unit accompanies it in the same solution
      const orphaned = solution.pair.some(isUnitType) && !solution.pair.some(isUnit)
      if (!orphaned) { return true }

      // drop the orphaned unit type(s); keep the solution only if
      // something else remains in it
      solution.pair = solution.pair.filter((pair) => !isUnitType(pair))
      return solution.pair.length > 0
    })
  }
}

module.exports = OrphanedUnitTypeDeclassifier
81 changes: 81 additions & 0 deletions solver/OrphanedUnitTypeDeclassifier.test.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,81 @@
const Tokenizer = require('../tokenization/Tokenizer')
const Span = require('../tokenization/Span')
const UnitTypeClassification = require('../classification/UnitTypeClassification')
const UnitClassification = require('../classification/UnitClassification')
const StreetClassification = require('../classification/StreetClassification')
const Solution = require('./Solution')
const SolutionPair = require('./SolutionPair')
const OrphanedUnitTypeDeclassifier = require('./OrphanedUnitTypeDeclassifier')

// registry of test cases; each entry is a function which is invoked
// with (test, common) by module.exports.all
module.exports.tests = {}

// Scenario: the solution holds a UnitTypeClassification but no
// UnitClassification, so the solver is expected to strip the unit type
// and leave only the street pair behind.
module.exports.tests.unit_type_missing_unit = (test) => {
  test('UnitClassification missing: remove UnitTypeClassification', (t) => {
    const tokenizer = new Tokenizer()

    // two spans with distinct offsets
    const spanA = Object.assign(new Span('A'), { start: 0, end: 1 })
    const spanB = Object.assign(new Span('B'), { start: 3, end: 4 })

    const unitTypePair = new SolutionPair(spanA, new UnitTypeClassification(1.0))
    const streetPair = new SolutionPair(spanB, new StreetClassification(1.0))

    tokenizer.solution = [new Solution([unitTypePair, streetPair])]

    const solver = new OrphanedUnitTypeDeclassifier()
    solver.solve(tokenizer)

    // the solution survives, minus its orphaned unit type
    t.deepEquals(tokenizer.solution.length, 1)
    t.deepEquals(tokenizer.solution[0].pair.length, 1)
    t.deepEquals(tokenizer.solution[0].pair[0], streetPair)
    t.end()
  })
}

// Scenario: the solution holds both a UnitTypeClassification and a
// UnitClassification, so the solver must leave every pair untouched.
module.exports.tests.both_classifications_present = (test) => {
  test('UnitClassification present: do not remove UnitTypeClassification', (t) => {
    const tok = new Tokenizer()

    const s1 = new Span('A')
    s1.start = 0
    s1.end = 1

    const s2 = new Span('B')
    s2.start = 3
    s2.end = 4

    // offsets belong to s3; previously they were assigned to s2 by mistake,
    // which clobbered s2 and left s3 without any offsets
    const s3 = new Span('C')
    s3.start = 6
    s3.end = 7

    const sp1 = new SolutionPair(s1, new UnitTypeClassification(1.0))
    const sp2 = new SolutionPair(s2, new UnitClassification(1.0))
    const sp3 = new SolutionPair(s3, new StreetClassification(1.0))

    tok.solution = [new Solution([sp1, sp2, sp3])]

    const c = new OrphanedUnitTypeDeclassifier()
    c.solve(tok)

    // nothing is removed and pair order is preserved
    t.deepEquals(tok.solution.length, 1)
    t.deepEquals(tok.solution[0].pair.length, 3)
    t.deepEquals(tok.solution[0].pair[0], sp1)
    t.deepEquals(tok.solution[0].pair[1], sp2)
    t.deepEquals(tok.solution[0].pair[2], sp3)
    t.end()
  })
}

// Runs every registered test case, prefixing each test name with the
// name of the solver under test.
module.exports.all = (tape, common) => {
  // wrapper handed to each test case in place of the raw tape function
  const prefixedTest = (name, testFunction) =>
    tape(`OrphanedUnitTypeDeclassifier: ${name}`, testFunction)

  for (const caseName in module.exports.tests) {
    module.exports.tests[caseName](prefixedTest, common)
  }
}
6 changes: 6 additions & 0 deletions test/functional.test.js
Original file line number Diff line number Diff line change
Expand Up @@ -78,6 +78,12 @@ const testcase = (test, common) => {
assert('1 California USA', [], false)
assert('1 90210', [], false)

// unit type specified with no accompanying unit number, unit type should
// be removed by the OrphanedUnitTypeDeclassifier.
assert('Apartment', [], false)
assert('Unit', [], false)
assert('Space', [], false)

// do not parse 'aus' as a locality if it follows a region
assert('new south wales aus', [
{ region: 'new south wales' }, { country: 'aus' }
Expand Down

0 comments on commit 6afdd7b

Please sign in to comment.