Skip to content

Commit

Permalink
feat(unit): more permissive unit patterns (#96)
Browse files Browse the repository at this point in the history
  • Loading branch information
blackmad authored Jun 3, 2020
1 parent c0dc786 commit 4667240
Show file tree
Hide file tree
Showing 3 changed files with 113 additions and 11 deletions.
39 changes: 29 additions & 10 deletions classifier/UnitClassifier.js
Original file line number Diff line number Diff line change
@@ -1,19 +1,38 @@
const WordClassifier = require('./super/WordClassifier')
const UnitClassification = require('../classification/UnitClassification')

class UnitClassifier extends WordClassifier {
each (span) {
// skip spans which do not contain numbers
if (!span.contains.numerals) { return }
// Patterns describing what a unit designator may look like. Every pattern is
// anchored at both ends, so it must match the whole string, and every pattern
// allows one optional leading '#'.

// Digits only, e.g. "120" or "#22".
const AllNumbersRegExp = /^#?\d+$/
// Exactly one ASCII letter, e.g. "A" or "#a".
const SingleLetterRegExp = /^#?[A-Za-z]$/
// One or more digits followed by exactly one ASCII letter, e.g. "3b" or "#3B".
const NumbersThenLetterRegExp = /^#?\d+[A-Za-z]$/
// Exactly one ASCII letter followed by one or more digits, e.g. "A2".
const LetterThenNumbersRegExp = /^#?[A-Za-z]\d+$/

// based on https://stackoverflow.com/questions/9213237/combining-regular-expressions-in-javascript
/**
 * Build a single RegExp that matches whenever any one of the given patterns
 * matches.
 *
 * Each pattern's source is wrapped in its own non-capturing group and the
 * groups are joined with `|`, so anchors inside one pattern cannot leak into
 * its neighbours.
 *
 * Flags on the input expressions are NOT carried over: the result is always
 * created without flags.
 *
 * @param {...RegExp} args - Expressions to combine. An argument may instead
 *   expose a `_components` array of source strings, in which case those
 *   strings are used in place of `source`.
 * @returns {RegExp} The alternation of all supplied patterns. With no
 *   arguments the result is the empty group `(?:)`, which matches any string.
 */
function combineRegExps (...args) {
  // concat() flattens one level, so a `_components` array contributes each of
  // its entries while a plain `source` string contributes a single entry.
  const components = [].concat(...args.map((arg) => arg._components || arg.source))

  return new RegExp(`(?:${components.join(')|(?:')})`)
}

// Single expression accepting any of the four unit shapes above: all digits,
// a single letter, digits followed by a letter, or a letter followed by digits
// (each with an optional leading '#'). Built once at module load and tested
// against span.body in UnitClassifier#each.
const combinedUnitRegexp = combineRegExps(
AllNumbersRegExp,
SingleLetterRegExp,
NumbersThenLetterRegExp,
LetterThenNumbersRegExp
)

// Unit must be preceded by unit type
if (!prev || !prev.classifications.hasOwnProperty('UnitTypeClassification')) {
return
}
class UnitClassifier extends WordClassifier {
each (span) {
const prev = span.graph.findOne('prev')
const hasPrevUnitToken = prev && prev.classifications.hasOwnProperty('UnitTypeClassification')

// If the previous token in a unit word, like apt or suite
// and this token is something like A2, 3b, 120, A, label it as a unit (number)
if (hasPrevUnitToken && combinedUnitRegexp.test(span.body)) {
span.classify(new UnitClassification(1))
}
}
Expand Down
32 changes: 31 additions & 1 deletion classifier/UnitClassifier.test.js
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,21 @@ module.exports.tests.without_unit_type = (test) => {
t.deepEqual(s.classifications, { })
t.end()
})
test('letter and number without unit type', (t) => {
let s = classify('a2')
t.deepEqual(s.classifications, { })
t.end()
})
test('single letter without unit type', (t) => {
let s = classify('a')
t.deepEqual(s.classifications, { })
t.end()
})
test('number with # without unit type', (t) => {
let s = classify('#22')
t.deepEqual(s.classifications, { })
t.end()
})
}

module.exports.tests.with_unit_type = (test) => {
Expand All @@ -48,7 +63,22 @@ module.exports.tests.with_unit_type = (test) => {
})
test('number and letter with unit type', (t) => {
let s = classify('2020a', 'unit')
t.deepEqual(s.classifications, { })
t.deepEqual(s.classifications, { UnitClassification: new UnitClassification(1.0) })
t.end()
})
test('letter and number with unit type', (t) => {
let s = classify('a2', 'unit')
t.deepEqual(s.classifications, { UnitClassification: new UnitClassification(1.0) })
t.end()
})
test('single letter with unit type', (t) => {
let s = classify('a', 'unit')
t.deepEqual(s.classifications, { UnitClassification: new UnitClassification(1.0) })
t.end()
})
test('number with # with unit type', (t) => {
let s = classify('#22', 'unit')
t.deepEqual(s.classifications, { UnitClassification: new UnitClassification(1.0) })
t.end()
})
}
Expand Down
53 changes: 53 additions & 0 deletions test/address.usa.test.js
Original file line number Diff line number Diff line change
Expand Up @@ -73,6 +73,59 @@ const testcase = (test, common) => {
assert('1389a IA 42 IA', [{ housenumber: '1389a' }, { street: 'IA 42' }, { region: 'IA' }], true)

assert('1111 MD 760, Lusby, MD, USA', [{ housenumber: '1111' }, { street: 'MD 760' }, { locality: 'Lusby' }, { region: 'MD' }, { country: 'USA' }], true)

// unit + unit number tests
assert('52 Ten Eyck St Apt 3 Brooklyn NY', [
{ housenumber: '52' }, { street: 'Ten Eyck St' },
{ unit_type: 'Apt' },
{ unit: '3' },
{ locality: 'Brooklyn' },
{ region: 'NY' }
])

assert('52 Ten Eyck St Apt 3b Brooklyn NY', [
{ housenumber: '52' }, { street: 'Ten Eyck St' },
{ unit_type: 'Apt' },
{ unit: '3b' },
{ locality: 'Brooklyn' },
{ region: 'NY' }
])

assert('52 Ten Eyck St Apt 3B Brooklyn NY', [
{ housenumber: '52' }, { street: 'Ten Eyck St' },
{ unit_type: 'Apt' },
{ unit: '3B' },
{ locality: 'Brooklyn' },
{ region: 'NY' }
])

assert('52 Ten Eyck St Apt #3b Brooklyn NY', [
{ housenumber: '52' }, { street: 'Ten Eyck St' },
{ unit_type: 'Apt' },
{ unit: '#3b' },
{ locality: 'Brooklyn' },
{ region: 'NY' }
])

assert('52 Ten Eyck St 3 Brooklyn NY', [
{ housenumber: '52' }, { street: 'Ten Eyck St' },
{ locality: 'Brooklyn' },
{ region: 'NY' }
])

assert('52 Ten Eyck St 3 Brooklyn NY', [
{ housenumber: '52' }, { street: 'Ten Eyck St' },
{ locality: 'Brooklyn' },
{ region: 'NY' }
])

assert('6 Montague Terrace Apt A2 Brooklyn NY', [
{ housenumber: '6' }, { street: 'Montague Terrace' },
{ unit_type: 'Apt' },
{ unit: 'A2' },
{ locality: 'Brooklyn' },
{ region: 'NY' }
])
}

module.exports.all = (tape, common) => {
Expand Down

0 comments on commit 4667240

Please sign in to comment.