Skip to content

Commit

Permalink
feat: add middle initial and new venue classification (#117)
Browse files Browse the repository at this point in the history
  • Loading branch information
blackmad authored Jan 21, 2021
1 parent eb9dd5b commit dcfed35
Show file tree
Hide file tree
Showing 8 changed files with 137 additions and 1 deletion.
10 changes: 10 additions & 0 deletions classification/MiddleInitialClassification.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
const Classification = require('../classification/Classification')

/**
 * Classification carrying the label 'middle_initial'.
 *
 * Both constructor arguments are handed unchanged to the base
 * Classification constructor; the only thing this subclass adds is the label.
 */
class MiddleInitialClassification extends Classification {
  /**
   * @param {*} confidence - forwarded as-is to the Classification constructor
   * @param {*} meta - forwarded as-is to the Classification constructor
   */
  constructor (confidence, meta) {
    super(confidence, meta)

    // label identifying this classification type
    this.label = 'middle_initial'
  }
}

module.exports = MiddleInitialClassification
24 changes: 24 additions & 0 deletions classification/MiddleInitialClassification.test.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
const Classification = require('./MiddleInitialClassification')

module.exports.tests = {}

// Checks the properties of an instance created with no constructor arguments.
// Note: `Classification` in this file is bound to MiddleInitialClassification.
module.exports.tests.constructor = (test) => {
  test('constructor', (t) => {
    const instance = new Classification()

    t.false(instance.public)
    t.equals(instance.label, 'middle_initial')
    t.equals(instance.confidence, 1.0)
    t.deepEqual(instance.meta, {})
    t.end()
  })
}

// Runs every test case registered on module.exports.tests, handing each one a
// wrapper around `tape` that prefixes the test name with the suite name.
module.exports.all = (tape, common) => {
  const test = (name, testFunction) =>
    tape(`MiddleInitialClassification: ${name}`, testFunction)

  for (const testCase in module.exports.tests) {
    module.exports.tests[testCase](test, common)
  }
}
17 changes: 17 additions & 0 deletions classifier/MiddleInitialClassifier.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
const PhraseClassifier = require('./super/PhraseClassifier')
const MiddleInitialClassification = require('../classification/MiddleInitialClassification')

const SingleLetterRegExp = /^[A-Za-z]\.?$/

/**
 * Classifier that tags a span as a middle initial when its body matches
 * SingleLetterRegExp: exactly one ASCII letter, optionally followed by a
 * period (e.g. 'W' or 'W.').
 */
class MiddleInitialClassifier extends PhraseClassifier {
  // intentionally empty: this classifier has nothing to set up
  setup () {}

  /**
   * Classifies `span` with a MiddleInitialClassification (confidence 1) when
   * its body matches; spans that do not match are left untouched.
   *
   * @param {object} span - must expose a `body` string and a `classify` method
   */
  each (span) {
    const looksLikeInitial = SingleLetterRegExp.test(span.body)
    if (!looksLikeInitial) {
      return
    }

    span.classify(new MiddleInitialClassification(1))
  }
}

module.exports = MiddleInitialClassifier
56 changes: 56 additions & 0 deletions classifier/MiddleInitialClassifier.test.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,56 @@
const MiddleInitialClassifier = require('./MiddleInitialClassifier')
const MiddleInitialClassification = require('../classification/MiddleInitialClassification')
const Span = require('../tokenization/Span')
const classifier = new MiddleInitialClassifier()

module.exports.tests = {}

// Wraps `body` in a Span, runs it through the shared classifier's each()
// and returns the span so its classifications can be inspected.
function classify (body) {
  const span = new Span(body)
  classifier.each(span, null, 1)
  return span
}

// Registers one test per token: tokens in `valid` must receive exactly one
// MiddleInitialClassification, tokens in `invalid` must receive none.
module.exports.tests.classify = (test) => {
  // a single letter, with or without a trailing period
  const valid = ['M.', 'M']

  for (const token of valid) {
    test(`classify: ${token}`, (t) => {
      const span = classify(token)
      const expected = {
        MiddleInitialClassification: new MiddleInitialClassification(1.0)
      }
      t.deepEqual(span.classifications, expected)
      t.end()
    })
  }

  // multi-letter words, digits, other punctuation and symbols
  const invalid = ['Mae', '122', 'M,', '&', 'Mr', 'Esq']

  for (const token of invalid) {
    test(`classify: ${token}`, (t) => {
      const span = classify(token)
      t.deepEqual(span.classifications, {})
      t.end()
    })
  }
}

// Runs every test case registered on module.exports.tests, handing each one a
// wrapper around `tape` that prefixes the test name with the suite name.
module.exports.all = (tape, common) => {
  const test = (name, testFunction) =>
    tape(`MiddleInitialClassifier: ${name}`, testFunction)

  for (const testCase in module.exports.tests) {
    module.exports.tests[testCase](test, common)
  }
}
3 changes: 2 additions & 1 deletion classifier/scheme/person.js
Original file line number Diff line number Diff line change
Expand Up @@ -53,6 +53,7 @@ module.exports = [
},
{
// Raul Leite Magalhães (first name, middle name, family name)
// Donald W. Reynolds
confidence: 0.5,
Class: PersonClassification,
scheme: [
Expand All @@ -61,7 +62,7 @@ module.exports = [
not: ['StreetClassification', 'IntersectionClassification']
},
{
is: ['GivenNameClassification', 'SurnameClassification'],
is: ['GivenNameClassification', 'SurnameClassification', 'MiddleInitialClassification'],
not: ['StreetClassification', 'IntersectionClassification']
},
{
Expand Down
16 changes: 16 additions & 0 deletions classifier/scheme/venue.js
Original file line number Diff line number Diff line change
Expand Up @@ -129,6 +129,22 @@ module.exports = [
}
]
},
{
// Donald W Reynolds Stadium
// boost confidence slightly above street for "Donald W"
confidence: 0.82,
Class: VenueClassification,
scheme: [
{
is: ['PersonClassification'],
not: ['StreetClassification']
},
{
is: ['PlaceClassification', 'VenueClassification'],
not: ['StreetClassification']
}
]
},
{
// ZAC du Pré
confidence: 0.8,
Expand Down
2 changes: 2 additions & 0 deletions parser/AddressParser.js
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@ const StopWordClassifier = require('../classifier/StopWordClassifier')
const PersonClassifier = require('../classifier/PersonClassifier')
const GivenNameClassifier = require('../classifier/GivenNameClassifier')
const SurnameClassifier = require('../classifier/SurnameClassifier')
const MiddleInitialClassifier = require('../classifier/MiddleInitialClassifier')
const PersonalSuffixClassifier = require('../classifier/PersonalSuffixClassifier')
const PersonalTitleClassifier = require('../classifier/PersonalTitleClassifier')
const ChainClassifier = require('../classifier/ChainClassifier')
Expand Down Expand Up @@ -67,6 +68,7 @@ class AddressParser extends Parser {
new PersonClassifier(),
new GivenNameClassifier(),
new SurnameClassifier(),
new MiddleInitialClassifier(),
new PersonalSuffixClassifier(),
new PersonalTitleClassifier(),
new ChainClassifier(),
Expand Down
10 changes: 10 additions & 0 deletions test/venue.usa.test.js
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,16 @@ const testcase = (test, common) => {
{ locality: 'NYC' }
])

// checking that NAME INITIAL NAME works
assert('Donald W Reynolds Stadium', [
{ venue: 'Donald W Reynolds Stadium' }
])

assert('Donald W. Reynolds Stadium', [
{ venue: 'Donald W. Reynolds Stadium' }
])

// checking that "japan" is interpreted as a country, not a city
assert('Universal Studios Japan', [
{ venue: 'Universal Studios' },
{ country: 'Japan' }
Expand Down

0 comments on commit dcfed35

Please sign in to comment.