Skip to content

Commit

Permalink
place classification (#25)
Browse files Browse the repository at this point in the history
* feat(dictionaries): dictionary updates

* feat(place): promote "place" to a public classification
  • Loading branch information
missinglink authored May 16, 2019
1 parent ccd54be commit 3031972
Show file tree
Hide file tree
Showing 12 changed files with 107 additions and 7 deletions.
1 change: 1 addition & 0 deletions classification/PlaceClassification.js
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@ const Classification = require('../classification/Classification')
/**
 * Classification carrying the label 'place'.
 *
 * It is flagged as public. The confidence and meta arguments are handed
 * unchanged to the Classification base constructor.
 */
class PlaceClassification extends Classification {
  constructor (confidence, meta) {
    super(confidence, meta)
    // same two own properties, assigned in the same order: public, then label
    Object.assign(this, { public: true, label: 'place' })
  }
}
Expand Down
2 changes: 1 addition & 1 deletion classification/PlaceClassification.test.js
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ module.exports.tests = {}
module.exports.tests.constructor = (test) => {
test('constructor', (t) => {
let c = new Classification()
t.false(c.public)
t.true(c.public)
t.equals(c.label, 'place')
t.equals(c.confidence, 1.0)
t.deepEqual(c.meta, {})
Expand Down
10 changes: 10 additions & 0 deletions classifier/PlaceClassifier.js
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,16 @@ class PlaceClassifier extends WordClassifier {
// skip spans which contain numbers
if (span.contains.numerals) { return }

// do not classify tokens preceded by an 'IntersectionClassification'
let firstChild = span.graph.findOne('child:first') || span
let prev = firstChild.graph.findOne('prev')
if (
prev && (
prev.classifications.hasOwnProperty('IntersectionClassification')
)) {
return
}

// use an inverted index for full token matching as it's O(1)
if (this.index.hasOwnProperty(span.norm)) {
span.classify(new PlaceClassification(1.0))
Expand Down
49 changes: 49 additions & 0 deletions classifier/scheme/place.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,49 @@
const PlaceClassification = require('../../classification/PlaceClassification')

module.exports = [
{
// University Hospital
confidence: 1.0,
Class: PlaceClassification,
scheme: [
{
is: ['PlaceClassification'],
not: ['StreetClassification']
},
{
is: ['PlaceClassification'],
not: ['StreetClassification']
}
]
},
{
// +++ Park
confidence: 0.9,
Class: PlaceClassification,
scheme: [
{
is: ['AlphaClassification'],
not: ['StreetClassification', 'IntersectionClassification', 'StopWordClassification']
},
{
is: ['PlaceClassification'],
not: ['StreetClassification']
}
]
},
{
// Mt +++ Park
confidence: 0.8,
Class: PlaceClassification,
scheme: [
{
is: ['PlaceClassification'],
not: ['StreetClassification']
},
{
is: ['PlaceClassification'],
not: []
}
]
}
]
9 changes: 8 additions & 1 deletion parser/AddressParser.js
Original file line number Diff line number Diff line change
Expand Up @@ -64,7 +64,8 @@ class AddressParser extends Parser {
new CompositeClassifier(require('../classifier/scheme/person')),
new CompositeClassifier(require('../classifier/scheme/street_name')),
new CompositeClassifier(require('../classifier/scheme/street')),
new CompositeClassifier(require('../classifier/scheme/intersection'))
new CompositeClassifier(require('../classifier/scheme/intersection')),
new CompositeClassifier(require('../classifier/scheme/place'))
],
// solvers
[
Expand All @@ -85,6 +86,12 @@ class AddressParser extends Parser {
['HouseNumberClassification', 'PostcodeClassification', 'RegionClassification'],
['HouseNumberClassification', 'PostcodeClassification', 'CountryClassification']
]),
new MustNotFollowFilter('PlaceClassification', 'HouseNumberClassification'),
new MustNotFollowFilter('PlaceClassification', 'StreetClassification'),
new MustNotFollowFilter('PlaceClassification', 'LocalityClassification'),
new MustNotFollowFilter('PlaceClassification', 'RegionClassification'),
new MustNotFollowFilter('PlaceClassification', 'CountryClassification'),
new MustNotFollowFilter('PlaceClassification', 'PostcodeClassification'),
new MustNotPreceedFilter('PostcodeClassification', 'HouseNumberClassification'),
new MustNotPreceedFilter('PostcodeClassification', 'StreetClassification'),
new MustNotPreceedFilter('LocalityClassification', 'HouseNumberClassification'),
Expand Down
1 change: 1 addition & 0 deletions solver/MultiStreetSolver.js
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@ class MultiStreetSolver extends HashMapSolver {

// remove any pairs which are more granular than street (not applicable for intersections)
sol.pair = sol.pair.filter(p => p.classification.constructor.name !== 'HouseNumberClassification')
sol.pair = sol.pair.filter(p => p.classification.constructor.name !== 'PlaceClassification')

let success = false

Expand Down
8 changes: 7 additions & 1 deletion solver/Solution.js
Original file line number Diff line number Diff line change
Expand Up @@ -61,7 +61,13 @@ class Solution {
// use the original input, mask should be the same length
let body = tokenizer.span.body
let mask = Array(body.length).fill(' ')
let map = { 'housenumber': 'N', 'street': 'S', 'postcode': 'P', 'default': 'A' }
let map = {
'place': 'V',
'housenumber': 'N',
'street': 'S',
'postcode': 'P',
'default': 'A'
}

// scan the input letter-by-letter from left-to-right
for (let i = 0; i < body.length; i++) {
Expand Down
8 changes: 4 additions & 4 deletions solver/Solution.test.js
Original file line number Diff line number Diff line change
Expand Up @@ -28,21 +28,21 @@ module.exports.tests.constructor = (test) => {
module.exports.tests.mask = (test) => {
let parser = new AddressParser()
test('mask', (t) => {
// ' SSSSSSSSSSSS NN PPPPP AAAAAA'
// 'VVVVVVVVVV SSSSSSSSSSSS NN PPPPP AAAAAA'
let tokenizer = new Tokenizer('Kaschk Bar, Linienstraße 40 10119 Berlin')
parser.classify(tokenizer)
parser.solve(tokenizer)

t.equal(tokenizer.solution[0].mask(tokenizer), ' SSSSSSSSSSSS NN PPPPP AAAAAA')
t.equal(tokenizer.solution[0].mask(tokenizer), 'VVVVVVVVVV SSSSSSSSSSSS NN PPPPP AAAAAA')
t.end()
})
test('mask', (t) => {
// ' NN SSSSSSS AAAAAA PPPPP '
// 'VVVVVVVV NN SSSSSSS AAAAAA PPPPP '
let tokenizer = new Tokenizer('Foo Cafe 10 Main St London 10010 Earth')
parser.classify(tokenizer)
parser.solve(tokenizer)

t.equal(tokenizer.solution[0].mask(tokenizer), ' NN SSSSSSS AAAAAA PPPPP ')
t.equal(tokenizer.solution[0].mask(tokenizer), 'VVVVVVVV NN SSSSSSS AAAAAA PPPPP ')
t.end()
})
}
Expand Down
6 changes: 6 additions & 0 deletions test/address.deu.test.js
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,12 @@ const testcase = (test, common) => {
{ street: 'Am Bürgerpark' }, { housenumber: '15-18' },
{ postcode: '13156' }, { locality: 'Berlin' }
], true)

assert('Kaschk Bar, Linienstraße 40 10119 Berlin', [
{ place: 'Kaschk Bar' },
{ street: 'Linienstraße' }, { housenumber: '40' },
{ postcode: '10119' }, { locality: 'Berlin' }
], true)
}

module.exports.all = (tape, common) => {
Expand Down
18 changes: 18 additions & 0 deletions test/address.gbr.test.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
const AddressParser = require('../parser/AddressParser')

// Registers the GBR address cases, all run against one AddressParser instance.
const testcase = (test, common) => {
  const gbrParser = new AddressParser()
  // pre-bind the test registrar and parser so each case only states its
  // input, the expected parse and the trailing flag
  const check = common.assert.bind(null, test, gbrParser)

  // the whole input is expected to come back as a single street
  const input = 'Rushendon Furlong'
  check(input, [{ street: input }], true)
}

// Entry point for the suite: runs every GBR case through `tape`.
module.exports.all = (tape, common) => {
  // prefix each test name so the GBR cases are identifiable in the output
  const test = (name, testFunction) => tape(`address GBR: ${name}`, testFunction)

  testcase(test, common)
}
1 change: 1 addition & 0 deletions test/addressit.usa.test.js
Original file line number Diff line number Diff line change
Expand Up @@ -91,6 +91,7 @@ const testcase = (test, common) => {
// ], true)

assert('Mt Tabor Park, 6220 SE Salmon St, Portland, OR 97215, USA', [
{ place: 'Mt Tabor Park' },
{ housenumber: '6220' }, { street: 'SE Salmon St' },
{ locality: 'Portland' }, { region: 'OR' },
{ postcode: '97215' }
Expand Down
1 change: 1 addition & 0 deletions test/functional.test.js
Original file line number Diff line number Diff line change
Expand Up @@ -55,6 +55,7 @@ const testcase = (test, common) => {
// do not classify tokens preceded by a 'place' as
// an admin classification
assert('Portland Cafe Portland OR', [
{ place: 'Portland Cafe' },
{ locality: 'Portland' }, { region: 'OR' }
], true)

Expand Down

0 comments on commit 3031972

Please sign in to comment.