Skip to content

Commit

Permalink
classifier: change locality vs. region priority
Browse files Browse the repository at this point in the history
  • Loading branch information
missinglink committed May 3, 2019
1 parent fea5eed commit ddddc44
Show file tree
Hide file tree
Showing 3 changed files with 43 additions and 30 deletions.
40 changes: 27 additions & 13 deletions classifier/WhosOnFirstClassifier.js
Original file line number Diff line number Diff line change
Expand Up @@ -9,23 +9,24 @@ const whosonfirst = require('../resources/whosonfirst/whosonfirst')
// databases sourced from the WhosOnFirst project
// see: https://whosonfirst.org

// note: these should be defined from most granular to least granular
// Placetype configuration table. Each key is a WhosOnFirst placetype name.
// `files` lists the dictionary files that are passed, one at a time, to
// whosonfirst.load() for that placetype; `classifications` lists the
// classification classes associated with it (presumably attached to phrases
// that match a token of that placetype -- confirm in the classifier code,
// which is not visible here).
//
// NOTE(review): this text appears to come from a rendered diff without +/-
// markers, so pre-change and post-change lines are interleaved. Read
// literally, 'region' is defined twice: in a JavaScript object literal the
// later definition supplies the value, but the key keeps the position of its
// first appearance, so iteration would visit 'region' before 'locality'.
// Going by the commit title, the intended post-change order is presumably
// 'locality' first, then 'region' -- confirm against the real file.
const placetypes = {
// 'country': {
// files: ['wof:country.txt', 'wof:shortcode.txt', 'name:eng_x_preferred.txt'],
// classifications: [AreaClassification, CountryClassification]
// },
// 'dependency': {
// files: ['wof:shortcode.txt', 'name:eng_x_preferred.txt'],
// classifications: [AreaClassification, DependencyClassification]
// },
'region': {
files: ['abrv:eng_x_preferred.txt', 'name:eng_x_preferred.txt'],
classifications: [AreaClassification, RegionClassification]
},
'locality': {
files: ['name:eng_x_preferred.txt'],
classifications: [AreaClassification, LocalityClassification]
},
'region': {
files: ['abrv:eng_x_preferred.txt', 'name:eng_x_preferred.txt'],
classifications: [AreaClassification, RegionClassification]
}
// 'dependency': {
// files: ['wof:shortcode.txt', 'name:eng_x_preferred.txt'],
// classifications: [AreaClassification, DependencyClassification]
// },
// 'country': {
// files: ['wof:country.txt', 'wof:shortcode.txt', 'name:eng_x_preferred.txt'],
// classifications: [AreaClassification, CountryClassification]
// },
}

class WhosOnFirstClassifier extends PhraseClassifier {
Expand All @@ -37,7 +38,7 @@ class WhosOnFirstClassifier extends PhraseClassifier {
whosonfirst.load(this.tokens[placetype], [placetype], file)
})

// blacklist
// general blacklist
this.tokens[placetype].delete('north')
this.tokens[placetype].delete('south')
this.tokens[placetype].delete('east')
Expand All @@ -62,6 +63,19 @@ class WhosOnFirstClassifier extends PhraseClassifier {
this.tokens[placetype].delete('town')
this.tokens[placetype].delete('city')
this.tokens[placetype].delete('grand')

// placetype specific modifications
if (placetype === 'locality') {
// these are the only two decent values in
// file: locality/abrv:eng_x_preferred.txt
this.tokens.locality.add('nyc')
this.tokens.locality.add('sf')

// remove problematic locality names
this.tokens.locality.delete('texas')
this.tokens.locality.delete('california')
this.tokens.locality.delete('italy')
}
})
}

Expand Down
31 changes: 15 additions & 16 deletions test/addressit.usa.test.js
Original file line number Diff line number Diff line change
Expand Up @@ -10,38 +10,38 @@ const testcase = (test, common) => {
assert('123 Main St, New York, NY 10010', [
[
{ housenumber: '123' }, { street: 'Main St' },
{ region: 'New York' }, { locality: 'NY' },
{ locality: 'New York' }, { region: 'NY' },
{ postcode: '10010' }
],
[
{ housenumber: '123' }, { street: 'Main St' },
{ locality: 'New York' }, { region: 'NY' },
{ region: 'New York' }, { locality: 'NY' },
{ postcode: '10010' }
]
])

assert('123 Main St New York, NY 10010', [
[
{ housenumber: '123' }, { street: 'Main St' },
{ region: 'New York' }, { locality: 'NY' },
{ locality: 'New York' }, { region: 'NY' },
{ postcode: '10010' }
],
[
{ housenumber: '123' }, { street: 'Main St' },
{ locality: 'New York' }, { region: 'NY' },
{ region: 'New York' }, { locality: 'NY' },
{ postcode: '10010' }
]
])

assert('123 Main St New York NY 10010', [
[
{ housenumber: '123' }, { street: 'Main St' },
{ region: 'New York' }, { locality: 'NY' },
{ locality: 'New York' }, { region: 'NY' },
{ postcode: '10010' }
],
[
{ housenumber: '123' }, { street: 'Main St' },
{ locality: 'New York' }, { region: 'NY' },
{ region: 'New York' }, { locality: 'NY' },
{ postcode: '10010' }
]
])
Expand Down Expand Up @@ -88,11 +88,11 @@ const testcase = (test, common) => {
assert('425 W 23rd St, New York, NY 10011', [
[
{ housenumber: '425' }, { street: 'W 23rd St' },
{ region: 'New York' }, { locality: 'NY' }, { postcode: '10011' }
{ locality: 'New York' }, { region: 'NY' }, { postcode: '10011' }
],
[
{ housenumber: '425' }, { street: 'W 23rd St' },
{ locality: 'New York' }, { region: 'NY' }, { postcode: '10011' }
{ region: 'New York' }, { locality: 'NY' }, { postcode: '10011' }
]
])

Expand All @@ -115,23 +115,22 @@ const testcase = (test, common) => {
assert('Dallas', [[{ locality: 'Dallas' }]])

assert('California', [
[{ region: 'California' }],
[{ locality: 'California' }]
[{ region: 'California' }]
])

assert('New York', [
[{ region: 'New York' }],
[{ locality: 'New York' }]
[{ locality: 'New York' }],
[{ region: 'New York' }]
])

assert('New York, NY', [
[{ region: 'New York' }, { locality: 'NY' }],
[{ locality: 'New York' }, { region: 'NY' }]
[{ locality: 'New York' }, { region: 'NY' }],
[{ region: 'New York' }, { locality: 'NY' }]
])

assert('New York, New York', [
[{ region: 'New York' }, { locality: 'New York' }],
[{ locality: 'New York' }, { region: 'New York' }]
[{ locality: 'New York' }, { region: 'New York' }],
[{ region: 'New York' }, { locality: 'New York' }]
])

// assert('northern mariana islands', [])
Expand Down
2 changes: 1 addition & 1 deletion test/functional.test.js
Original file line number Diff line number Diff line change
Expand Up @@ -47,7 +47,7 @@ const testcase = (test, common) => {

// street with directional, ordinal & admin info
assert('West 26th Street, New York, NYC, 10010', [
[{ street: 'West 26th Street' }, { region: 'New York' }, { postcode: '10010' }]
[{ street: 'West 26th Street' }, { locality: 'New York' }, { postcode: '10010' }]
])
}

Expand Down

0 comments on commit ddddc44

Please sign in to comment.