diff --git a/classifier/WhosOnFirstClassifier.js b/classifier/WhosOnFirstClassifier.js index 35c6f1ad..14f4c1aa 100644 --- a/classifier/WhosOnFirstClassifier.js +++ b/classifier/WhosOnFirstClassifier.js @@ -9,23 +9,24 @@ const whosonfirst = require('../resources/whosonfirst/whosonfirst') // databases sourced from the WhosOnFirst project // see: https://whosonfirst.org +// note: these should be defined from most granular to least granular const placetypes = { - // 'country': { - // files: ['wof:country.txt', 'wof:shortcode.txt', 'name:eng_x_preferred.txt'], - // classifications: [AreaClassification, CountryClassification] - // }, - // 'dependency': { - // files: ['wof:shortcode.txt', 'name:eng_x_preferred.txt'], - // classifications: [AreaClassification, DependencyClassification] - // }, - 'region': { - files: ['abrv:eng_x_preferred.txt', 'name:eng_x_preferred.txt'], - classifications: [AreaClassification, RegionClassification] - }, 'locality': { files: ['name:eng_x_preferred.txt'], classifications: [AreaClassification, LocalityClassification] + }, + 'region': { + files: ['abrv:eng_x_preferred.txt', 'name:eng_x_preferred.txt'], + classifications: [AreaClassification, RegionClassification] } + // 'dependency': { + // files: ['wof:shortcode.txt', 'name:eng_x_preferred.txt'], + // classifications: [AreaClassification, DependencyClassification] + // }, + // 'country': { + // files: ['wof:country.txt', 'wof:shortcode.txt', 'name:eng_x_preferred.txt'], + // classifications: [AreaClassification, CountryClassification] + // }, } class WhosOnFirstClassifier extends PhraseClassifier { @@ -37,7 +38,7 @@ class WhosOnFirstClassifier extends PhraseClassifier { whosonfirst.load(this.tokens[placetype], [placetype], file) }) - // blacklist + // general blacklist this.tokens[placetype].delete('north') this.tokens[placetype].delete('south') this.tokens[placetype].delete('east') @@ -62,6 +63,19 @@ class WhosOnFirstClassifier extends PhraseClassifier { this.tokens[placetype].delete('town') this.tokens[placetype].delete('city') this.tokens[placetype].delete('grand') + + // placetype specific modifications + if (placetype === 'locality') { + // these are the only two decent values in + // file: locality/abrv:eng_x_preferred.txt + this.tokens.locality.add('nyc') + this.tokens.locality.add('sf') + + // remove problematic locality names + this.tokens.locality.delete('texas') + this.tokens.locality.delete('california') + this.tokens.locality.delete('italy') + } }) } diff --git a/test/addressit.usa.test.js b/test/addressit.usa.test.js index 5a76c1c9..0865069f 100644 --- a/test/addressit.usa.test.js +++ b/test/addressit.usa.test.js @@ -10,12 +10,12 @@ const testcase = (test, common) => { assert('123 Main St, New York, NY 10010', [ [ { housenumber: '123' }, { street: 'Main St' }, - { region: 'New York' }, { locality: 'NY' }, + { locality: 'New York' }, { region: 'NY' }, { postcode: '10010' } ], [ { housenumber: '123' }, { street: 'Main St' }, - { locality: 'New York' }, { region: 'NY' }, + { region: 'New York' }, { locality: 'NY' }, { postcode: '10010' } ] ]) @@ -23,12 +23,12 @@ const testcase = (test, common) => { assert('123 Main St New York, NY 10010', [ [ { housenumber: '123' }, { street: 'Main St' }, - { region: 'New York' }, { locality: 'NY' }, + { locality: 'New York' }, { region: 'NY' }, { postcode: '10010' } ], [ { housenumber: '123' }, { street: 'Main St' }, - { locality: 'New York' }, { region: 'NY' }, + { region: 'New York' }, { locality: 'NY' }, { postcode: '10010' } ] ]) @@ -36,12 +36,12 @@ const testcase = (test, common) => { assert('123 Main St New York NY 10010', [ [ { housenumber: '123' }, { street: 'Main St' }, - { region: 'New York' }, { locality: 'NY' }, + { locality: 'New York' }, { region: 'NY' }, { postcode: '10010' } ], [ { housenumber: '123' }, { street: 'Main St' }, - { locality: 'New York' }, { region: 'NY' }, + { region: 'New York' }, { locality: 'NY' }, { postcode: '10010' } ] ]) @@ -88,11 +88,11 @@ const testcase = (test, common) => { assert('425 W 23rd St, New York, NY 10011', [ [ { housenumber: '425' }, { street: 'W 23rd St' }, - { region: 'New York' }, { locality: 'NY' }, { postcode: '10011' } + { locality: 'New York' }, { region: 'NY' }, { postcode: '10011' } ], [ { housenumber: '425' }, { street: 'W 23rd St' }, - { locality: 'New York' }, { region: 'NY' }, { postcode: '10011' } + { region: 'New York' }, { locality: 'NY' }, { postcode: '10011' } ] ]) @@ -115,23 +115,22 @@ const testcase = (test, common) => { assert('Dallas', [[{ locality: 'Dallas' }]]) assert('California', [ - [{ region: 'California' }], - [{ locality: 'California' }] + [{ region: 'California' }] ]) assert('New York', [ - [{ region: 'New York' }], - [{ locality: 'New York' }] + [{ locality: 'New York' }], + [{ region: 'New York' }] ]) assert('New York, NY', [ - [{ region: 'New York' }, { locality: 'NY' }], - [{ locality: 'New York' }, { region: 'NY' }] + [{ locality: 'New York' }, { region: 'NY' }], + [{ region: 'New York' }, { locality: 'NY' }] ]) assert('New York, New York', [ - [{ region: 'New York' }, { locality: 'New York' }], - [{ locality: 'New York' }, { region: 'New York' }] + [{ locality: 'New York' }, { region: 'New York' }], + [{ region: 'New York' }, { locality: 'New York' }] ]) // assert('northern mariana islands', []) diff --git a/test/functional.test.js b/test/functional.test.js index 89ddb876..ac42df9a 100644 --- a/test/functional.test.js +++ b/test/functional.test.js @@ -47,7 +47,7 @@ const testcase = (test, common) => { // street with directional, ordinal & admin info assert('West 26th Street, New York, NYC, 10010', [ - [{ street: 'West 26th Street' }, { region: 'New York' }, { postcode: '10010' }] + [{ street: 'West 26th Street' }, { locality: 'New York' }, { postcode: '10010' }] ]) }