Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

support semi-colon delimited names #589

Merged
merged 5 commits into from
Feb 24, 2025
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion schema/address_karlsruhe.js
Original file line number Diff line number Diff line change
@@ -10,7 +10,7 @@
@ref: http://wiki.openstreetmap.org/wiki/Karlsruhe_Schema
**/

var KARLSRUHE_SCHEMA = {
const KARLSRUHE_SCHEMA = {
'addr:housename': 'name',
'addr:housenumber': 'number',
'addr:street': 'street',
2 changes: 1 addition & 1 deletion schema/address_naptan.js
Original file line number Diff line number Diff line change
@@ -10,7 +10,7 @@
@ref: http://wiki.openstreetmap.org/wiki/NaPTAN
**/

var NAPTAN_SCHEMA = {
const NAPTAN_SCHEMA = {
'naptan:Street': 'street'
};

2 changes: 1 addition & 1 deletion schema/address_osm.js
Original file line number Diff line number Diff line change
@@ -10,7 +10,7 @@
@ref: http://wiki.openstreetmap.org/wiki/Key:postal_code
**/

var OSM_SCHEMA = {
const OSM_SCHEMA = {
'postal_code': 'zip'
};

2 changes: 1 addition & 1 deletion schema/address_tiger.js
Original file line number Diff line number Diff line change
@@ -10,7 +10,7 @@
@ref: http://wiki.openstreetmap.org/wiki/TIGER_to_OSM_Attribute_Map
**/

var TIGER_SCHEMA = {
const TIGER_SCHEMA = {
'tiger:zip_left': 'zip',
'tiger:zip_right': 'zip'
};
15 changes: 1 addition & 14 deletions schema/name_osm.js
Original file line number Diff line number Diff line change
@@ -14,16 +14,13 @@
When multiple keys have the value 'default' then they are considered
as aliases of the default field.
The '_primary' property defined below defines which of those aliases
is considered the 'primary default name' for label generation.
No values other than 'default' should be specified more than once.
@ref: http://wiki.openstreetmap.org/wiki/Key:name
@ref: http://wiki.openstreetmap.org/wiki/Names
**/

var OSM_NAMING_SCHEMA = {
const OSM_NAMING_SCHEMA = {
'name': 'default',
'loc_name': 'default',
'alt_name': 'default',
@@ -38,14 +35,4 @@ var OSM_NAMING_SCHEMA = {
// 'sorting_name': 'sorting'
};

// this property is considered the 'primary name'
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This was kinda weird and overly fancy, I removed it.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Over the years we've learned many things you can do with JavaScript, but shouldn't :P

// for label generation, the others are considered
// secondary or 'aliases'.
Object.defineProperty(OSM_NAMING_SCHEMA, '_primary', {
value: 'name',
enumerable: false,
configurable: false,
writable: false
});

module.exports = OSM_NAMING_SCHEMA;
24 changes: 12 additions & 12 deletions stream/address_extractor.js
Original file line number Diff line number Diff line change
@@ -23,11 +23,12 @@
not searchable.
**/

var through = require('through2');
var isObject = require('is-object');
var extend = require('extend');
var peliasLogger = require( 'pelias-logger' ).get( 'openstreetmap' );
var Document = require('pelias-model').Document;
const through = require('through2');
const isObject = require('is-object');
const extend = require('extend');
const peliasLogger = require( 'pelias-logger' ).get( 'openstreetmap' );
const Document = require('pelias-model').Document;
const parseSemicolonDelimitedValues = require('../util/parseSemicolonDelimitedValues');

function hasValidAddress( doc ){
if( !isObject( doc ) ){ return false; }
@@ -42,21 +43,20 @@ function hasValidAddress( doc ){
module.exports = function(){

var stream = through.obj( function( doc, enc, next ) {
var isNamedPoi = !!doc.getName('default');
var isAddress = hasValidAddress( doc );
const isNamedPoi = !!doc.getName('default');
const isAddress = hasValidAddress( doc );

// accept semi-colon delimited house numbers
// ref: https://github.com/pelias/openstreetmap/issues/21
var streetNumbers = (doc.getAddress('number') || '').split(';').map(Function.prototype.call, String.prototype.trim);
const streetNumbers = parseSemicolonDelimitedValues(doc.getAddress('number'));

// create a new record for street addresses
if( isAddress ){
var record;

streetNumbers.forEach( function( streetno, i ){
let record;

try {
var newid = [ doc.getSourceId() ];
const newid = [ doc.getSourceId() ];
if( i > 0 ){
newid.push( i );
peliasLogger.debug('[address_extractor] found multiple house numbers: ', streetNumbers);
@@ -116,7 +116,7 @@ module.exports = function(){
};

// properties to map from the osm record to the pelias doc
var addrProps = [ 'name', 'number', 'street', 'zip' ];
const addrProps = [ 'name', 'number', 'street', 'zip' ];

// call document setters and ignore non-fatal errors
function setProperties( record, doc ){
75 changes: 41 additions & 34 deletions stream/tag_mapper.js
Original file line number Diff line number Diff line change
@@ -7,10 +7,11 @@
const _ = require('lodash');
const through = require('through2');
const peliasLogger = require('pelias-logger').get('openstreetmap');
const parseSemicolonDelimitedValues = require('../util/parseSemicolonDelimitedValues');

var LOCALIZED_NAME_KEYS = require('../config/localized_name_keys');
var NAME_SCHEMA = require('../schema/name_osm');
var ADDRESS_SCHEMA = _.merge( {},
const LOCALIZED_NAME_KEYS = require('../config/localized_name_keys');
const NAME_SCHEMA = require('../schema/name_osm');
const ADDRESS_SCHEMA = _.merge( {},
require('../schema/address_tiger'),
require('../schema/address_osm'),
require('../schema/address_naptan'),
@@ -19,7 +20,7 @@ var ADDRESS_SCHEMA = _.merge( {},

module.exports = function(){

var stream = through.obj( function( doc, enc, next ) {
const stream = through.obj( function( doc, enc, next ) {

try {

@@ -35,34 +36,39 @@ module.exports = function(){

// Map localized names which begin with 'name:'
// @ref: http://wiki.openstreetmap.org/wiki/Namespace#Language_code_suffix
var suffix = getNameSuffix( key );
if( suffix ){
var val1 = trim( value );
if( val1 ){
doc.setName( suffix, val1 );
}
const langCode = getNameSuffix( key );
if( langCode ){
const langValues = parseSemicolonDelimitedValues( value );
langValues.forEach(( langValue, i ) => {
if ( i === 0 ) {
doc.setName( langCode, langValue );
} else {
doc.setNameAlias( langCode, langValue );
}
Comment on lines +42 to +47
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Ah nice this is much cleaner.

});
}

// Map name data from our name mapping schema
else if( _.has(NAME_SCHEMA, key) ){
var val2 = trim( value );
if( val2 ){
if( key === NAME_SCHEMA._primary ){
doc.setName( NAME_SCHEMA[key], val2 );
} else if ( 'default' === NAME_SCHEMA[key] ) {
doc.setNameAlias( NAME_SCHEMA[key], val2 );
} else {
doc.setName( NAME_SCHEMA[key], val2 );
const nameValues = parseSemicolonDelimitedValues( cleanString( value ) );
nameValues.forEach(( nameValue, i ) => {
// For the primary name key 'name', ensure it is the first value
if( 'name' === key && i === 0 ){
doc.setName(NAME_SCHEMA[key], nameValue);
return;
}
}

// Otherwise set as an alias
doc.setNameAlias( NAME_SCHEMA[key], nameValue );
});
}

// Map address data from our address mapping schema
else if( _.has(ADDRESS_SCHEMA, key) ){
var val3 = trim( value );
if( val3 ){
let label = ADDRESS_SCHEMA[key];
doc.setAddress(label, normalizeAddressField(label, val3));
const addrValue = cleanString( value );
if( addrValue ){
const label = ADDRESS_SCHEMA[key];
doc.setAddress(label, normalizeAddressField(label, addrValue));
}
}
});
@@ -71,21 +77,22 @@ module.exports = function(){
// other names which we could use as the default.
if( !doc.getName('default') ){

var defaultName =
_.get(tags, 'official_name') ||
_.get(tags, 'int_name') ||
_.get(tags, 'nat_name') ||
_.get(tags, 'reg_name') ||
doc.getName('en');
const defaultName = [
...parseSemicolonDelimitedValues(_.get(tags, 'official_name')),
...parseSemicolonDelimitedValues(_.get(tags, 'int_name')),
...parseSemicolonDelimitedValues(_.get(tags, 'nat_name')),
...parseSemicolonDelimitedValues(_.get(tags, 'reg_name')),
...parseSemicolonDelimitedValues(doc.getName('en'))
].filter(Boolean);
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

More fanciness :P

So just to confirm, this is because you can't use || when assigning to const, yeah?

That might actually be a use case for `let`, as the intention is much clearer; plus, each of these function calls always has to be evaluated, whereas a chain of `||` short-circuits as soon as there's a truthy value, right?


// use one of the preferred name tags listed above
if ( defaultName ){
doc.setName('default', defaultName);
if ( defaultName.length ){
doc.setName('default', defaultName[0]);
}

// else try to use an available two-letter language name tag
else {
var keys = Object.keys(doc.name).filter(n => n.length === 2);
const keys = Object.keys(doc.name).filter(n => n.length === 2);

// unambiguous (there is only a single two-letter name tag)
if ( keys.length === 1 ){
@@ -101,7 +108,7 @@ module.exports = function(){
// Import airport codes as aliases
if( tags.hasOwnProperty('aerodrome') || tags.hasOwnProperty('aeroway') ){
if( tags.hasOwnProperty('iata') ){
var iata = trim( tags.iata );
const iata = cleanString( tags.iata );
if( iata ){
doc.setNameAlias( 'default', iata );
doc.setNameAlias( 'default', `${iata} Airport` );
@@ -127,7 +134,7 @@ module.exports = function(){
};

// Clean string of leading/trailing junk chars
function trim( str ){
function cleanString( str ){
return _.trim( str, '#$%^*<>-=_{};:",./?\t\n\' ' );
}

3 changes: 2 additions & 1 deletion test/run.js
Original file line number Diff line number Diff line change
@@ -17,7 +17,8 @@ var tests = [
require('./stream/pbf'),
require('./stream/stats'),
require('./stream/tag_mapper'),
require('./stream/addresses_without_street')
require('./stream/addresses_without_street'),
require('./util/parseSemicolonDelimitedValues')
];

tests.map(function(t) {
78 changes: 78 additions & 0 deletions test/stream/tag_mapper.js
Original file line number Diff line number Diff line change
@@ -110,6 +110,84 @@ module.exports.tests.osm_names = function(test, common) {
}));
stream.write(doc);
});

// Semi-colon delimited 'name' and 'alt_name' tags: the first 'name' value is
// expected as the default name, while loc_name, the remaining 'name' values,
// every 'alt_name' value and short_name are expected as default aliases.
test('maps - name aliases - multiple alt_names', (t) => {
  const tags = {
    loc_name: 'loc_name',
    nat_name: 'nat_name',
    int_name: 'int_name',
    name: 'name ;name2; name3',
    alt_name: 'alt_name;alt_name2;alt_name3',
    official_name: 'official_name',
    old_name: 'old_name',
    reg_name: 'reg_name',
    short_name: 'short_name',
    sorting_name: 'sorting_name'
  };
  const expectedAliases = [
    'loc_name',
    'name2', 'name3',
    'alt_name', 'alt_name2', 'alt_name3',
    'short_name'
  ];

  const input = new Document('a', 'b', 1);
  input.setMeta('tags', tags);

  // downstream sink which inspects the mapped document
  const sink = through.obj((mapped, enc, next) => {
    t.equal(mapped.getName('default'), 'name', 'correctly mapped');
    t.deepEqual(mapped.getNameAliases('default'), expectedAliases, 'correctly mapped');

    t.end(); // test will fail if not called (or called twice).
    next();
  });

  const stream = mapper();
  stream.pipe(sink);
  stream.write(input);
});

// Both 'name' and a localized 'name:de' tag may carry several semi-colon
// delimited values: the first is expected as the name for that language,
// the rest as aliases for the same language.
test('maps - semi-colon delimited names', (t) => {
  const input = new Document('a', 'b', 1);
  input.setMeta('tags', {
    name: 'name ;name2; name3',
    'name:de': 'ding ;ding2; ding3',
  });

  // downstream sink which inspects the mapped document
  const sink = through.obj((mapped, enc, next) => {
    t.equal(mapped.getName('default'), 'name', 'correctly mapped');
    t.deepEqual(mapped.getNameAliases('default'), ['name2', 'name3'], 'correctly mapped');

    t.equal(mapped.getName('de'), 'ding', 'correctly mapped');
    t.deepEqual(mapped.getNameAliases('de'), ['ding2', 'ding3'], 'correctly mapped');

    t.end(); // test will fail if not called (or called twice).
    next();
  });

  const stream = mapper();
  stream.pipe(sink);
  stream.write(input);
});

// With only a localized 'name:de' tag present, its first value is expected
// to become the default name (with no default aliases), while the 'de' name
// and its aliases are still populated from the delimited values.
test('maps - semi-colon delimited names - no "name" tag', (t) => {
  const input = new Document('a', 'b', 1);
  input.setMeta('tags', {
    'name:de': 'ding ;ding2; ding3',
  });

  // downstream sink which inspects the mapped document
  const sink = through.obj((mapped, enc, next) => {
    t.equal(mapped.getName('default'), 'ding', 'correctly mapped');
    t.deepEqual(mapped.getNameAliases('default'), [], 'correctly mapped');

    t.equal(mapped.getName('de'), 'ding', 'correctly mapped');
    t.deepEqual(mapped.getNameAliases('de'), ['ding2', 'ding3'], 'correctly mapped');

    t.end(); // test will fail if not called (or called twice).
    next();
  });

  const stream = mapper();
  stream.pipe(sink);
  stream.write(input);
});
};

// Cover the case of a tag key being 'name:' eg. { 'name:': 'foo' }
38 changes: 38 additions & 0 deletions test/util/parseSemicolonDelimitedValues.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
const parseSemicolonDelimitedValues = require('../../util/parseSemicolonDelimitedValues');

module.exports.tests = {};

// test exports
// Smoke tests: the module exports a function, non-string or empty input
// yields an empty Array, and delimited strings are split into trimmed,
// non-empty values.
module.exports.tests.smoke = (test, common) => {
  test('interface', (t) => {
    t.equal(typeof parseSemicolonDelimitedValues, 'function', 'function');
    t.end();
  });

  test('parse - invalid', (t) => {
    // every one of these inputs is expected to produce an empty Array
    const invalidInputs = [1, ['a'], [{'a': 'b'}], undefined, null, ''];
    invalidInputs.forEach((input) => {
      t.deepEqual(parseSemicolonDelimitedValues(input), []);
    });
    t.end();
  });

  test('parse - examples', (t) => {
    // pairs of [ input, expected output ]
    const examples = [
      ['', []],
      [' ', []],
      [' ;; ; ; ; ; ;; ; ;; ', []],
      [' a; b ;;; ; ', ['a', 'b']]
    ];
    examples.forEach(([input, expected]) => {
      t.deepEqual(parseSemicolonDelimitedValues(input), expected);
    });
    t.end();
  });
};

// Run every registered test group, prefixing each test name with the
// name of the module under test.
module.exports.all = (tape, common) => {
  const test = (name, testFunction) =>
    tape(`parseSemicolonDelimitedValues: ${name}`, testFunction);

  const groups = module.exports.tests;
  for (const groupName in groups) {
    groups[groupName](test, common);
  }
};
12 changes: 12 additions & 0 deletions util/parseSemicolonDelimitedValues.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
const _ = require('lodash');

// Split a multi-value OSM tag into an Array of trimmed, non-empty strings.
// Anything which is not a string produces an empty Array.
// https://wiki.openstreetmap.org/wiki/Talk:Semi-colon_value_separator
function parseSemicolonDelimitedValues(value) {
  if (!_.isString(value)) { return []; }

  const values = [];
  for (const part of value.split(';')) {
    const trimmed = part.trim();
    // skip segments which are empty once surrounding whitespace is removed
    if (trimmed.length) { values.push(trimmed); }
  }
  return values;
}

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

diff --git a/util/parseSemicolonDelimitedValues.js b/util/parseSemicolonDelimitedValues.js
index 08e21ac..39c123b 100644
--- a/util/parseSemicolonDelimitedValues.js
+++ b/util/parseSemicolonDelimitedValues.js
@@ -5,8 +5,8 @@ const _ = require('lodash');
 function parseSemicolonDelimitedValues(value) {
   return (_.isString(value) ? value : '')
     .split(';')
-    .map(Function.prototype.call, String.prototype.trim)
-    .filter(Boolean);
+    .map(v => v.trim())
+    .filter(v => v.length);
 }

 module.exports = parseSemicolonDelimitedValues;
\ No newline at end of file

module.exports = parseSemicolonDelimitedValues;