From f5977dac75bde4ab5b7bddf0c1834e1b7c688d64 Mon Sep 17 00:00:00 2001 From: Sarah Hoffmann Date: Thu, 13 May 2021 12:19:20 +0200 Subject: [PATCH] ignore invalid coordinates in external postcodes --- nominatim/tools/postcodes.py | 15 ++++++++++++++- test/python/test_tools_postcodes.py | 26 ++++++++++++++++++++++++++ 2 files changed, 40 insertions(+), 1 deletion(-) diff --git a/nominatim/tools/postcodes.py b/nominatim/tools/postcodes.py index 9bcdf2e5..195d407e 100644 --- a/nominatim/tools/postcodes.py +++ b/nominatim/tools/postcodes.py @@ -5,6 +5,7 @@ of artificial postcode centroids. import csv import gzip import logging +from math import isfinite from psycopg2.extras import execute_values @@ -12,6 +13,17 @@ from nominatim.db.connection import connect LOG = logging.getLogger() +def _to_float(num, max_value): + """ Convert the number in string into a float. The number is expected + to be in the open range (-max_value, max_value). Otherwise raises a + ValueError. + """ + num = float(num) + if not isfinite(num) or num <= -max_value or num >= max_value: + raise ValueError() + + return num + class _CountryPostcodesCollector: """ Collector for postcodes of a single country. 
""" @@ -108,7 +120,8 @@ class _CountryPostcodesCollector: postcode = analyzer.normalize_postcode(row['postcode']) if postcode not in self.collected: try: - self.collected[postcode] = (float(row['lon']), float(row['lat'])) + self.collected[postcode] = (_to_float(row['lon'], 180), + _to_float(row['lat'], 90)) except ValueError: LOG.warning("Bad coordinates %s, %s in %s country postcode file.", row['lat'], row['lon'], self.country) diff --git a/test/python/test_tools_postcodes.py b/test/python/test_tools_postcodes.py index e0a62ec7..adbc0e74 100644 --- a/test/python/test_tools_postcodes.py +++ b/test/python/test_tools_postcodes.py @@ -157,3 +157,29 @@ def test_import_postcodes_extern(dsn, placex_table, postcode_table, tmp_path, assert postcode_table.row_set == {('xx', 'AB 4511', 10, 12), ('xx', 'CD 4511', -10, -5)} + +def test_import_postcodes_extern_bad_column(dsn, placex_table, postcode_table, + tmp_path, tokenizer): + placex_table.add(country='xx', geom='POINT(10 12)', + address=dict(postcode='AB 4511')) + + extfile = tmp_path / 'xx_postcodes.csv' + extfile.write_text("postode,lat,lon\nAB 4511,-4,-1\nCD 4511,-5, -10") + + postcodes.update_postcodes(dsn, tmp_path, tokenizer) + + assert postcode_table.row_set == {('xx', 'AB 4511', 10, 12)} + + +def test_import_postcodes_extern_bad_number(dsn, placex_table, postcode_table, + tmp_path, tokenizer): + placex_table.add(country='xx', geom='POINT(10 12)', + address=dict(postcode='AB 4511')) + + extfile = tmp_path / 'xx_postcodes.csv' + extfile.write_text("postcode,lat,lon\nXX 4511,-4,NaN\nCD 4511,-5, -10\n34,200,0") + + postcodes.update_postcodes(dsn, tmp_path, tokenizer) + + assert postcode_table.row_set == {('xx', 'AB 4511', 10, 12), + ('xx', 'CD 4511', -10, -5)} -- 2.39.5