From: Sarah Hoffmann Date: Thu, 24 Aug 2023 12:24:16 +0000 (+0200) Subject: Merge remote-tracking branch 'upstream/master' X-Git-Tag: deploy~51 X-Git-Url: https://git.openstreetmap.org./nominatim.git/commitdiff_plain/878a1d756d34169609ab64ee228d164bbf3bfad7?hp=888a5ab2f69c6c06401ea7550b5de498debd2f35 Merge remote-tracking branch 'upstream/master' --- diff --git a/.github/actions/build-nominatim/action.yml b/.github/actions/build-nominatim/action.yml index 281b5128..70392d79 100644 --- a/.github/actions/build-nominatim/action.yml +++ b/.github/actions/build-nominatim/action.yml @@ -25,7 +25,7 @@ runs: shell: bash - name: Install${{ matrix.flavour }} prerequisites run: | - sudo apt-get install -y -qq libboost-system-dev libboost-filesystem-dev libexpat1-dev zlib1g-dev libbz2-dev libpq-dev libproj-dev libicu-dev liblua${LUA_VERSION}-dev lua${LUA_VERSION} lua-dkjson + sudo apt-get install -y -qq libboost-system-dev libboost-filesystem-dev libexpat1-dev zlib1g-dev libbz2-dev libpq-dev libproj-dev libicu-dev liblua${LUA_VERSION}-dev lua${LUA_VERSION} lua-dkjson nlohmann-json3-dev if [ "$FLAVOUR" == "oldstuff" ]; then pip3 install MarkupSafe==2.0.1 python-dotenv psycopg2==2.7.7 jinja2==2.8 psutil==5.4.2 pyicu==2.9 osmium PyYAML==5.1 sqlalchemy==1.4.31 datrie asyncpg else diff --git a/docs/admin/Installation.md b/docs/admin/Installation.md index d85359fa..abcd89d8 100644 --- a/docs/admin/Installation.md +++ b/docs/admin/Installation.md @@ -35,6 +35,7 @@ For compiling: * [bzip2](http://www.bzip.org/) * [zlib](https://www.zlib.net/) * [ICU](http://site.icu-project.org/) + * [nlohmann/json](https://json.nlohmann.me/) * [Boost libraries](https://www.boost.org/), including system and filesystem * PostgreSQL client libraries * a recent C++ compiler (gcc 5+ or Clang 3.8+) diff --git a/docs/api/Search.md b/docs/api/Search.md index 39864b24..b54c5302 100644 --- a/docs/api/Search.md +++ b/docs/api/Search.md @@ -174,116 +174,136 @@ This overrides the specified machine readable format. (Default: 0) ## Examples -##### XML with kml polygon +##### XML with KML polygon -* [https://nominatim.openstreetmap.org/search?q=135+pilkington+avenue,+birmingham&format=xml&polygon_geojson=1&addressdetails=1](https://nominatim.openstreetmap.org/search?q=135+pilkington+avenue,+birmingham&format=xml&polygon_geojson=1&addressdetails=1) +* [https://nominatim.openstreetmap.org/search?q=135+pilkington+avenue,+birmingham&format=xml&polygon_kml=1&addressdetails=1](https://nominatim.openstreetmap.org/search?q=135+pilkington+avenue,+birmingham&format=xml&polygon_kml=1&addressdetails=1) ```xml - - - - - - - -1.816513,52.548756599999997 -1.816434,52.548747300000002 -1.816429,52.5487629 -1.8163717,52.548756099999999 -1.8163464,52.548834599999999 -1.8164599,52.548848100000001 -1.8164685,52.5488213 -1.8164913,52.548824000000003 -1.816513,52.548756599999997 - - - - - 135 - Pilkington Avenue - Wylde Green - Sutton Coldfield - City of Birmingham - West Midlands (county) - B72 - United Kingdom - gb - - + + + + + + + + -1.816513,52.5487566 -1.816434,52.5487473 -1.816429,52.5487629 -1.8163717,52.5487561 -1.8163464,52.5488346 -1.8164599,52.5488481 -1.8164685,52.5488213 -1.8164913,52.548824 -1.816513,52.5487566 + + + + + 135 + Pilkington Avenue + Maney + Sutton Coldfield + Wylde Green + Birmingham + GB-BIR + West Midlands Combined Authority + England + GB-ENG + B72 1LH + United Kingdom + gb + + ``` ##### JSON with SVG polygon -[https://nominatim.openstreetmap.org/search/Unter%20den%20Linden%201%20Berlin?format=json&addressdetails=1&limit=1&polygon_svg=1](https://nominatim.openstreetmap.org/search/Unter%20den%20Linden%201%20Berlin?format=json&addressdetails=1&limit=1&polygon_svg=1) +[https://nominatim.openstreetmap.org/search?q=Unter%20den%20Linden%201%20Berlin&format=json&addressdetails=1&limit=1&polygon_svg=1](https://nominatim.openstreetmap.org/search?q=Unter%20den%20Linden%201%20Berlin&format=json&addressdetails=1&limit=1&polygon_svg=1) ```json - { - "address": { - "city": "Berlin", - "city_district": "Mitte", - "construction": "Unter den Linden", - "continent": "European Union", - "country": "Deutschland", - "country_code": "de", - "house_number": "1", - "neighbourhood": "Scheunenviertel", - "postcode": "10117", - "public_building": "Kommandantenhaus", - "state": "Berlin", - "suburb": "Mitte" - }, - "boundingbox": [ - "52.5170783996582", - "52.5173187255859", - "13.3975105285645", - "13.3981599807739" - ], - "class": "amenity", - "display_name": "Kommandantenhaus, 1, Unter den Linden, Scheunenviertel, Mitte, Berlin, 10117, Deutschland, European Union", - "importance": 0.73606775332943, - "lat": "52.51719785", - "licence": "Data \u00a9 OpenStreetMap contributors, ODbL 1.0. https://www.openstreetmap.org/copyright", - "lon": "13.3978352028938", - "osm_id": "15976890", - "osm_type": "way", - "place_id": "30848715", - "svg": "M 13.397511 -52.517283599999999 L 13.397829400000001 -52.517299800000004 13.398131599999999 -52.517315099999998 13.398159400000001 -52.517112099999999 13.3975388 -52.517080700000001 Z", - "type": "public_building" - } +[ + { + "address": { + "ISO3166-2-lvl4": "DE-BE", + "borough": "Mitte", + "city": "Berlin", + "country": "Deutschland", + "country_code": "de", + "historic": "Kommandantenhaus", + "house_number": "1", + "neighbourhood": "Friedrichswerder", + "postcode": "10117", + "road": "Unter den Linden", + "suburb": "Mitte" + }, + "boundingbox": [ + "52.5170798", + "52.5173311", + "13.3975116", + "13.3981577" + ], + "class": "historic", + "display_name": "Kommandantenhaus, 1, Unter den Linden, Friedrichswerder, Mitte, Berlin, 10117, Deutschland", + "importance": 0.8135042058306902, + "lat": "52.51720765", + "licence": "Data © OpenStreetMap contributors, ODbL 1.0. https://osm.org/copyright", + "lon": "13.397834399325466", + "osm_id": 15976890, + "osm_type": "way", + "place_id": 108681845, + "svg": "M 13.3975116 -52.5172905 L 13.397549 -52.5170798 13.397715 -52.5170906 13.3977122 -52.5171064 13.3977392 -52.5171086 13.3977417 -52.5170924 13.3979655 -52.5171069 13.3979623 -52.5171233 13.3979893 -52.5171248 13.3979922 -52.5171093 13.3981577 -52.5171203 13.398121 -52.5173311 13.3978115 -52.5173103 Z", + "type": "house" + } +] ``` ##### JSON with address details -[https://nominatim.openstreetmap.org/?addressdetails=1&q=bakery+in+berlin+wedding&format=json&limit=1](https://nominatim.openstreetmap.org/?addressdetails=1&q=bakery+in+berlin+wedding&format=json&limit=1) +[https://nominatim.openstreetmap.org/search?addressdetails=1&q=bakery+in+berlin+wedding&format=jsonv2&limit=1](https://nominatim.openstreetmap.org/search?addressdetails=1&q=bakery+in+berlin+wedding&format=jsonv2&limit=1) ```json - { - "address": { - "bakery": "B\u00e4cker Kamps", - "city_district": "Mitte", - "continent": "European Union", - "country": "Deutschland", - "country_code": "de", - "footway": "Bahnsteig U6", - "neighbourhood": "Sprengelkiez", - "postcode": "13353", - "state": "Berlin", - "suburb": "Wedding" - }, - "boundingbox": [ - "52.5460929870605", - "52.5460968017578", - "13.3591794967651", - "13.3591804504395" - ], - "class": "shop", - "display_name": "B\u00e4cker Kamps, Bahnsteig U6, Sprengelkiez, Wedding, Mitte, Berlin, 13353, Deutschland, European Union", - "icon": "https://nominatim.openstreetmap.org/images/mapicons/shopping_bakery.p.20.png", - "importance": 0.201, - "lat": "52.5460941", - "licence": "Data \u00a9 OpenStreetMap contributors, ODbL 1.0. https://www.openstreetmap.org/copyright", - "lon": "13.35918", - "osm_id": "317179427", - "osm_type": "node", - "place_id": "1453068", - "type": "bakery" - } +[ + { + "address": { + "ISO3166-2-lvl4": "DE-BE", + "borough": "Mitte", + "city": "Berlin", + "country": "Deutschland", + "country_code": "de", + "neighbourhood": "Sprengelkiez", + "postcode": "13347", + "road": "Lindower Straße", + "shop": "Ditsch", + "suburb": "Wedding" + }, + "addresstype": "shop", + "boundingbox": [ + "52.5427201", + "52.5427654", + "13.3668619", + "13.3669442" + ], + "category": "shop", + "display_name": "Ditsch, Lindower Straße, Sprengelkiez, Wedding, Mitte, Berlin, 13347, Deutschland", + "importance": 9.99999999995449e-06, + "lat": "52.54274275", + "licence": "Data © OpenStreetMap contributors, ODbL 1.0. http://osm.org/copyright", + "lon": "13.36690305710228", + "name": "Ditsch", + "osm_id": 437595031, + "osm_type": "way", + "place_id": 204751033, + "place_rank": 30, + "type": "bakery" + } +] ``` ##### GeoJSON diff --git a/nominatim/api/search/db_search_builder.py b/nominatim/api/search/db_search_builder.py index 377c4be7..c9e48b0f 100644 --- a/nominatim/api/search/db_search_builder.py +++ b/nominatim/api/search/db_search_builder.py @@ -212,39 +212,27 @@ class SearchBuilder: yield penalty, exp_count, dbf.lookup_by_names(name_tokens, addr_tokens) return - exp_count = exp_count / (2**len(addr_partials)) if addr_partials else exp_count - # Partial term to frequent. Try looking up by rare full names first. name_fulls = self.query.get_tokens(name, TokenType.WORD) - rare_names = list(filter(lambda t: t.count < 10000, name_fulls)) + fulls_count = sum(t.count for t in name_fulls) / (2**len(addr_partials)) # At this point drop unindexed partials from the address. # This might yield wrong results, nothing we can do about that. if not partials_indexed: addr_tokens = [t.token for t in addr_partials if t.is_indexed] penalty += 1.2 * sum(t.penalty for t in addr_partials if not t.is_indexed) - if rare_names: - # Any of the full names applies with all of the partials from the address - yield penalty, sum(t.count for t in rare_names),\ - dbf.lookup_by_any_name([t.token for t in rare_names], addr_tokens) + # Any of the full names applies with all of the partials from the address + yield penalty, fulls_count,\ + dbf.lookup_by_any_name([t.token for t in name_fulls], addr_tokens, + 'restrict' if fulls_count < 10000 else 'lookup_all') # To catch remaining results, lookup by name and address # We only do this if there is a reasonable number of results expected. - if exp_count < 10000: - if all(t.is_indexed for t in name_partials): - lookup = [dbf.FieldLookup('name_vector', name_tokens, 'lookup_all')] - else: - # we don't have the partials, try with the non-rare names - non_rare_names = [t.token for t in name_fulls if t.count >= 10000] - if not non_rare_names: - return - lookup = [dbf.FieldLookup('name_vector', non_rare_names, 'lookup_any')] + exp_count = exp_count / (2**len(addr_partials)) if addr_partials else exp_count + if exp_count < 10000 and all(t.is_indexed for t in name_partials): + lookup = [dbf.FieldLookup('name_vector', name_tokens, 'lookup_all')] if addr_tokens: lookup.append(dbf.FieldLookup('nameaddress_vector', addr_tokens, 'lookup_all')) - penalty += 0.1 * max(0, 5 - len(name_partials) - len(addr_tokens)) - if len(rare_names) == len(name_fulls): - # if there already was a search for all full tokens, - # avoid this if anything has been found - penalty += 0.25 + penalty += 0.35 * max(0, 5 - len(name_partials) - len(addr_tokens)) yield penalty, exp_count, lookup diff --git a/nominatim/api/search/db_search_fields.py b/nominatim/api/search/db_search_fields.py index 2b2e3e56..612e9059 100644 --- a/nominatim/api/search/db_search_fields.py +++ b/nominatim/api/search/db_search_fields.py @@ -224,14 +224,15 @@ def lookup_by_names(name_tokens: List[int], addr_tokens: List[int]) -> List[Fiel return lookup -def lookup_by_any_name(name_tokens: List[int], addr_tokens: List[int]) -> List[FieldLookup]: +def lookup_by_any_name(name_tokens: List[int], addr_tokens: List[int], + lookup_type: str) -> List[FieldLookup]: """ Create a lookup list where name tokens are looked up via index and only one of the name tokens must be present. Potential address tokens are used to restrict the search further. """ lookup = [FieldLookup('name_vector', name_tokens, 'lookup_any')] if addr_tokens: - lookup.append(FieldLookup('nameaddress_vector', addr_tokens, 'restrict')) + lookup.append(FieldLookup('nameaddress_vector', addr_tokens, lookup_type)) return lookup diff --git a/nominatim/api/search/db_searches.py b/nominatim/api/search/db_searches.py index 34a4037a..4d893962 100644 --- a/nominatim/api/search/db_searches.py +++ b/nominatim/api/search/db_searches.py @@ -627,6 +627,11 @@ class PlaceSearch(AbstractSearch): sql = sql.where(tsearch.c.centroid.intersects(VIEWBOX_PARAM)) else: sql = sql.where(tsearch.c.centroid.ST_Intersects_no_index(VIEWBOX_PARAM)) + elif self.expected_count >= 10000: + if details.viewbox.area < 0.5: + sql = sql.where(tsearch.c.centroid.intersects(VIEWBOX2_PARAM)) + else: + sql = sql.where(tsearch.c.centroid.ST_Intersects_no_index(VIEWBOX2_PARAM)) else: penalty += sa.case((t.c.geometry.intersects(VIEWBOX_PARAM), 0.0), (t.c.geometry.intersects(VIEWBOX2_PARAM), 1.0), @@ -643,13 +648,18 @@ class PlaceSearch(AbstractSearch): .label('importance')) sql = sql.order_by(sa.desc(sa.text('importance'))) else: - sql = sql.order_by(penalty - sa.case((tsearch.c.importance > 0, tsearch.c.importance), - else_=0.75001-(sa.cast(tsearch.c.search_rank, sa.Float())/40))) + if self.expected_count < 10000\ + or (details.viewbox is not None and details.viewbox.area < 0.5): + sql = sql.order_by( + penalty - sa.case((tsearch.c.importance > 0, tsearch.c.importance), + else_=0.75001-(sa.cast(tsearch.c.search_rank, sa.Float())/40))) sql = sql.add_columns(t.c.importance) - sql = sql.add_columns(penalty.label('accuracy'))\ - .order_by(sa.text('accuracy')) + sql = sql.add_columns(penalty.label('accuracy')) + + if self.expected_count < 10000: + sql = sql.order_by(sa.text('accuracy')) if self.housenumbers: hnr_regexp = f"\\m({'|'.join(self.housenumbers.values)})\\M" diff --git a/nominatim/api/v1/server_glue.py b/nominatim/api/v1/server_glue.py index cf9bc3af..95484c5b 100644 --- a/nominatim/api/v1/server_glue.py +++ b/nominatim/api/v1/server_glue.py @@ -25,17 +25,18 @@ from nominatim.api.v1.format import dispatch as formatting from nominatim.api.v1.format import RawDataList from nominatim.api.v1 import helpers -CONTENT_TYPE = { - 'text': 'text/plain; charset=utf-8', - 'xml': 'text/xml; charset=utf-8', - 'debug': 'text/html; charset=utf-8' -} +CONTENT_TEXT = 'text/plain; charset=utf-8' +CONTENT_XML = 'text/xml; charset=utf-8' +CONTENT_HTML = 'text/html; charset=utf-8' +CONTENT_JSON = 'application/json; charset=utf-8' + +CONTENT_TYPE = {'text': CONTENT_TEXT, 'xml': CONTENT_XML, 'debug': CONTENT_HTML} class ASGIAdaptor(abc.ABC): """ Adapter class for the different ASGI frameworks. Wraps functionality over concrete requests and responses. """ - content_type: str = 'text/plain; charset=utf-8' + content_type: str = CONTENT_TEXT @abc.abstractmethod def get(self, name: str, default: Optional[str] = None) -> Optional[str]: @@ -85,13 +86,13 @@ class ASGIAdaptor(abc.ABC): """ Create a response from the given output. Wraps a JSONP function around the response, if necessary. """ - if self.content_type == 'application/json' and status == 200: + if self.content_type == CONTENT_JSON and status == 200: jsonp = self.get('json_callback') if jsonp is not None: if any(not part.isidentifier() for part in jsonp.split('.')): self.raise_error('Invalid json_callback value') output = f"{jsonp}({output})" - self.content_type = 'application/javascript' + self.content_type = 'application/javascript; charset=utf-8' return self.create_response(status, output, num_results) @@ -101,16 +102,16 @@ class ASGIAdaptor(abc.ABC): message. The message will be formatted according to the output format chosen by the request. """ - if self.content_type == 'text/xml; charset=utf-8': + if self.content_type == CONTENT_XML: msg = f""" {status} {msg} """ - elif self.content_type == 'application/json': + elif self.content_type == CONTENT_JSON: msg = f"""{{"error":{{"code":{status},"message":"{msg}"}}}}""" - elif self.content_type == 'text/html; charset=utf-8': + elif self.content_type == CONTENT_HTML: loglib.log().section('Execution error') loglib.log().var_dump('Status', status) loglib.log().var_dump('Message', msg) @@ -204,7 +205,7 @@ class ASGIAdaptor(abc.ABC): """ if self.get_bool('debug', False): loglib.set_log_output('html') - self.content_type = 'text/html; charset=utf-8' + self.content_type = CONTENT_HTML return True return False @@ -234,7 +235,7 @@ class ASGIAdaptor(abc.ABC): self.raise_error("Parameter 'format' must be one of: " + ', '.join(formatting.list_formats(result_type))) - self.content_type = CONTENT_TYPE.get(fmt, 'application/json') + self.content_type = CONTENT_TYPE.get(fmt, CONTENT_JSON) return fmt diff --git a/osm2pgsql b/osm2pgsql index 4facd1ae..ea0178e9 160000 --- a/osm2pgsql +++ b/osm2pgsql @@ -1 +1 @@ -Subproject commit 4facd1aea451cea220261c361698b8e5f18a9327 +Subproject commit ea0178e97d5b69a87a8b9c35210c8be4674e60e6 diff --git a/test/python/api/test_server_glue_v1.py b/test/python/api/test_server_glue_v1.py index fe406c42..5a7430f4 100644 --- a/test/python/api/test_server_glue_v1.py +++ b/test/python/api/test_server_glue_v1.py @@ -67,7 +67,7 @@ def test_adaptor_parse_format_use_configured(): adaptor = FakeAdaptor(params={'format': 'json'}) assert adaptor.parse_format(napi.StatusResult, 'text') == 'json' - assert adaptor.content_type == 'application/json' + assert adaptor.content_type == 'application/json; charset=utf-8' def test_adaptor_parse_format_invalid_value(): @@ -132,7 +132,7 @@ class TestAdaptorRaiseError: def test_json(self): - self.adaptor.content_type = 'application/json' + self.adaptor.content_type = 'application/json; charset=utf-8' err = self.run_raise_error('TEST', 501) @@ -189,7 +189,7 @@ def test_build_response_with_status(): assert isinstance(resp, FakeResponse) assert resp.status == 404 assert resp.output == 'stuff\nmore stuff' - assert resp.content_type == 'application/json' + assert resp.content_type == 'application/json; charset=utf-8' def test_build_response_jsonp_with_json(): @@ -201,7 +201,7 @@ def test_build_response_jsonp_with_json(): assert isinstance(resp, FakeResponse) assert resp.status == 200 assert resp.output == 'test.func({})' - assert resp.content_type == 'application/javascript' + assert resp.content_type == 'application/javascript; charset=utf-8' def test_build_response_jsonp_without_json(): @@ -270,7 +270,7 @@ class TestStatusEndpoint: assert isinstance(resp, FakeResponse) assert resp.status == 200 - assert resp.content_type == 'application/json' + assert resp.content_type == 'application/json; charset=utf-8' @pytest.mark.asyncio diff --git a/vagrant/Install-on-Ubuntu-20.sh b/vagrant/Install-on-Ubuntu-20.sh index 0f664da2..78c42007 100755 --- a/vagrant/Install-on-Ubuntu-20.sh +++ b/vagrant/Install-on-Ubuntu-20.sh @@ -23,7 +23,7 @@ export DEBIAN_FRONTEND=noninteractive #DOCS: sudo apt install -y build-essential cmake g++ libboost-dev libboost-system-dev \ libboost-filesystem-dev libexpat1-dev zlib1g-dev \ libbz2-dev libpq-dev liblua5.3-dev lua5.3 lua-dkjson \ - postgresql-12-postgis-3 \ + nlohmann-json3-dev postgresql-12-postgis-3 \ postgresql-contrib-12 postgresql-12-postgis-3-scripts \ php-cli php-pgsql php-intl libicu-dev python3-dotenv \ python3-psycopg2 python3-psutil python3-jinja2 python3-pip \ diff --git a/vagrant/Install-on-Ubuntu-22.sh b/vagrant/Install-on-Ubuntu-22.sh index b170daad..19e698e0 100755 --- a/vagrant/Install-on-Ubuntu-22.sh +++ b/vagrant/Install-on-Ubuntu-22.sh @@ -23,12 +23,12 @@ export DEBIAN_FRONTEND=noninteractive #DOCS: sudo apt install -y build-essential cmake g++ libboost-dev libboost-system-dev \ libboost-filesystem-dev libexpat1-dev zlib1g-dev \ libbz2-dev libpq-dev liblua5.3-dev lua5.3 lua-dkjson \ - postgresql-server-dev-14 postgresql-14-postgis-3 \ + nlohmann-json3-dev postgresql-14-postgis-3 \ postgresql-contrib-14 postgresql-14-postgis-3-scripts \ php-cli php-pgsql php-intl libicu-dev python3-dotenv \ python3-psycopg2 python3-psutil python3-jinja2 \ python3-icu python3-datrie python3-sqlalchemy \ - python3-asyncpg git + python3-asyncpg python3-yaml git # # System Configuration