1 # SPDX-License-Identifier: GPL-3.0-or-later
3 # This file is part of Nominatim. (https://nominatim.org)
5 # Copyright (C) 2023 by the Nominatim developer community.
6 # For a full list of authors see the git log.
Helper functions for parsing parameters and outputting data
9 specifically for the v1 version of the API.
from typing import Tuple, Optional, Any, Dict, Iterable
from itertools import chain
import re

from nominatim.api.results import SearchResult, SearchResults, SourceTable
from nominatim.api.types import SearchDetails, GeometryFormat
# Lookup table from zoom level (the list index, 0-18) to the rank used
# by the v1 API for that zoom level.
# NOTE(review): the rows for zoom 3-12, 15 and 18 follow the zoom table
# of the v1 reverse API - confirm against the API specification.
REVERSE_MAX_RANKS = [2, 2, 2,   # 0-2   Continent/Sea
                     4, 4,      # 3-4   Country
                     8,         # 5     State
                     10, 10,    # 6-7   Region
                     12, 12,    # 8-9   County
                     16, 17,    # 10-11 City
                     18,        # 12    Town
                     19,        # 13    Village/Suburb
                     22,        # 14    Hamlet/Neighbourhood
                     25,        # 15    Localities
                     26,        # 16    Major Streets
                     27,        # 17    Minor Streets
                     30         # 18    Building
                    ]


def zoom_to_rank(zoom: int) -> int:
    """ Convert a zoom parameter into a rank according to the v1 API spec.

        Zoom values outside of the range covered by REVERSE_MAX_RANKS
        are clamped to the lowest or highest zoom level respectively.
    """
    # Derive the upper bound from the table, so that table and clamping
    # cannot get out of sync.
    highest_zoom = len(REVERSE_MAX_RANKS) - 1
    return REVERSE_MAX_RANKS[max(0, min(highest_zoom, zoom))]
# Rank range as a (minimum rank, maximum rank) tuple for each feature
# type that is accepted as `featureType` parameter.
# NOTE(review): the rank ranges follow the upstream v1 implementation -
# confirm against the API specification.
FEATURE_TYPE_TO_RANK: Dict[Optional[str], Any] = {
    'country': (4, 4),
    'state': (8, 8),
    'city': (14, 16),
    'settlement': (8, 20)
}
def feature_type_to_rank(feature_type: Optional[str]) -> Tuple[int, int]:
    """ Look up the rank range that belongs to a feature type parameter.

        Returns the entry of FEATURE_TYPE_TO_RANK for the given feature
        type. When the feature type is not a key of that table (which
        includes None), the range (0, 30) is returned.
    """
    if feature_type in FEATURE_TYPE_TO_RANK:
        return FEATURE_TYPE_TO_RANK[feature_type]

    return (0, 30)
#pylint: disable=too-many-arguments
def extend_query_parts(queryparts: Dict[str, Any], details: Dict[str, Any],
                       feature_type: Optional[str],
                       namedetails: bool, extratags: bool,
                       excluded: Iterable[str]) -> None:
    """ Add parameters from details dictionary to the query parts
        dictionary which is suitable as URL parameter dictionary.

        The details are parsed into a SearchDetails object first. The
        settings inspected below are then written as string values into
        `queryparts`, which is modified in place.

        `namedetails` and `extratags` switch on the URL parameters of the
        same name. The IDs in `excluded` are put in front of the excluded
        IDs from the details. `feature_type` is only written out when it
        is a key of FEATURE_TYPE_TO_RANK.
    """
    parsed = SearchDetails.from_kwargs(details)
    if parsed.geometry_output != GeometryFormat.NONE:
        # One 'polygon_<format>' parameter per requested geometry format.
        for flag in parsed.geometry_output:
            queryparts[f'polygon_{flag.name.lower()}'] = '1'
    if parsed.address_details:
        queryparts['addressdetails'] = '1'
    if namedetails:
        queryparts['namedetails'] = '1'
    if extratags:
        queryparts['extratags'] = '1'
    if parsed.geometry_simplification > 0.0:
        queryparts['polygon_threshold'] = f"{parsed.geometry_simplification:.6g}"
    # NOTE(review): 10 is presumably the default limit of the API, so that
    # the parameter is only needed for other values - confirm.
    if parsed.max_results != 10:
        queryparts['limit'] = str(parsed.max_results)
    if parsed.countries:
        queryparts['countrycodes'] = ','.join(parsed.countries)
    # Always written, even when both sources of excluded IDs are empty.
    queryparts['exclude_place_ids'] = \
        ','.join(chain(excluded, map(str, parsed.excluded)))
    # Only touch the coordinates when a viewbox was set at all.
    if parsed.viewbox:
        queryparts['viewbox'] = ','.join(f"{c:.7g}" for c in parsed.viewbox.coords)
    if parsed.bounded_viewbox:
        queryparts['bounded'] = '1'
    # A missing 'dedupe' entry is treated like an enabled deduplication
    # instead of failing with a KeyError.
    if not details.get('dedupe', True):
        queryparts['dedupe'] = '0'
    if feature_type in FEATURE_TYPE_TO_RANK:
        queryparts['featureType'] = feature_type
def deduplicate_results(results: SearchResults, max_results: int) -> SearchResults:
    """ Remove results that look like duplicates.

        Two results are considered the same if they have the same OSM ID
        or if they have the same category, display name and rank.

        A postcode result is dropped when the results also contain the
        postcode boundary relation for the same postcode. The function
        stops as soon as `max_results` results have been collected and
        returns the collected results as a new SearchResults list.
    """
    osm_ids_done = set()
    classification_done = set()
    deduped = SearchResults()
    for result in results:
        if result.source_table == SourceTable.POSTCODE:
            assert result.names and 'ref' in result.names
            if any(_is_postcode_relation_for(r, result.names['ref']) for r in results):
                continue
        # NOTE(review): the attributes for display name and rank are taken
        # from the description in the docstring - confirm against the
        # result classes.
        classification = (result.osm_object[0] if result.osm_object else None,
                          result.category,
                          result.display_name,
                          result.rank_address)
        # A result without OSM object has no OSM ID to compare. It must
        # not be treated as a duplicate of other results without one.
        osm_id_seen = result.osm_object is not None \
                      and result.osm_object in osm_ids_done
        if not osm_id_seen and classification not in classification_done:
            deduped.append(result)
        # Remember ID and classification of rejected results as well.
        if result.osm_object is not None:
            osm_ids_done.add(result.osm_object)
        classification_done.add(classification)
        if len(deduped) >= max_results:
            break

    return deduped
def _is_postcode_relation_for(result: SearchResult, postcode: str) -> bool:
    """ Check if the result is the postcode relation for the given
        postcode: a result from the placex table with an OSM object of
        type 'R', the category boundary/postal_code and a 'ref' name
        that equals the postcode.
    """
    if result.source_table != SourceTable.PLACEX:
        return False

    if result.osm_object is None or result.osm_object[0] != 'R':
        return False

    if result.category != ('boundary', 'postal_code'):
        return False

    if result.names is None:
        return False

    return result.names.get('ref') == postcode
def _deg(axis: str) -> str:
    """ Regex fragment for a coordinate in decimal degrees. The value is
        captured in the group '<axis>_deg'.
    """
    return f"(?P<{axis}_deg>\\d+\\.\\d+)°?"


def _deg_min(axis: str) -> str:
    """ Regex fragment for a coordinate in degrees and optional minutes,
        captured in the groups '<axis>_deg' and '<axis>_min'.
    """
    return f"(?P<{axis}_deg>\\d+)[°\\s]+(?P<{axis}_min>[\\d.]+)?[′']*"


def _deg_min_sec(axis: str) -> str:
    """ Regex fragment for a coordinate in degrees, minutes and optional
        seconds, captured in the groups '<axis>_deg', '<axis>_min' and
        '<axis>_sec'.
    """
    return f"(?P<{axis}_deg>\\d+)[°\\s]+(?P<{axis}_min>\\d+)[′'\\s]+(?P<{axis}_sec>[\\d.]+)?[\"″]*"


# Supported coordinate notations in the order in which they are tried.
# Each pattern is wrapped so that free text before the coordinate ends up
# in the group 'pre' and free text after it in the group 'post'.
COORD_REGEX = [re.compile(r'(?:(?P<pre>.*?)\s+)??' + r + r'(?:\s+(?P<post>.*))?') for r in (
    r"(?P<ns>[NS])\s*" + _deg('lat') + r"[\s,]+" + r"(?P<ew>[EW])\s*" + _deg('lon'),
    _deg('lat') + r"\s*(?P<ns>[NS])[\s,]+" + _deg('lon') + r"\s*(?P<ew>[EW])",
    r"(?P<ns>[NS])\s*" + _deg_min('lat') + r"[\s,]+" + r"(?P<ew>[EW])\s*" + _deg_min('lon'),
    _deg_min('lat') + r"\s*(?P<ns>[NS])[\s,]+" + _deg_min('lon') + r"\s*(?P<ew>[EW])",
    r"(?P<ns>[NS])\s*" + _deg_min_sec('lat') + r"[\s,]+" + r"(?P<ew>[EW])\s*" + _deg_min_sec('lon'),
    _deg_min_sec('lat') + r"\s*(?P<ns>[NS])[\s,]+" + _deg_min_sec('lon') + r"\s*(?P<ew>[EW])",
    r"\[?(?P<lat_deg>[+-]?\d+\.\d+)[\s,]+(?P<lon_deg>[+-]?\d+\.\d+)\]?"
)]


def _coord_from_groups(groups: Dict[str, Optional[str]], axis: str) -> float:
    """ Compute the decimal degrees for the given axis ('lat' or 'lon')
        from the groups of a COORD_REGEX match.

        Raises a ValueError when one of the parts is not a valid number.
    """
    value = float(groups[f'{axis}_deg'] or 0.0)
    for part, fraction in (('min', 60.0), ('sec', 3600.0)):
        # The group is missing when the notation has no such part and
        # None when the optional part was left out in the query.
        text = groups.get(f'{axis}_{part}')
        if text:
            value += float(text) / fraction

    return value


def extract_coords_from_query(query: str) -> Tuple[str, Optional[float], Optional[float]]:
    """ Look for something that is formatted like a coordinate at the
        beginning or end of the query. If found, extract the coordinate and
        return the remaining query (or the empty string if the query
        consisted of nothing but a coordinate).

        Only the first match will be returned.

        The result is a tuple of remaining query, x (longitude) and
        y (latitude). Western longitudes and southern latitudes are
        returned as negative numbers. When no coordinate was found, the
        unchanged query is returned together with None for x and y.
    """
    for regex in COORD_REGEX:
        match = regex.fullmatch(query)
        if match is None:
            continue

        groups = match.groupdict()
        if groups['pre'] and groups['post']:
            # The coordinate is in the middle of the query.
            continue

        try:
            x = _coord_from_groups(groups, 'lon')
            y = _coord_from_groups(groups, 'lat')
        except ValueError:
            # The pattern accepts strings like '1.2.3' for minutes and
            # seconds. That is not a coordinate in this notation.
            continue

        if groups.get('ew') == 'W':
            x = -x
        if groups.get('ns') == 'S':
            y = -y

        return groups['pre'] or groups['post'] or '', x, y

    return query, None, None
# Matches the first '[key=value]' term in a string. Key and value may
# consist of ASCII letters and underscores only.
CATEGORY_REGEX = re.compile(r'(?P<pre>.*?)'
                            r'\[(?P<cls>[a-zA-Z_]+)=(?P<typ>[a-zA-Z_]+)\]'
                            r'(?P<post>.*)')


def extract_category_from_query(query: str) -> Tuple[str, Optional[str], Optional[str]]:
    """ Find a hidden category specification of the form '[key=value]'
        in the query. If there is one, return the query without the
        category term (or the empty string if the query consisted of
        nothing but a category) together with key and value.
        Otherwise return the unchanged query and None for key and value.

        Only the first match will be returned.
    """
    found = CATEGORY_REGEX.search(query)
    if found is None:
        return query, None, None

    before, after = found.group('pre', 'post')
    remainder = ' '.join((before.strip(), after.strip())).strip()

    return remainder, found.group('cls'), found.group('typ')