Merge remote-tracking branch 'upstream/master'

author Sarah Hoffmann <lonvia@denofr.de>

Fri, 21 Mar 2025 10:41:52 +0000 (11:41 +0100)

committer Sarah Hoffmann <lonvia@denofr.de>

Fri, 21 Mar 2025 10:41:52 +0000 (11:41 +0100)
author Sarah Hoffmann <lonvia@denofr.de>
Fri, 21 Mar 2025 10:41:52 +0000 (11:41 +0100)
committer Sarah Hoffmann <lonvia@denofr.de>
Fri, 21 Mar 2025 10:41:52 +0000 (11:41 +0100)
diff --git a/src/nominatim_api/core.py b/src/nominatim_api/core.py

index a71bca6e981f664465e1b5a76459619f24b51570..f8941bccb38fd5f1d02db6137e857436ad99ad2b 100644 (file)
--- a/src/nominatim_api/core.py
+++ b/src/nominatim_api/core.py
@@ -304,7 +304,7 @@ class NominatimAPIAsync:
              else:
                  details.restrict_min_max_rank(4, 4)
  
-            if 'layers' not in params:
+            if details.layers is None:
                  details.layers = ntyp.DataLayer.ADDRESS
                  if amenity:
                      details.layers |= ntyp.DataLayer.POI
diff --git a/src/nominatim_api/search/geocoder.py b/src/nominatim_api/search/geocoder.py

index 69455d77a0d3c14aa79d168fd053e045e80ad39a..8901529ffecc1de9ccbfae747dddc6fd37dd697b 100644 (file)
--- a/src/nominatim_api/search/geocoder.py
+++ b/src/nominatim_api/search/geocoder.py
@@ -238,7 +238,7 @@ def _dump_searches(searches: List[AbstractSearch], query: QueryStruct,
          if not lk:
              return ''
  
-        return f"{lk.lookup_type}({lk.column}{tk(lk.tokens)})"
+        return f"{lk.lookup_type.__name__}({lk.column}{tk(lk.tokens)})"
  
      def fmt_cstr(c: Any) -> str:
          if not c:
diff --git a/src/nominatim_api/search/postcode_parser.py b/src/nominatim_api/search/postcode_parser.py

index bb3ef1a4c86781ae9afb2bb478fae70669400053..fb8b3804e7107ed1f121805ea2d37af0b833b1fe 100644 (file)
--- a/src/nominatim_api/search/postcode_parser.py
+++ b/src/nominatim_api/search/postcode_parser.py
@@ -34,14 +34,14 @@ class PostcodeParser:
          unique_patterns: Dict[str, Dict[str, List[str]]] = {}
          for cc, data in cdata.items():
              if data.get('postcode'):
-                pat = data['postcode']['pattern'].replace('d', '[0-9]').replace('l', '[a-z]')
+                pat = data['postcode']['pattern'].replace('d', '[0-9]').replace('l', '[A-Z]')
                  out = data['postcode'].get('output')
                  if pat not in unique_patterns:
                      unique_patterns[pat] = defaultdict(list)
-                unique_patterns[pat][out].append(cc)
+                unique_patterns[pat][out].append(cc.upper())
  
          self.global_pattern = re.compile(
-                '(?:(?P<cc>[a-z][a-z])(?P<space>[ -]?))?(?P<pc>(?:(?:'
+                '(?:(?P<cc>[A-Z][A-Z])(?P<space>[ -]?))?(?P<pc>(?:(?:'
                  + ')|(?:'.join(unique_patterns) + '))[:, >].*)')
  
          self.local_patterns = [(re.compile(f"{pat}[:, >]"), list(info.items()))
@@ -57,25 +57,26 @@ class PostcodeParser:
          nodes = query.nodes
          outcodes: Set[Tuple[int, int, str]] = set()
  
+        terms = [n.term_normalized.upper() + n.btype for n in nodes]
          for i in range(query.num_token_slots()):
              if nodes[i].btype in '<,: ' and nodes[i + 1].btype != '`' \
                      and (i == 0 or nodes[i - 1].ptype != qmod.PHRASE_POSTCODE):
                  if nodes[i].ptype == qmod.PHRASE_ANY:
-                    word = nodes[i + 1].term_normalized + nodes[i + 1].btype
+                    word = terms[i + 1]
                      if word[-1] in ' -' and nodes[i + 2].btype != '`' \
                              and nodes[i + 1].ptype == qmod.PHRASE_ANY:
-                        word += nodes[i + 2].term_normalized + nodes[i + 2].btype
+                        word += terms[i + 2]
                          if word[-1] in ' -' and nodes[i + 3].btype != '`' \
                                  and nodes[i + 2].ptype == qmod.PHRASE_ANY:
-                            word += nodes[i + 3].term_normalized + nodes[i + 3].btype
+                            word += terms[i + 3]
  
                      self._match_word(word, i, False, outcodes)
                  elif nodes[i].ptype == qmod.PHRASE_POSTCODE:
-                    word = nodes[i + 1].term_normalized + nodes[i + 1].btype
+                    word = terms[i + 1]
                      for j in range(i + 1, query.num_token_slots()):
                          if nodes[j].ptype != qmod.PHRASE_POSTCODE:
                              break
-                        word += nodes[j + 1].term_normalized + nodes[j + 1].btype
+                        word += terms[j + 1]
  
                      self._match_word(word, i, True, outcodes)
  
@@ -98,6 +99,6 @@ class PostcodeParser:
                      for out, out_ccs in info:
                          if cc is None or cc in out_ccs:
                              if out:
-                                outcodes.add((*trange, lm.expand(out).upper()))
+                                outcodes.add((*trange, lm.expand(out)))
                              else:
-                                outcodes.add((*trange, lm.group(0)[:-1].upper()))
+                                outcodes.add((*trange, lm.group(0)[:-1]))
diff --git a/test/python/api/search/test_postcode_parser.py b/test/python/api/search/test_postcode_parser.py

index 38638e075f53e1d816e0cafcd8d4813d8707966b..f7d78857b0020d56f3c7769756e0320d387753e6 100644 (file)
--- a/test/python/api/search/test_postcode_parser.py
+++ b/test/python/api/search/test_postcode_parser.py
@@ -52,6 +52,12 @@ ky:
    postcode:
      pattern: "(d)-(dddd)"
      output: KY\1-\2
+
+gb:
+  postcode:
+    pattern: "(l?ld[A-Z0-9]?) ?(dll)"
+    output: \1 \2
+
      """)
  
      return project_env
@@ -83,6 +89,13 @@ def test_simple_postcode(pc_config, query, pos):
      assert result == {(pos, pos + 1, '45325'), (pos, pos + 1, '453 25')}
  
  
+@pytest.mark.parametrize('query', ['EC1R 3HF', 'ec1r 3hf'])
+def test_postcode_matching_case_insensitive(pc_config, query):
+    parser = PostcodeParser(pc_config)
+
+    assert parser.parse(mk_query(query)) == {(0, 2, 'EC1R 3HF')}
+
+
  def test_contained_postcode(pc_config):
      parser = PostcodeParser(pc_config)
author	Sarah Hoffmann <lonvia@denofr.de>
	Fri, 21 Mar 2025 10:41:52 +0000 (11:41 +0100)
committer	Sarah Hoffmann <lonvia@denofr.de>
	Fri, 21 Mar 2025 10:41:52 +0000 (11:41 +0100)
src/nominatim_api/core.py		patch | blob | history
src/nominatim_api/search/geocoder.py		patch | blob | history
src/nominatim_api/search/postcode_parser.py		patch | blob | history
test/python/api/search/test_postcode_parser.py		patch | blob | history