penalty = min(categories.penalties)
categories.penalties = [p - penalty for p in categories.penalties]
for search in builder:
- yield dbs.NearSearch(penalty, categories, search)
+ yield dbs.NearSearch(penalty + assignment.penalty, categories, search)
else:
- yield from builder
+ for search in builder:
+ search.penalty += assignment.penalty
+ yield search
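A minimal sketch of the two branches, assuming stand-in search objects with a mutable penalty attribute (all names here are hypothetical):

from typing import Iterator, List

class _Search:
    def __init__(self, penalty: float) -> None:
        self.penalty = penalty

# near-search branch: the cheapest category penalty moves into the wrapper
penalties: List[float] = [0.5, 0.25, 0.75]
base = min(penalties)                        # becomes the NearSearch base penalty
penalties = [p - base for p in penalties]    # cheapest category now costs 0
assert base == 0.25 and penalties == [0.25, 0.0, 0.5]

# plain branch: fold the assignment penalty into every yielded search
def _with_assignment_penalty(builder: Iterator[_Search],
                             extra: float) -> Iterator[_Search]:
    for search in builder:
        search.penalty += extra
        yield search

out = list(_with_assignment_penalty(iter([_Search(0.5), _Search(0.25)]), 0.25))
assert [s.penalty for s in out] == [0.75, 0.5]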
def build_poi_search(self, sdata: dbf.SearchData) -> Iterator[dbs.AbstractSearch]:
yield penalty, exp_count, dbf.lookup_by_names(name_tokens, addr_tokens)
return
- exp_count = min(exp_count, min(t.count for t in addr_partials)) \
- if addr_partials else exp_count
+ exp_count = exp_count / (2**len(addr_partials)) if addr_partials else exp_count
# Partial term too frequent. Try looking up by rare full names first.
name_fulls = self.query.get_tokens(name, TokenType.WORD)
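The new estimate assumes that every address partial roughly halves the expected number of hits, instead of capping the estimate at the rarest partial's own count; e.g.:

exp_count = 12000                  # hits expected from the name terms alone
addr_partials = ['main', 'st']     # two address partials (hypothetical values)
assert exp_count / (2 ** len(addr_partials)) == 3000.0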
"""
def __init__(self, sdata: SearchData) -> None:
super().__init__(sdata.penalty)
- self.categories = sdata.qualifiers
+ self.qualifiers = sdata.qualifiers
self.countries = sdata.countries
.order_by(t.c.centroid.ST_Distance(NEAR_PARAM)) \
.limit(LIMIT_PARAM)
- classtype = self.categories.values
+ classtype = self.qualifiers.values
if len(classtype) == 1:
cclass, ctype = classtype[0]
sql: SaLambdaSelect = sa.lambda_stmt(lambda: _base_query()
rows.extend(await conn.execute(sql, bind_params))
else:
# use the class type tables
- for category in self.categories.values:
+ for category in self.qualifiers.values:
table = await conn.get_class_table(*category)
if table is not None:
sql = _select_placex(t)\
for row in rows:
result = nres.create_from_placex_row(row, nres.SearchResult)
assert result
- result.accuracy = self.penalty + self.categories.get_penalty((row.class_, row.type))
+ result.accuracy = self.penalty + self.qualifiers.get_penalty((row.class_, row.type))
result.bbox = Bbox.from_wkb(row.bbox)
results.append(result)
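How the accuracy is composed can be sketched with a stand-in for the qualifier container; only the get_penalty lookup is mimicked here, the real class carries more state:

class _WeightedCategories:
    # maps (class, type) pairs to their penalties
    def __init__(self, values, penalties):
        self._penalties = dict(zip(values, penalties))

    def get_penalty(self, item, default=1000.0):
        return self._penalties.get(item, default)

quals = _WeightedCategories([('amenity', 'restaurant')], [0.5])
assert 0.25 + quals.get_penalty(('amenity', 'restaurant')) == 0.75  # penalty + qualifier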
if details.viewbox is not None:
if details.bounded_viewbox:
- sql = sql.where(tsearch.c.centroid.intersects(VIEWBOX_PARAM))
+ if details.viewbox.area < 0.2:
+ sql = sql.where(tsearch.c.centroid.intersects(VIEWBOX_PARAM))
+ else:
+ sql = sql.where(tsearch.c.centroid.ST_Intersects_no_index(VIEWBOX_PARAM))
else:
penalty += sa.case((t.c.geometry.intersects(VIEWBOX_PARAM), 0.0),
(t.c.geometry.intersects(VIEWBOX2_PARAM), 1.0),
if details.near is not None:
if details.near_radius is not None:
- sql = sql.where(tsearch.c.centroid.ST_DWithin(NEAR_PARAM, NEAR_RADIUS_PARAM))
+ if details.near_radius < 0.1:
+ sql = sql.where(tsearch.c.centroid.ST_DWithin(NEAR_PARAM, NEAR_RADIUS_PARAM))
+ else:
+ sql = sql.where(tsearch.c.centroid.ST_DWithin_no_index(NEAR_PARAM,
+ NEAR_RADIUS_PARAM))
sql = sql.add_columns(-tsearch.c.centroid.ST_Distance(NEAR_PARAM)
.label('importance'))
sql = sql.order_by(sa.desc(sa.text('importance')))
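Both cutoffs, 0.1 degrees for the radius and 0.2 square degrees for the viewbox area, assume WGS84 coordinates in the search tables; above them a bbox-index scan would touch too large a share of the table to pay off. Roughly:

DEG_TO_KM = 111.32   # km per degree of latitude (approximation)
print(f"radius cutoff  ~ {0.1 * DEG_TO_KM:.0f} km")       # ~ 11 km
print(f"viewbox cutoff ~ {0.2 * DEG_TO_KM**2:.0f} km^2")  # ~ 2478 km^2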
# pylint: disable=invalid-name,too-many-locals
def _dump_searches(searches: List[AbstractSearch], query: QueryStruct,
start: int = 0) -> Iterator[Optional[List[Any]]]:
- yield ['Penalty', 'Lookups', 'Housenr', 'Postcode', 'Countries', 'Qualifier', 'Rankings']
+ yield ['Penalty', 'Lookups', 'Housenr', 'Postcode', 'Countries',
+ 'Qualifier', 'Category', 'Rankings']
def tk(tl: List[int]) -> str:
tstr = [f"{query.find_lookup_word_by_id(t)}({t})" for t in tl]
for search in searches[start:]:
fields = ('lookups', 'rankings', 'countries', 'housenumbers',
- 'postcodes', 'qualifier')
- iters = itertools.zip_longest([f"{search.penalty:.3g}"],
- *(getattr(search, attr, []) for attr in fields),
- fillvalue= '')
- for penalty, lookup, rank, cc, hnr, pc, qual in iters:
+ 'postcodes', 'qualifiers')
+ if hasattr(search, 'search'):
+ iters = itertools.zip_longest([f"{search.penalty:.3g}"],
+ *(getattr(search.search, attr, []) for attr in fields),
+ getattr(search, 'categories', []),
+ fillvalue='')
+ else:
+ iters = itertools.zip_longest([f"{search.penalty:.3g}"],
+ *(getattr(search, attr, []) for attr in fields),
+ [],
+ fillvalue='')
+ for penalty, lookup, rank, cc, hnr, pc, qual, cat in iters:
yield [penalty, fmt_lookup(lookup), fmt_cstr(hnr),
- fmt_cstr(pc), fmt_cstr(cc), fmt_cstr(qual), fmt_ranking(rank)]
+ fmt_cstr(pc), fmt_cstr(cc), fmt_cstr(qual), fmt_cstr(cat), fmt_ranking(rank)]
yield None
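itertools.zip_longest keeps the dumped rows rectangular when one field has more entries than another; a quick sketch of the padding behaviour:

import itertools

rows = list(itertools.zip_longest(['0.15'], ['lookup-a', 'lookup-b'],
                                  [], fillvalue=''))
assert rows == [('0.15', 'lookup-a', ''), ('', 'lookup-b', '')]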
seq = difflib.SequenceMatcher(a=self.lookup_word, b=norm)
distance = 0
for tag, afrom, ato, bfrom, bto in seq.get_opcodes():
- if tag == 'delete' and (afrom == 0 or ato == len(self.lookup_word)):
+ if tag in ('delete', 'insert') and (afrom == 0 or ato == len(self.lookup_word)):
distance += 1
elif tag == 'replace':
distance += max((ato-afrom), (bto-bfrom))
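The cheap edge rule, previously applied only to deletions, now also covers insertions, so an abbreviated lookup word still counts as a single edit against its full form:

import difflib

lookup_word, norm = 'hauptstr', 'hauptstrasse'   # abbreviation vs. full form
distance = 0
for tag, afrom, ato, bfrom, bto in difflib.SequenceMatcher(a=lookup_word,
                                                           b=norm).get_opcodes():
    if tag in ('delete', 'insert') and (afrom == 0 or ato == len(lookup_word)):
        distance += 1                            # truncation at an edge is cheap
    elif tag == 'replace':
        distance += max(ato - afrom, bto - bfrom)

assert distance == 1    # the trailing 'asse' insertion costs 1, not 4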
priors = sum(1 for t in self.seq[hnrpos+1:] if t.ttype == qmod.TokenType.PARTIAL)
if not self._adapt_penalty_from_priors(priors, 1):
return False
+ if any(t.ttype == qmod.TokenType.CATEGORY for t in self.seq):
+ self.penalty += 1.0
return True
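The flat 1.0 surcharge is deliberately larger than ordinary assignment penalties, which stay well below 1, so any reading that leaves a category token unconsumed sorts behind every plain interpretation:

plain_reading = 0.35           # a typical assignment penalty (hypothetical value)
category_reading = 0.2 + 1.0   # comparable base penalty plus the surcharge
assert category_reading > plain_reading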
def ST_DWithin(self, other: SaColumn, distance: SaColumn) -> SaColumn:
- return sa.func.ST_DWithin(self, other, distance, type_=sa.Float)
+ return sa.func.ST_DWithin(self, other, distance, type_=sa.Boolean)
+
+
+ def ST_DWithin_no_index(self, other: SaColumn, distance: SaColumn) -> SaColumn:
+ return sa.func._ST_DWithin(self, other, distance, type_=sa.Boolean)
+
+
+ def ST_Intersects_no_index(self, other: SaColumn) -> SaColumn:
+ return sa.func._ST_Intersects(self, other, type_=sa.Boolean)
def ST_Distance(self, other: SaColumn) -> SaColumn:
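The underscored variants map onto real PostGIS internals: ST_DWithin(a, b, d) expands to index-backed bbox tests plus _ST_DWithin, so calling _ST_DWithin (or _ST_Intersects) directly performs the exact check without the index. A rendering sketch in plain SQLAlchemy:

import sqlalchemy as sa

centroid = sa.column('centroid')
near, radius = sa.bindparam('near'), sa.bindparam('radius')

print(sa.func.ST_DWithin(centroid, near, radius))   # ST_DWithin(centroid, :near, :radius)
print(sa.func._ST_DWithin(centroid, near, radius))  # _ST_DWithin(centroid, :near, :radius)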
search = searches[0]
assert isinstance(search, dbs.PoiSearch)
- assert search.categories.values == [('this', 'that')]
+ assert search.qualifiers.values == [('this', 'that')]
@pytest.mark.parametrize('kwargs', [{'viewbox': '0,0,1,1'},
def test_too_frequent_partials_in_name_and_address():
- searches = make_counted_searches(10000, 1, 10000, 1)
+ searches = make_counted_searches(20000, 1, 10000, 1)
assert len(searches) == 1
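Doubling the name-partial count keeps the effective estimate unchanged under the new formula, assuming make_counted_searches sets up one name partial with the first count and one address partial with the third:

old_estimate = min(10000, 10000)   # old cap: min of name and address counts
new_estimate = 20000 / 2 ** 1      # new rule: one address partial halves the count
assert old_estimate == new_estimate == 10000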