  RETURN trim((avals(name))[array_length(avals(name), 1)]);
END;
$$
- LANGUAGE plpgsql IMMUTABLE;
+ LANGUAGE plpgsql IMMUTABLE PARALLEL SAFE;
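+ -- PARALLEL SAFE marks the function as runnable inside parallel workers;
+ -- that is only valid because these functions write nothing and depend on
+ -- no session state.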
  -- housenumber only needed for Tiger data
  RETURN array_to_string(result, ', ');
END;
$$
- LANGUAGE plpgsql STABLE;
+ LANGUAGE plpgsql STABLE PARALLEL SAFE;
DROP TYPE IF EXISTS addressdata_place;
CREATE TYPE addressdata_place AS (
  FOR location IN
    SELECT placex.place_id, osm_type, osm_id, name, class, type,
           coalesce(extratags->'linked_place', extratags->'place') as place_type,
-           admin_level, fromarea, isaddress,
+           admin_level, fromarea, isaddress and linked_place_id is NULL as isaddress,
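+           -- (linked places have been merged into another object and must
+           -- not be flagged as address parts)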
           CASE WHEN rank_address = 11 THEN 5 ELSE rank_address END as rank_address,
           distance, country_code, postcode
      FROM place_addressline join placex on (address_place_id = placex.place_id)
  RETURN;
END;
$$
- LANGUAGE plpgsql STABLE;
+ LANGUAGE plpgsql STABLE PARALLEL SAFE;
            yield penalty, exp_count, dbf.lookup_by_names(list(name_partials.keys()), addr_tokens)
            return
-        addr_count = min(t.addr_count for t in addr_partials) if addr_partials else 30000
+        addr_count = min(t.addr_count for t in addr_partials) if addr_partials else 50000
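+        # (the 50000 fallback equals the threshold below, so a query
+        # without address partials never passes the addr_count checks)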
        # Partial term too frequent. Try looking up by rare full names first.
        name_fulls = self.query.get_tokens(name, qmod.TOKEN_WORD)
        if name_fulls:
            fulls_count = sum(t.count for t in name_fulls)
-            if fulls_count < 50000 or addr_count < 30000:
+            if fulls_count < 80000 or addr_count < 50000:
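+                # (the last argument turns on index lookups for the address
+                # when the full names are frequent; the 30000 bound shrinks
+                # with each additional address token)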
                yield penalty, fulls_count / (2**len(addr_tokens)), \
                    self.get_full_name_ranking(name_fulls, addr_partials,
                                               fulls_count > 30000 / max(1, len(addr_tokens)))
        # To catch remaining results, look up by name and address.
        # We only do this if there is a reasonable number of results expected.
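+        # (dividing unconditionally gives the same result: with no address
+        # tokens, the divisor is 2**0 == 1)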
-        exp_count = exp_count / (2**len(addr_tokens)) if addr_tokens else exp_count
+        exp_count /= 2**len(addr_tokens)
        if exp_count < 10000 and addr_count < 20000:
            penalty += 0.35 * max(1 if name_fulls else 0.1,
                                  5 - len(name_partials) - len(addr_tokens))
        # This might yield wrong results, nothing we can do about that.
        if use_lookup:
            addr_restrict_tokens = []
-            addr_lookup_tokens = []
-            for t in addr_partials:
-                if t.addr_count > 20000:
-                    addr_restrict_tokens.append(t.token)
-                else:
-                    addr_lookup_tokens.append(t.token)
+            addr_lookup_tokens = [t.token for t in addr_partials]
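+            # (the former frequency-based split into restrict and lookup
+            # tokens is gone; all address partials go to the index lookup)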
        else:
            addr_restrict_tokens = [t.token for t in addr_partials]
            addr_lookup_tokens = []
        log().section('Analyze query (using ICU tokenizer)')
        for func in self.preprocessors:
            phrases = func(phrases)
+
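+        # Heuristic for queries that arrive pre-split into very short
+        # fragments: a single phrase with more than three spaces where no
+        # term is longer than two characters.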
+        if len(phrases) == 1 \
+                and phrases[0].text.count(' ') > 3 \
+                and max(len(s) for s in phrases[0].text.split()) < 3:
+            normalized = []
+
        query = qmod.QueryStruct(phrases)
        log().var_dump('Normalized query', query.source)
        self.add_extra_tokens(query)
        for start, end, pc in self.postcode_parser.parse(query):
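+            # Build the word token from the lookup terms of the nodes the
+            # postcode spans instead of using the normalized postcode.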
+            term = ' '.join(n.term_lookup for n in query.nodes[start + 1:end + 1])
            query.add_token(qmod.TokenRange(start, end),
                            qmod.TOKEN_POSTCODE,
                            ICUToken(penalty=0.1, token=0, count=1, addr_count=1,
-                                     lookup_word=pc, word_token=pc, info=None))
+                                     lookup_word=pc, word_token=term,
+                                     info=None))
        self.rerank_tokens(query)
        log().table_dump('Word tokens', _dump_word_tokens(query))
"""
for i, node, tlist in query.iter_token_lists():
if tlist.ttype == qmod.TOKEN_POSTCODE:
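+                # length of the postcode token as it appears in the query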
+                tlen = len(cast(ICUToken, tlist.tokens[0]).word_token)
                for repl in node.starting:
                    if repl.end == tlist.end and repl.ttype != qmod.TOKEN_POSTCODE \
-                       and (repl.ttype != qmod.TOKEN_HOUSENUMBER
-                            or len(tlist.tokens[0].lookup_word) > 4):
+                       and (repl.ttype != qmod.TOKEN_HOUSENUMBER or tlen > 4):
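+                        # penalise competing readings over the same span;
+                        # only a house-number reading of a short token
+                        # (<= 4 characters) escapes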
                        repl.add_penalty(0.39)
            elif (tlist.ttype == qmod.TOKEN_HOUSENUMBER
                  and len(tlist.tokens[0].lookup_word) <= 3):