public function checkStatus()
{
- $sSQL = "SELECT word_id FROM word WHERE word_token == 'a'";
+ $sSQL = "SELECT word_id FROM word limit 1";
$iWordID = $this->oDB->getOne($sSQL);
if ($iWordID === false) {
throw new Exception('Query failed', 703);
private function addTokensFromDB(&$oValidTokens, $aTokens, $sNormQuery)
{
+ // Looks up the given tokens in the word table and registers one typed
+ // token object on $oValidTokens for every usable row. $sNormQuery is
+ // only consulted for postcode rows: a postcode is accepted only when
+ // its normalized form occurs in the normalized query string.
// Check which tokens we have, get the ID numbers
- $sSQL = 'SELECT word_id, word_token, type';
+ $sSQL = 'SELECT word_id, word_token, type,';
$sSQL .= " info->>'cc' as country, info->>'postcode' as postcode,";
$sSQL .= " info->>'op' as operator,";
- $sSQL .= " info->>'class' as class, info->>'type' as type,";
+ $sSQL .= " info->>'class' as class, info->>'type' as ctype,";
$sSQL .= " info->>'count' as count";
$sSQL .= ' FROM word WHERE word_token in (';
$sSQL .= join(',', $this->oDB->getDBQuotedList($aTokens)).')';
foreach ($aDBWords as $aWord) {
$iId = (int) $aWord['word_id'];
+ $sTok = $aWord['word_token'];
switch ($aWord['type']) {
- 'C': // country name tokens
- if ($aWord['country'] === null
- || ($this->aCountryRestriction
- && !in_array($aWord['country'], $this->aCountryRestriction))
+ case 'C': // country name tokens
+ // Skip rows without a country code and, when a country
+ // restriction is set, rows outside of it.
+ if ($aWord['country'] !== null
+ && (!$this->aCountryRestriction
+ || in_array($aWord['country'], $this->aCountryRestriction))
) {
- continue;
+ $oValidTokens->addToken($sTok, new Token\Country($iId, $aWord['country']));
}
- $oToken = new Token\Country($iId, $aWord['country'])
break;
- 'H': // house number tokens
- $oToken = new Token\HouseNumber($iId, $aWord['word_token']);
+ case 'H': // house number tokens
+ $oValidTokens->addToken($sTok, new Token\HouseNumber($iId, $aWord['word_token']));
break;
- 'P': // postcode tokens
+ case 'P': // postcode tokens
// Postcodes are not normalized, so they may have content
// that makes SQL injection possible. Reject postcodes
// that would need special escaping.
- if ($aWord['postcode'] === null
- || pg_escape_string($aWord['postcode']) == $aWord['postcode']
+ if ($aWord['postcode'] !== null
+ && pg_escape_string($aWord['postcode']) == $aWord['postcode']
) {
- continue;
+ $sNormPostcode = $this->normalizeString($aWord['postcode']);
+ if (strpos($sNormQuery, $sNormPostcode) !== false) {
+ $oValidTokens->addToken($sTok, new Token\Postcode($iId, $aWord['postcode'], null));
+ }
}
- $sNormPostcode = $this->normalizeString($aWord['postcode']);
- if (strpos($sNormQuery, $sNormPostcode) === false) {
- continue;
- }
- $oToken = new Token\Postcode($iId, $aWord['postcode'], null);
break;
- 'S': // tokens for classification terms (special phrases)
- if ($aWord['class'] === null || $aWord['type'] === null
- ) {
- continue;
+ case 'S': // tokens for classification terms (special phrases)
+ if ($aWord['class'] !== null && $aWord['ctype'] !== null) {
+ // The query aliases info->>'op' as "operator", so the
+ // row key to test is 'operator'; a row key 'op' does
+ // not exist and would always yield Operator::NONE.
+ $oValidTokens->addToken($sTok, new Token\SpecialTerm(
+ $iId,
+ $aWord['class'],
+ $aWord['ctype'],
+ (isset($aWord['operator'])) ? Operator::NEAR : Operator::NONE
+ ));
}
- $oToken = new Token\SpecialTerm(
- $iId,
- $aWord['class'],
- $aWord['type'],
- $aWord['op'] ? Operator::NEAR : Operator::NONE
- );
break;
- 'W': // full-word tokens
- $oToken = new Token\Word(
+ case 'W': // full-word tokens
+ $oValidTokens->addToken($sTok, new Token\Word(
$iId,
(int) $aWord['count'],
substr_count($aWord['word_token'], ' ')
- );
+ ));
break;
- 'w': // partial word terms
- $oToken = new Token\Partial(
+ case 'w': // partial word terms
+ $oValidTokens->addToken($sTok, new Token\Partial(
$iId,
$aWord['word_token'],
(int) $aWord['count']
- );
+ ));
break;
default:
- continue;
+ break;
}
-
- $oValidTokens->addToken($aWord['word_token'], $oToken);
}
}
| country | postcode | geometry |
| de | 01982 | country:de |
| ch | 4567 | country:ch |
- And word contains
- | word | class | type |
- | 01982 | place | postcode |
- | 4567 | place | postcode |
+ And there are word tokens for postcodes 01982,4567
Scenario: When the last postcode is deleted, it is deleted from postcode and word
Given the places
Then location_postcode contains exactly
| country | postcode | geometry |
| ch | 4567 | country:ch |
- And word contains not
- | word | class | type |
- | 01982 | place | postcode |
- And word contains
- | word | class | type |
- | 4567 | place | postcode |
+ And there are word tokens for postcodes 4567
+ And there are no word tokens for postcodes 01982
Scenario: A postcode is not deleted from postcode and word when it exist in another country
Given the places
Then location_postcode contains exactly
| country | postcode | geometry |
| ch | 01982 | country:ch |
- And word contains
- | word | class | type |
- | 01982 | place | postcode |
+ And there are word tokens for postcodes 01982
Scenario: Updating a postcode is reflected in postcode table
Given the places
Then location_postcode contains exactly
| country | postcode | geometry |
| de | 20453 | country:de |
- And word contains
- | word | class | type |
- | 20453 | place | postcode |
+ And there are word tokens for postcodes 20453
Scenario: When changing from a postcode type, the entry appears in placex
When importing
Then location_postcode contains exactly
| country | postcode | geometry |
| de | 20453 | country:de |
- And word contains
- | word | class | type |
- | 20453 | place | postcode |
+ And there are word tokens for postcodes 20453
Scenario: When changing to a postcode type, the entry disappears from placex
When importing
Then location_postcode contains exactly
| country | postcode | geometry |
| de | 01982 | country:de |
- And word contains
- | word | class | type |
- | 01982 | place | postcode |
+ And there are word tokens for postcodes 01982
else:
assert cur.rowcount > 0, "Row not in word table: %s" % '/'.join(values)
+
+@then("there are(?P<exclude> no)? word tokens for postcodes (?P<postcodes>.*)")
+def check_word_table_for_postcodes(context, exclude, postcodes):
+ """ Check that the tokenizer produces postcode tokens for the given
+ postcodes. The postcodes are a comma-separated list of postcodes.
+ Whitespace matters.
+ """
+ nctx = context.nominatim
+ tokenizer = tokenizer_factory.get_tokenizer_for_db(nctx.get_test_config())
+ with tokenizer.name_analyzer() as ana:
+ plist = [ana.normalize_postcode(p) for p in postcodes.split(',')]
+
+ plist.sort()
+
+ with context.db.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur:
+ if nctx.tokenizer == 'legacy_icu':
+ cur.execute("""SELECT info->>'postcode' FROM word
+ WHERE type = 'P' and info->>'postcode' = any(%s)""",
+ (plist,))
+ else:
+ cur.execute("""SELECT word FROM word WHERE word = any(%s)
+ and class = 'place' and type = 'postcode'""",
+ (plist,))
+
+ found = [row[0] for row in cur]
+ assert len(found) == len(set(found)), f"Duplicate rows for postcodes: {found}"
+
+ if exclude:
+ assert len(found) == 0, f"Unexpected postcodes: {found}"
+ else:
+ assert set(found) == set(plist), \
+ f"Missing postcodes {set(plist) - set(found)}. Found: {found}"
+
@then("place_addressline contains")
def check_place_addressline(context):
""" Check the contents of the place_addressline table. Each row represents