3 * SPDX-License-Identifier: GPL-2.0-only
5 * This file is part of Nominatim. (https://nominatim.org)
7 * Copyright (C) 2022 by the Nominatim developer community.
8 * For a full list of authors see the git log.
13 require_once(CONST_LibDir.'/SimpleWordList.php');
// Token lookup method taking a single term ($sTerm).
// NOTE(review): the enclosing class header and the method body are not visible
// in this excerpt; presumably this belongs to the TokensFullSet stub that the
// tests instantiate and accepts every term - confirm against the full file.
17 public function containsAny($sTerm)
23 // phpcs:ignore PSR1.Classes.ClassDeclaration.MultipleClasses
/**
 * Token lookup stub that only recognises an explicit list of terms.
 *
 * Instances are handed to SimpleWordList::getWordSets() by the tests to
 * restrict which terms count as known.
 */
class TokensPartialSet
{
    /**
     * Known terms stored as array keys (values are their original positions).
     *
     * Declared explicitly: assigning an undeclared property creates a dynamic
     * property, which is deprecated as of PHP 8.2.
     *
     * @var array
     */
    private $aTokens;

    /**
     * @param array $aTokens List of known terms. array_flip() requires the
     *                       values to be strings or integers.
     */
    public function __construct($aTokens)
    {
        // Flip the list so that terms become keys and can be probed with isset().
        $this->aTokens = array_flip($aTokens);
    }

    /**
     * Check whether a term is part of the known set.
     *
     * @param string $sTerm Term to look up.
     *
     * @return bool True when $sTerm was in the list given to the constructor.
     */
    public function containsAny($sTerm)
    {
        return isset($this->aTokens[$sTerm]);
    }
}
37 // phpcs:ignore PSR1.Classes.ClassDeclaration.MultipleClasses
38 class SimpleWordListTest extends \PHPUnit\Framework\TestCase
/**
 * Render a list of word sets as a single string for easy comparison.
 *
 * Every set is written as its elements joined by '|' and wrapped in
 * parentheses; the sets themselves are joined by ','.
 * Example result: '(a b),(a|b)'.
 *
 * @param array $aSets List of word sets, each one a list of strings.
 *
 * @return string Serialized sets, or an empty string when $aSets is empty.
 */
private function serializeSets($aSets)
{
    // Initialise the accumulator so that an empty $aSets serializes to ''
    // instead of passing an undefined variable to implode().
    $aParts = array();
    foreach ($aSets as $aSet) {
        $aParts[] = '(' . implode('|', $aSet) . ')';
    }

    return implode(',', $aParts);
}
/**
 * An empty phrase must not produce any word sets: getWordSets() is
 * expected to return null.
 */
public function testEmptyPhrase()
{
    $oWordList = new SimpleWordList('');
    $mWordSets = $oWordList->getWordSets(new TokensFullSet());

    $this->assertNull($mWordSets);
}
// Test for a phrase consisting of the single word 'a'.
// NOTE(review): the assertion call and the expected value that consume the
// serialized result below are not visible in this excerpt.
59 public function testSingleWordPhrase()
// Build the word list for the one-word phrase.
61 $oList = new SimpleWordList('a');
// Actual value: the word sets computed with a TokensFullSet, serialized to a string.
65 $this->serializeSets($oList->getWordSets(new TokensFullSet()))
// Test for phrases of two, three and four words, using a TokensFullSet.
// In the expected strings a space joins words inside one term, '|' separates
// the terms of a set and ',' separates the sets.
// NOTE(review): the assertion calls wrapping each expected/actual pair, and the
// expected value for the two-word phrase, are not visible in this excerpt.
70 public function testMultiWordPhrase()
// Two-word phrase.
72 $oList = new SimpleWordList('a b');
75 $this->serializeSets($oList->getWordSets(new TokensFullSet()))
// Three-word phrase: four sets are expected.
78 $oList = new SimpleWordList('a b c');
80 '(a b c),(a b|c),(a|b c),(a|b|c)',
81 $this->serializeSets($oList->getWordSets(new TokensFullSet()))
// Four-word phrase: eight sets are expected.
84 $oList = new SimpleWordList('a b c d');
86 '(a b c d),(a b c|d),(a b|c d),(a|b c d),(a b|c|d),(a|b c|d),(a|b|c d),(a|b|c|d)',
87 $this->serializeSets($oList->getWordSets(new TokensFullSet()))
/**
 * Check the comparison results of SimpleWordList::cmpByArraylen().
 *
 * Each list below is an array whose elements are phrases. The cases assert
 * that a list with fewer elements compares as smaller and that, for equal
 * element counts, the list with the longer first element compares as smaller.
 */
public function testCmpByArraylen()
{
    $aFourLongHead = array('hackney', 'bridge', 'london', 'england');
    $aThree = array('hackney', 'london', 'bridge');
    $aFourShortHead = array('bridge', 'hackney', 'london', 'england');

    // Each case: expected result, left operand, right operand.
    $aCases = array(
        // A list compared with itself is a tie.
        array(0, $aFourLongHead, $aFourLongHead),
        // The list with fewer array elements "wins" (compares as smaller).
        array(1, $aFourLongHead, $aThree),
        array(-1, $aThree, $aFourShortHead),
        // Same number of elements, but the left list has the longer first element.
        array(-1, $aFourLongHead, $aFourShortHead),
    );

    foreach ($aCases as $aCase) {
        list($iExpected, $aLeft, $aRight) = $aCase;
        $this->assertEquals(
            $iExpected,
            \Nominatim\SimpleWordList::cmpByArraylen($aLeft, $aRight)
        );
    }
}
/**
 * Check the number of word sets returned for short and long phrases.
 *
 * A phrase of 4 identical words must give 8 sets, a phrase of 18 identical
 * words exactly 100. NOTE(review): the test name suggests 100 is the upper
 * limit enforced by SimpleWordList - confirm against its implementation.
 */
public function testMaxWordSets()
{
    // assertCount() reports expected and actual sizes on failure, unlike
    // assertEquals() on a pre-computed count().
    $oList = new SimpleWordList(join(' ', array_fill(0, 4, 'a')));
    $this->assertCount(8, $oList->getWordSets(new TokensFullSet()));

    $oList = new SimpleWordList(join(' ', array_fill(0, 18, 'a')));
    $this->assertCount(100, $oList->getWordSets(new TokensFullSet()));
}
// Test for a four-word phrase where only some terms are known to the token set.
// NOTE(review): the assertion call wrapping the expected/actual pair below is
// not visible in this excerpt.
120 public function testPartialTokensShortTerm()
122 $oList = new SimpleWordList('a b c d');
// Expected serialization: two sets.
124 '(a|b c d),(a|b c|d)',
// Actual value: word sets computed with a TokensPartialSet that knows only
// 'a', 'b', 'd', 'b c' and 'b c d'.
125 $this->serializeSets($oList->getWordSets(new TokensPartialSet(array('a', 'b', 'd', 'b c', 'b c d'))))
/**
 * Check the number of word sets for a long phrase with a restricted token set.
 *
 * For a phrase of 18 identical words 'a', with only the terms 'a' and
 * 'a a a a a' known, 80 word sets are expected.
 */
public function testPartialTokensLongTerm()
{
    $oList = new SimpleWordList(join(' ', array_fill(0, 18, 'a')));
    $oTokens = new TokensPartialSet(array('a', 'a a a a a'));

    // assertCount() reports expected and actual sizes on failure, unlike
    // assertEquals() on a pre-computed count().
    $this->assertCount(80, $oList->getWordSets($oTokens));
}