git.openstreetmap.org Git - nominatim.git/blob - test/php/Nominatim/SimpleWordListTest.php
Merge pull request #2425 from lonvia/tokenizer-documentation
[nominatim.git] / test / php / Nominatim / SimpleWordListTest.php
1 <?php
2
3 namespace Nominatim;
4
5 require_once(CONST_LibDir.'/SimpleWordList.php');
6
/**
 * Token-set stub that accepts every term.
 *
 * The tests in this file pass it to SimpleWordList::getWordSets() when no
 * term should be rejected.
 */
class TokensFullSet
{
    /**
     * Report the given term as known, whatever it is.
     *
     * @param string $sTerm Term to look up; its value is never inspected.
     *
     * @return bool Always true.
     */
    public function containsAny($sTerm)
    {
        // Unconditional: this stub has no notion of an unknown term.
        return true;
    }
}
14
/**
 * Token-set stub that recognises only an explicit list of terms.
 *
 * The tests in this file pass it to SimpleWordList::getWordSets() when
 * only some terms should be accepted.
 */
// phpcs:ignore PSR1.Classes.ClassDeclaration.MultipleClasses
class TokensPartialSet
{
    /**
     * Known terms stored as array keys, so membership is a key lookup.
     *
     * Declared explicitly: creating a property dynamically in the
     * constructor is deprecated as of PHP 8.2.
     *
     * @var array
     */
    private $aTokens = array();

    /**
     * @param string[] $aTokens Terms that containsAny() should accept.
     */
    public function __construct($aTokens)
    {
        // Flip the list so each term becomes a key usable with isset().
        $this->aTokens = array_flip($aTokens);
    }

    /**
     * Check whether the given term is one of the known terms.
     *
     * @param string $sTerm Term to look up.
     *
     * @return bool True if $sTerm was in the list given to the constructor.
     */
    public function containsAny($sTerm)
    {
        return isset($this->aTokens[$sTerm]);
    }
}
28
/**
 * Unit tests for SimpleWordList::getWordSets().
 *
 * The stubs TokensFullSet and TokensPartialSet defined in this file are
 * passed to getWordSets() as the token lookup object.
 */
// phpcs:ignore PSR1.Classes.ClassDeclaration.MultipleClasses
class SimpleWordListTest extends \PHPUnit\Framework\TestCase
{
    /**
     * Render a list of word sets as one compact string for comparison.
     *
     * Each set is rendered as its entries joined by '|' and wrapped in
     * parentheses; the rendered sets are then joined by ','.
     *
     * @param array[] $aSets List of word sets, each a list of strings.
     *
     * @return string Serialized form, e.g. '(a b),(a|b)'.
     */
    private function serializeSets($aSets)
    {
        $aParts = array_map(
            function ($aSet) {
                return '('.implode('|', $aSet).')';
            },
            $aSets
        );

        return implode(',', $aParts);
    }

    /**
     * An empty phrase yields null rather than a list of word sets.
     */
    public function testEmptyPhrase()
    {
        $oList = new SimpleWordList('');

        $this->assertNull($oList->getWordSets(new TokensFullSet()));
    }

    /**
     * A one-word phrase yields exactly one set holding that word.
     */
    public function testSingleWordPhrase()
    {
        $oList = new SimpleWordList('a');

        $this->assertSame(
            '(a)',
            $this->serializeSets($oList->getWordSets(new TokensFullSet()))
        );
    }

    /**
     * With every term accepted, the expected output lists the ways of
     * grouping the words, in the exact order given below.
     */
    public function testMultiWordPhrase()
    {
        // Phrase => expected serialized word sets (order matters).
        $aExpectations = array(
            'a b' => '(a b),(a|b)',
            'a b c' => '(a b c),(a|b c),(a b|c),(a|b|c)',
            'a b c d' => '(a b c d),(a b c|d),(a b|c d),(a|b c d),'
                         .'(a b|c|d),(a|b c|d),(a|b|c d),(a|b|c|d)',
        );

        foreach ($aExpectations as $sPhrase => $sExpected) {
            $oList = new SimpleWordList($sPhrase);

            $this->assertSame(
                $sExpected,
                $this->serializeSets($oList->getWordSets(new TokensFullSet())),
                "Unexpected word sets for phrase '".$sPhrase."'"
            );
        }
    }

    /**
     * The number of returned word sets for short and long phrases.
     */
    public function testMaxWordSets()
    {
        // Four words: 8 sets, the same count as the 'a b c d' case in
        // testMultiWordPhrase().
        $oList = new SimpleWordList(implode(' ', array_fill(0, 4, 'a')));
        $this->assertCount(8, $oList->getWordSets(new TokensFullSet()));

        // Eighteen words: exactly 100 sets are expected.
        // NOTE(review): presumably an upper limit applied by SimpleWordList;
        // confirm against its implementation.
        $oList = new SimpleWordList(implode(' ', array_fill(0, 18, 'a')));
        $this->assertCount(100, $oList->getWordSets(new TokensFullSet()));
    }

    /**
     * With a restricted token set, the expected output holds only the
     * groupings whose every group is one of the accepted terms.
     */
    public function testPartialTokensShortTerm()
    {
        $oTokens = new TokensPartialSet(array('a', 'b', 'd', 'b c', 'b c d'));
        $oList = new SimpleWordList('a b c d');

        $this->assertSame(
            '(a|b c d),(a|b c|d)',
            $this->serializeSets($oList->getWordSets($oTokens))
        );
    }

    /**
     * Eighteen identical words with only 'a' and 'a a a a a' accepted are
     * expected to produce 80 word sets.
     */
    public function testPartialTokensLongTerm()
    {
        $oTokens = new TokensPartialSet(array('a', 'a a a a a'));
        $oList = new SimpleWordList(implode(' ', array_fill(0, 18, 'a')));

        $this->assertCount(80, $oList->getWordSets($oTokens));
    }
}