+ def _create_base_indices(self, config: Configuration, table_name: str) -> None:
+ """ Set up the word table and fill it with pre-computed word
+ frequencies.
+ """
+ with connect(self.dsn) as conn:
+ sqlp = SQLPreprocessor(conn, config)
+ sqlp.run_string(conn,
+ """CREATE INDEX idx_{{table_name}}_word_token ON {{table_name}}
+ USING BTREE (word_token) {{db.tablespace.search_index}}""",
+ table_name=table_name)
+ for name, ctype in WORD_TYPES:
+ sqlp.run_string(conn,
+ """CREATE INDEX idx_{{table_name}}_{{idx_name}} ON {{table_name}}
+ USING BTREE (word) {{db.tablespace.address_index}}
+ WHERE type = '{{column_type}}'
+ """,
+ table_name=table_name, idx_name=name,
+ column_type=ctype)
+ conn.commit()
+
+
+ def _create_lookup_indices(self, config: Configuration, table_name: str) -> None:
+ """ Create additional indexes used when running the API.
+ """
+ with connect(self.dsn) as conn:
+ sqlp = SQLPreprocessor(conn, config)
+ # Index required for details lookup.
+ sqlp.run_string(conn, """
+ CREATE INDEX IF NOT EXISTS idx_{{table_name}}_word_id
+ ON {{table_name}} USING BTREE (word_id) {{db.tablespace.search_index}}
+ """,
+ table_name=table_name)
+ conn.commit()
+
+
+ def _move_temporary_word_table(self, old: str) -> None:
+ """ Rename all tables and indexes used by the tokenizer.
+ """
+ with connect(self.dsn) as conn:
+ with conn.cursor() as cur:
+ cur.drop_table('word')
+ cur.execute(f"ALTER TABLE {old} RENAME TO word")
+ for idx in ('word_token', 'word_id'):
+ cur.execute(f"""ALTER INDEX idx_{old}_{idx}
+ RENAME TO idx_word_{idx}""")
+ for name, _ in WORD_TYPES:
+ cur.execute(f"""ALTER INDEX idx_{old}_{name}
+ RENAME TO idx_word_{name}""")
+ conn.commit()
+
+
+
+
+class ICUNameAnalyzer(AbstractAnalyzer):
+ """ The ICU analyzer uses the ICU library for splitting names.