def get_word_token_info(self, conn, words):
    """ Return token information for the given list of words.
        If a word starts with # it is assumed to be a full name
        otherwise is a partial name.

        `conn` must provide a `cursor()` context manager whose cursor
        supports `execute()` and iteration over result rows. `words`
        is an iterable of strings; a word that appears more than once
        is reported only once.

        The function returns a list of tuples with
        (original word, word token, word id). The word id is None
        when the word table has no matching entry for the token.

        The function is used for testing and debugging only
        and not necessarily efficient.
    """
    # Map each original word to its normalised lookup token. Full names
    # (marked with a leading '#') get a blank prepended to the token.
    tokens = {}
    for word in words:
        if word.startswith('#'):
            tokens[word] = ' ' + self.make_standard_word(word[1:])
        else:
            tokens[word] = self.make_standard_word(word)

    with conn.cursor() as cur:
        cur.execute("""SELECT word_token, word_id
                       FROM word, (SELECT unnest(%s::TEXT[]) as term) t
                       WHERE word_token = t.term
                             and class is null and country_code is null""",
                    (list(tokens.values()), ))
        # When several rows share a token, the last row iterated wins.
        ids = {row[0]: row[1] for row in cur}

    # Tokens without a row in the word table must not abort the lookup:
    # report them with a word id of None instead of raising KeyError.
    return [(word, token, ids.get(token)) for word, token in tokens.items()]
+
+