]> git.openstreetmap.org Git - nominatim.git/blob - nominatim/tools/postcodes.py
add unit tests for legacy ICU tokenizer
[nominatim.git] / nominatim / tools / postcodes.py
1 """
2 Functions for importing, updating and otherwise maintaining the table
3 of artificial postcode centroids.
4 """
5
6 from nominatim.db.utils import execute_file
7 from nominatim.db.connection import connect
8
9 def import_postcodes(dsn, project_dir, tokenizer):
10     """ Set up the initial list of postcodes.
11     """
12
13     with connect(dsn) as conn:
14         conn.drop_table('gb_postcode')
15         conn.drop_table('us_postcode')
16
17         with conn.cursor() as cur:
18             cur.execute("""CREATE TABLE gb_postcode (
19                             id integer,
20                             postcode character varying(9),
21                             geometry GEOMETRY(Point, 4326))""")
22
23         with conn.cursor() as cur:
24             cur.execute("""CREATE TABLE us_postcode (
25                             postcode text,
26                             x double precision,
27                             y double precision)""")
28         conn.commit()
29
30         gb_postcodes = project_dir / 'gb_postcode_data.sql.gz'
31         if gb_postcodes.is_file():
32             execute_file(dsn, gb_postcodes)
33
34         us_postcodes = project_dir / 'us_postcode_data.sql.gz'
35         if us_postcodes.is_file():
36             execute_file(dsn, us_postcodes)
37
38         with conn.cursor() as cur:
39             cur.execute("TRUNCATE location_postcode")
40             cur.execute("""
41                 INSERT INTO location_postcode
42                  (place_id, indexed_status, country_code, postcode, geometry)
43                 SELECT nextval('seq_place'), 1, country_code,
44                        token_normalized_postcode(address->'postcode') as pc,
45                        ST_Centroid(ST_Collect(ST_Centroid(geometry)))
46                   FROM placex
47                  WHERE address ? 'postcode'
48                        and token_normalized_postcode(address->'postcode') is not null
49                        AND geometry IS NOT null
50                  GROUP BY country_code, pc
51             """)
52
53             cur.execute("""
54                 INSERT INTO location_postcode
55                  (place_id, indexed_status, country_code, postcode, geometry)
56                 SELECT nextval('seq_place'), 1, 'us',
57                        token_normalized_postcode(postcode),
58                        ST_SetSRID(ST_Point(x,y),4326)
59                   FROM us_postcode WHERE token_normalized_postcode(postcode) NOT IN
60                         (SELECT postcode FROM location_postcode
61                           WHERE country_code = 'us')
62             """)
63
64             cur.execute("""
65                 INSERT INTO location_postcode
66                  (place_id, indexed_status, country_code, postcode, geometry)
67                 SELECT nextval('seq_place'), 1, 'gb',
68                        token_normalized_postcode(postcode), geometry
69                   FROM gb_postcode WHERE token_normalized_postcode(postcode) NOT IN
70                            (SELECT postcode FROM location_postcode
71                              WHERE country_code = 'gb')
72             """)
73
74             cur.execute("""
75                     DELETE FROM word WHERE class='place' and type='postcode'
76                     and word NOT IN (SELECT postcode FROM location_postcode)
77             """)
78         conn.commit()
79
80         with tokenizer.name_analyzer() as analyzer:
81             analyzer.add_postcodes_from_db()