]> git.openstreetmap.org Git - nominatim.git/blob - test/python/tools/test_database_import.py
Merge pull request #3328 from lonvia/word-count-into-new-table
[nominatim.git] / test / python / tools / test_database_import.py
1 # SPDX-License-Identifier: GPL-2.0-only
2 #
3 # This file is part of Nominatim. (https://nominatim.org)
4 #
5 # Copyright (C) 2022 by the Nominatim developer community.
6 # For a full list of authors see the git log.
7 """
8 Tests for functions to import a new database.
9 """
10 from pathlib import Path
11 from contextlib import closing
12
13 import pytest
14 import psycopg2
15
16 from nominatim.tools import database_import
17 from nominatim.errors import UsageError
18
class TestDatabaseSetup:
    """ Tests for setup_database_skeleton() that run against a dedicated
        database which is dropped before and after every test.
    """
    DBNAME = 'test_nominatim_python_unittest'

    @pytest.fixture(autouse=True)
    def setup_nonexistant_db(self):
        """ Make sure the test database does not exist when the test starts
            and remove it again once the test is done.
        """
        drop_sql = f'DROP DATABASE IF EXISTS {self.DBNAME}'
        admin_conn = psycopg2.connect(database='postgres')

        try:
            # Level 0 is psycopg2's autocommit mode.
            admin_conn.set_isolation_level(0)
            with admin_conn.cursor() as admin_cur:
                admin_cur.execute(drop_sql)

            yield True

            with admin_conn.cursor() as admin_cur:
                admin_cur.execute(drop_sql)
        finally:
            admin_conn.close()


    @pytest.fixture
    def cursor(self):
        """ Provide a cursor on the test database. The underlying connection
            is closed when the fixture is torn down.
        """
        with closing(psycopg2.connect(database=self.DBNAME)) as db_conn:
            with db_conn.cursor() as db_cur:
                yield db_cur


    def conn(self):
        """ Return a new connection to the test database, wrapped so that it
            is closed when used as a context manager.
        """
        connection = psycopg2.connect(database=self.DBNAME)
        return closing(connection)


    def _dsn(self):
        """ Return the DSN string that addresses the test database.
        """
        return f'dbname={self.DBNAME}'


    def test_setup_skeleton(self):
        """ After the skeleton setup, a table using the HSTORE and GEOMETRY
            types can be created.
        """
        database_import.setup_database_skeleton(self._dsn())

        # Check that all extensions are set up.
        with self.conn() as db_conn, db_conn.cursor() as db_cur:
            db_cur.execute('CREATE TABLE t (h HSTORE, geom GEOMETRY(Geometry, 4326))')


    def test_unsupported_pg_version(self, monkeypatch):
        """ Requiring PostgreSQL version (100, 4) must produce a UsageError.
        """
        monkeypatch.setattr(database_import, 'POSTGRESQL_REQUIRED_VERSION', (100, 4))
        dsn = self._dsn()

        with pytest.raises(UsageError, match='PostgreSQL server is too old.'):
            database_import.setup_database_skeleton(dsn)


    def test_create_db_explicit_ro_user(self):
        """ Setup with 'postgres' given as read-only user must not raise.
        """
        database_import.setup_database_skeleton(self._dsn(), rouser='postgres')


    def test_create_db_missing_ro_user(self):
        """ Setup with the given bogus read-only user must produce a UsageError.
        """
        dsn = self._dsn()

        with pytest.raises(UsageError, match='Missing read-only user.'):
            database_import.setup_database_skeleton(dsn,
                                                    rouser='sdfwkjkjgdugu2;jgsafkljas;')


    def test_setup_extensions_old_postgis(self, monkeypatch):
        """ Requiring PostGIS version (50, 50) must produce a UsageError.
        """
        monkeypatch.setattr(database_import, 'POSTGIS_REQUIRED_VERSION', (50, 50))
        dsn = self._dsn()

        with pytest.raises(UsageError, match='PostGIS is too old.'):
            database_import.setup_database_skeleton(dsn)
85
86
def test_setup_skeleton_already_exists(temp_db):
    """ Setting up the skeleton for the database supplied by the `temp_db`
        fixture must raise a UsageError.
    """
    dsn = f'dbname={temp_db}'

    with pytest.raises(UsageError):
        database_import.setup_database_skeleton(dsn)
90
91
def test_import_osm_data_simple(table_factory, osm2pgsql_options, capfd):
    """ Importing a single file must leave the expected command-line
        arguments in the captured standard output.
    """
    table_factory('place', content=((1, ), ))

    database_import.import_osm_data(Path('file.pbf'), osm2pgsql_options)
    stdout = capfd.readouterr().out

    expected_parts = (
        '--create',
        '--output gazetteer',
        f'--style {osm2pgsql_options["osm2pgsql_style"]}',
        f'--number-processes {osm2pgsql_options["threads"]}',
        f'--cache {osm2pgsql_options["osm2pgsql_cache"]}',
        'file.pbf',
    )
    for part in expected_parts:
        assert part in stdout
104
105
def test_import_osm_data_multifile(table_factory, tmp_path, osm2pgsql_options, capfd):
    """ Importing a list of files must mention every file name in the
        captured standard output.
    """
    table_factory('place', content=((1, ), ))
    osm2pgsql_options['osm2pgsql_cache'] = 0

    names = ('file1.osm', 'file2.osm')
    files = [tmp_path / name for name in names]
    for osm_file in files:
        osm_file.write_text('test')

    database_import.import_osm_data(files, osm2pgsql_options)
    stdout = capfd.readouterr().out

    for name in names:
        assert name in stdout
119
120
def test_import_osm_data_simple_no_data(table_factory, osm2pgsql_options):
    """ With a `place` table created without content, the import must raise
        a UsageError reporting that no data was imported.
    """
    table_factory('place')
    osm_file = Path('file.pbf')

    with pytest.raises(UsageError, match='No data imported'):
        database_import.import_osm_data(osm_file, osm2pgsql_options)
126
127
def test_import_osm_data_simple_ignore_no_data(table_factory, osm2pgsql_options):
    """ With a `place` table created without content, the import must not
        raise when `ignore_errors` is set.
    """
    table_factory('place')
    osm_file = Path('file.pbf')

    database_import.import_osm_data(osm_file, osm2pgsql_options, ignore_errors=True)
133
134
def test_import_osm_data_drop(table_factory, temp_db_conn, tmp_path, osm2pgsql_options):
    """ Importing with `drop=True` must remove the flatnode file as well as
        the `planet_osm_nodes` table.
    """
    table_factory('place', content=((1, ), ))
    table_factory('planet_osm_nodes')

    flatnode_path = tmp_path / 'flatfile'
    flatnode_path.write_text('touch')
    osm2pgsql_options['flatnode_file'] = str(flatnode_path.resolve())

    database_import.import_osm_data(Path('file.pbf'), osm2pgsql_options, drop=True)

    file_left_over = flatnode_path.exists()
    assert not file_left_over
    assert not temp_db_conn.table_exists('planet_osm_nodes')
148
149
def test_import_osm_data_default_cache(table_factory, osm2pgsql_options, capfd):
    """ With the cache option preset to 0, the `--cache` argument in the
        captured output must match the option value found after the import.
    """
    # NOTE(review): presumably import_osm_data() replaces the 0 with a
    # computed default inside the options dict - confirm against the tool.
    table_factory('place', content=((1, ), ))
    osm2pgsql_options['osm2pgsql_cache'] = 0

    this_file = Path(__file__)
    database_import.import_osm_data(this_file, osm2pgsql_options)
    stdout = capfd.readouterr().out

    expected = f'--cache {osm2pgsql_options["osm2pgsql_cache"]}'
    assert expected in stdout
159
160
@pytest.mark.parametrize("with_search", (True, False))
def test_truncate_database_tables(temp_db_conn, temp_db_cursor, table_factory, with_search):
    """ Fill a set of data tables with three rows each and check that
        truncate_data_tables() leaves all of them empty. The `search_name`
        table is only included when `with_search` is set.
    """
    tables = ['placex', 'place_addressline', 'location_area',
              'location_area_country',
              'location_property_tiger', 'location_property_osmline',
              'location_postcode', 'location_road_23']
    tables += ['search_name'] if with_search else []

    three_rows = ((1, ), (2, ), (3, ))
    for name in tables:
        table_factory(name, content=three_rows)
        assert temp_db_cursor.table_rows(name) == 3

    database_import.truncate_data_tables(temp_db_conn)

    for name in tables:
        assert temp_db_cursor.table_rows(name) == 0
178
179
@pytest.mark.parametrize("threads", (1, 5))
def test_load_data(dsn, place_row, placex_table, osmline_table,
                   temp_db_cursor, threads):
    """ Load 30 simple places plus one 'place=houses' way and check that
        load_data() yields 30 rows in `placex` and one row in
        `location_property_osmline`.
    """
    # Stand-in SQL functions that simply return 'a'.
    stub_functions = ('precompute_words', 'getorcreate_housenumber_id', 'make_standard_name')
    for func in stub_functions:
        temp_db_cursor.execute(f"""CREATE FUNCTION {func} (src TEXT)
                                  RETURNS TEXT AS $$ SELECT 'a'::TEXT $$ LANGUAGE SQL
                               """)

    first_id, num_places = 100, 30
    for osm_id in range(first_id, first_id + num_places):
        place_row(osm_id=osm_id)
    place_row(osm_type='W', osm_id=342, cls='place', typ='houses',
              geom='SRID=4326;LINESTRING(0 0, 10 10)')

    database_import.load_data(dsn, threads)

    placex_count = temp_db_cursor.table_rows('placex')
    osmline_count = temp_db_cursor.table_rows('location_property_osmline')
    assert placex_count == 30
    assert osmline_count == 1
196
197
class TestSetupSQL:
    """ Tests for the functions that execute SQL setup scripts.

        Each test writes a small SQL script that defines a function
        `test()`, runs the import function under test and then checks the
        value returned by `SELECT test()`.
    """

    @pytest.fixture(autouse=True)
    def init_env(self, temp_db, tmp_path, def_config, sql_preprocessor_cfg):
        """ Point the SQL library directory of the configuration to a fresh
            temporary directory and keep the configuration for the tests.
        """
        def_config.lib_dir.sql = tmp_path / 'sql'
        def_config.lib_dir.sql.mkdir()

        self.config = def_config


    def write_sql(self, fname, content):
        """ Write `content` into the file `fname` in the SQL library directory.
        """
        (self.config.lib_dir.sql / fname).write_text(content)


    @pytest.mark.parametrize("reverse", [True, False])
    def test_create_tables(self, temp_db_conn, temp_db_cursor, reverse):
        """ create_tables() must run 'tables.sql' with `db.reverse_only`
            set to the value handed in.
        """
        self.write_sql('tables.sql',
                       """CREATE FUNCTION test() RETURNS bool
                          AS $$ SELECT {{db.reverse_only}} $$ LANGUAGE SQL""")

        database_import.create_tables(temp_db_conn, self.config, reverse)

        # A bare comparison would be evaluated and thrown away; only the
        # assert makes the test fail on a wrong result.
        assert temp_db_cursor.scalar('SELECT test()') == reverse


    def test_create_table_triggers(self, temp_db_conn, temp_db_cursor):
        """ create_table_triggers() must run 'table-triggers.sql'.
        """
        self.write_sql('table-triggers.sql',
                       """CREATE FUNCTION test() RETURNS TEXT
                          AS $$ SELECT 'a'::text $$ LANGUAGE SQL""")

        database_import.create_table_triggers(temp_db_conn, self.config)

        assert temp_db_cursor.scalar('SELECT test()') == 'a'


    def test_create_partition_tables(self, temp_db_conn, temp_db_cursor):
        """ create_partition_tables() must run 'partition-tables.src.sql'.
        """
        self.write_sql('partition-tables.src.sql',
                       """CREATE FUNCTION test() RETURNS TEXT
                          AS $$ SELECT 'b'::text $$ LANGUAGE SQL""")

        database_import.create_partition_tables(temp_db_conn, self.config)

        assert temp_db_cursor.scalar('SELECT test()') == 'b'


    @pytest.mark.parametrize("drop", [True, False])
    def test_create_search_indices(self, temp_db_conn, temp_db_cursor, drop):
        """ create_search_indices() must run 'indices.sql' with `drop` set
            to the value handed in.
        """
        self.write_sql('indices.sql',
                       """CREATE FUNCTION test() RETURNS bool
                          AS $$ SELECT {{drop}} $$ LANGUAGE SQL""")

        database_import.create_search_indices(temp_db_conn, self.config, drop)

        assert temp_db_cursor.scalar('SELECT test()') == drop