"""
Preprocessing of SQL files.
"""
-from typing import Set, Dict, Any
+from typing import Set, Dict, Any, cast
import jinja2
from nominatim.db.connection import Connection
def _get_tables(conn: Connection) -> Set[str]:
""" Return the set of tables currently in use.
- Only includes non-partitioned
"""
with conn.cursor() as cur:
cur.execute("SELECT tablename FROM pg_tables WHERE schemaname = 'public'")
return set((row[0] for row in list(cur)))
+def _get_middle_db_format(conn: Connection, tables: Set[str]) -> str:
+ """ Returns the version of the slim middle tables.
+ """
+ if 'osm2pgsql_properties' not in tables:
+ return '1'
+
+ with conn.cursor() as cur:
+ cur.execute("SELECT value FROM osm2pgsql_properties WHERE property = 'db_format'")
+ row = cur.fetchone()
+
+ return cast(str, row[0]) if row is not None else '1'
+
def _setup_tablespace_sql(config: Configuration) -> Dict[str, str]:
""" Returns a dict with tablespace expressions for the different tablespace
db_info['tables'] = _get_tables(conn)
db_info['reverse_only'] = 'search_name' not in db_info['tables']
db_info['tablespace'] = _setup_tablespace_sql(config)
+ db_info['middle_db_format'] = _get_middle_db_format(conn, db_info['tables'])
self.env.globals['config'] = config
self.env.globals['db'] = db_info
def run_parallel_sql_file(self, dsn: str, name: str, num_threads: int = 1,
**kwargs: Any) -> None:
- """ Execure the given SQL files using parallel asynchronous connections.
+ """ Execute the given SQL files using parallel asynchronous connections.
The keyword arguments may supply additional parameters for
preprocessing.
for tests on data that looks up members.
"""
with context.db.cursor() as cur:
- for r in context.table:
- last_node = 0
- last_way = 0
- parts = []
- if r['members']:
- members = []
- for m in r['members'].split(','):
- mid = NominatimID(m)
- if mid.typ == 'N':
- parts.insert(last_node, int(mid.oid))
- last_node += 1
- last_way += 1
- elif mid.typ == 'W':
- parts.insert(last_way, int(mid.oid))
- last_way += 1
- else:
- parts.append(int(mid.oid))
-
- members.extend((mid.typ.lower() + mid.oid, mid.cls or ''))
- else:
- members = None
-
- tags = chain.from_iterable([(h[5:], r[h]) for h in r.headings if h.startswith("tags+")])
-
- cur.execute("""INSERT INTO planet_osm_rels (id, way_off, rel_off, parts, members, tags)
- VALUES (%s, %s, %s, %s, %s, %s)""",
- (r['id'], last_node, last_way, parts, members, list(tags)))
+ cur.execute("SELECT value FROM osm2pgsql_properties WHERE property = 'db_format'")
+ row = cur.fetchone()
+ if row is None or row[0] == '1':
+ for r in context.table:
+ last_node = 0
+ last_way = 0
+ parts = []
+ if r['members']:
+ members = []
+ for m in r['members'].split(','):
+ mid = NominatimID(m)
+ if mid.typ == 'N':
+ parts.insert(last_node, int(mid.oid))
+ last_node += 1
+ last_way += 1
+ elif mid.typ == 'W':
+ parts.insert(last_way, int(mid.oid))
+ last_way += 1
+ else:
+ parts.append(int(mid.oid))
+
+ members.extend((mid.typ.lower() + mid.oid, mid.cls or ''))
+ else:
+ members = None
+
+ tags = chain.from_iterable([(h[5:], r[h]) for h in r.headings if h.startswith("tags+")])
+
+ cur.execute("""INSERT INTO planet_osm_rels (id, way_off, rel_off, parts, members, tags)
+ VALUES (%s, %s, %s, %s, %s, %s)""",
+ (r['id'], last_node, last_way, parts, members, list(tags)))
+ else:
+ for r in context.table:
+ if r['members']:
+ members = []
+ for m in r['members'].split(','):
+ mid = NominatimID(m)
+ members.append({'ref': mid.oid, 'role': mid.cls or '', 'type': mid.typ})
+ else:
+ members = []
+
+ tags = {h[5:]: r[h] for h in r.headings if h.startswith("tags+")}
+
+ cur.execute("""INSERT INTO planet_osm_rels (id, tags, members)
+ VALUES (%s, %s, %s)""",
+ (r['id'], psycopg2.extras.Json(tags),
+ psycopg2.extras.Json(members)))
@given("the ways")
def add_data_to_planet_ways(context):
tests on that that looks up node ids in this table.
"""
with context.db.cursor() as cur:
+ cur.execute("SELECT value FROM osm2pgsql_properties WHERE property = 'db_format'")
+ row = cur.fetchone()
+ json_tags = row is not None and row[0] != '1'
for r in context.table:
- tags = chain.from_iterable([(h[5:], r[h]) for h in r.headings if h.startswith("tags+")])
+ if json_tags:
+ tags = psycopg2.extras.Json({h[5:]: r[h] for h in r.headings if h.startswith("tags+")})
+ else:
+ tags = list(chain.from_iterable([(h[5:], r[h])
+ for h in r.headings if h.startswith("tags+")]))
nodes = [ int(x.strip()) for x in r['nodes'].split(',') ]
cur.execute("INSERT INTO planet_osm_ways (id, nodes, tags) VALUES (%s, %s, %s)",
- (r['id'], nodes, list(tags)))
+ (r['id'], nodes, tags))
################################ WHEN ##################################
def check_place_contents(context, table, exact):
""" Check contents of place/placex tables. Each row represents a table row
and all data must match. Data not present in the expected table, may
- be arbitry. The rows are identified via the 'object' column which must
+ be arbitrary. The rows are identified via the 'object' column which must
have an identifier of the form '<NRW><osm id>[:<class>]'. When multiple
rows match (for example because 'class' was left out and there are
multiple entries for the given OSM object) then all must match. All
def check_search_name_contents(context, exclude):
""" Check contents of place/placex tables. Each row represents a table row
and all data must match. Data not present in the expected table, may
- be arbitry. The rows are identified via the 'object' column which must
+ be arbitrary. The rows are identified via the 'object' column which must
have an identifier of the form '<NRW><osm id>[:<class>]'. All
expected rows are expected to be present with at least one database row.
"""
def check_location_postcode(context):
""" Check full contents for location_postcode table. Each row represents a table row
and all data must match. Data not present in the expected table, may
- be arbitry. The rows are identified via 'country' and 'postcode' columns.
+ be arbitrary. The rows are identified via 'country' and 'postcode' columns.
All rows must be present as excepted and there must not be additional
rows.
"""
def check_place_addressline(context):
""" Check the contents of the place_addressline table. Each row represents
a table row and all data must match. Data not present in the expected
- table, may be arbitry. The rows are identified via the 'object' column,
+ table, may be arbitrary. The rows are identified via the 'object' column,
representing the addressee and the 'address' column, representing the
address item.
"""
def check_place_contents(context, exact):
""" Check contents of the interpolation table. Each row represents a table row
and all data must match. Data not present in the expected table, may
- be arbitry. The rows are identified via the 'object' column which must
+ be arbitrary. The rows are identified via the 'object' column which must
have an identifier of the form '<osm id>[:<startnumber>]'. When multiple
rows match (for example because 'startnumber' was left out and there are
multiple entries for the given OSM object) then all must match. All