Skip to content

Commit

Permalink
Merge pull request #3591 from lonvia/increase-required-postgresql
Browse files Browse the repository at this point in the history
Increase version requirements for PostgreSQL and PostGIS
  • Loading branch information
lonvia authored Nov 18, 2024
2 parents 689bcbd + bf683d4 commit 02364ce
Show file tree
Hide file tree
Showing 15 changed files with 78 additions and 171 deletions.
2 changes: 1 addition & 1 deletion .github/actions/build-nominatim/action.yml
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@ runs:
shell: bash
- name: Install${{ matrix.flavour }} prerequisites
run: |
sudo apt-get install -y -qq libboost-system-dev libboost-filesystem-dev libexpat1-dev zlib1g-dev libbz2-dev libpq-dev libproj-dev libicu-dev liblua${LUA_VERSION}-dev lua${LUA_VERSION} lua-dkjson nlohmann-json3-dev libspatialite7 libsqlite3-mod-spatialite
sudo apt-get install -y -qq libboost-system-dev libboost-filesystem-dev libexpat1-dev zlib1g-dev libbz2-dev libpq-dev libproj-dev libicu-dev liblua${LUA_VERSION}-dev lua${LUA_VERSION} lua-dkjson nlohmann-json3-dev libspatialite-dev libsqlite3-mod-spatialite
if [ "$FLAVOUR" == "oldstuff" ]; then
pip3 install MarkupSafe==2.0.1 python-dotenv jinja2==2.8 psutil==5.4.2 pyicu==2.9 osmium PyYAML==5.1 sqlalchemy==1.4.31 psycopg==3.1.7 datrie asyncpg aiosqlite
else
Expand Down
37 changes: 9 additions & 28 deletions .github/workflows/ci-tests.yml
Original file line number Diff line number Diff line change
Expand Up @@ -37,21 +37,16 @@ jobs:
needs: create-archive
strategy:
matrix:
flavour: [oldstuff, "ubuntu-20", "ubuntu-22"]
flavour: ["ubuntu-20", "ubuntu-24"]
include:
- flavour: oldstuff
ubuntu: 20
postgresql: '9.6'
postgis: '2.5'
lua: '5.1'
- flavour: ubuntu-20
ubuntu: 20
postgresql: 13
postgresql: 12
postgis: 3
lua: '5.3'
- flavour: ubuntu-22
ubuntu: 22
postgresql: 15
lua: '5.1'
- flavour: ubuntu-24
ubuntu: 24
postgresql: 17
postgis: 3
lua: '5.3'

Expand Down Expand Up @@ -80,37 +75,25 @@ jobs:
flavour: ${{ matrix.flavour }}
lua: ${{ matrix.lua }}

- name: Install test prerequisites (behave from apt)
run: sudo apt-get install -y -qq python3-behave
if: matrix.flavour == 'ubuntu-20'

- name: Install test prerequisites (behave from pip)
- name: Install test prerequisites
run: pip3 install behave==1.2.6
if: (matrix.flavour == 'oldstuff') || (matrix.flavour == 'ubuntu-22')

- name: Install test prerequisites (from apt for Ununtu 2x)
- name: Install test prerequisites
run: sudo apt-get install -y -qq python3-pytest python3-pytest-asyncio uvicorn
if: matrix.flavour != 'oldstuff'

- name: Install newer pytest-asyncio
run: pip3 install -U pytest-asyncio
if: matrix.flavour == 'ubuntu-20'

- name: Install test prerequisites (from pip for Ubuntu 18)
run: pip3 install pytest pytest-asyncio uvicorn
if: matrix.flavour == 'oldstuff'

- name: Install Python webservers
run: pip3 install falcon starlette asgi_lifespan

- name: Install latest flake8
run: pip3 install -U flake8
if: matrix.flavour == 'ubuntu-22'

- name: Python linting
run: python3 -m flake8 src
working-directory: Nominatim
if: matrix.flavour == 'ubuntu-22'

- name: Python unit tests
run: python3 -m pytest test/python
Expand All @@ -124,12 +107,10 @@ jobs:

- name: Install mypy and typechecking info
run: pip3 install -U mypy osmium uvicorn types-PyYAML types-jinja2 types-psycopg2 types-psutil types-requests types-ujson types-Pygments typing-extensions
if: matrix.flavour != 'oldstuff'

- name: Python static typechecking
run: python3 -m mypy --strict src
run: python3 -m mypy --strict --python-version 3.8 src
working-directory: Nominatim
if: matrix.flavour != 'oldstuff'

install:
runs-on: ubuntu-latest
Expand Down
9 changes: 2 additions & 7 deletions docs/admin/Installation.md
Original file line number Diff line number Diff line change
Expand Up @@ -22,15 +22,10 @@ and can't offer support.

### Software

!!! Warning
For larger installations you **must have** PostgreSQL 11+ and PostGIS 3+
otherwise import and queries will be slow to the point of being unusable.
Query performance has marked improvements with PostgreSQL 13+ and PostGIS 3.2+.

For running Nominatim:

* [PostgreSQL](https://www.postgresql.org) (9.6+ will work, 11+ strongly recommended)
* [PostGIS](https://postgis.net) (2.2+ will work, 3.0+ strongly recommended)
* [PostgreSQL](https://www.postgresql.org) (12+ will work, 13+ strongly recommended)
* [PostGIS](https://postgis.net) (3.0+ will work, 3.2+ strongly recommended)
* [osm2pgsql](https://osm2pgsql.org) (1.8+, optional when building with CMake)
* [Python 3](https://www.python.org/) (3.7+)

Expand Down
24 changes: 10 additions & 14 deletions lib-sql/indices.sql
Original file line number Diff line number Diff line change
Expand Up @@ -97,18 +97,14 @@ CREATE INDEX IF NOT EXISTS idx_postcode_postcode
---
CREATE INDEX IF NOT EXISTS idx_search_name_centroid
ON search_name USING GIST (centroid) {{db.tablespace.search_index}};

{% if postgres.has_index_non_key_column %}
---
CREATE INDEX IF NOT EXISTS idx_placex_housenumber
ON placex USING btree (parent_place_id)
INCLUDE (housenumber) {{db.tablespace.search_index}}
WHERE housenumber is not null;
---
CREATE INDEX IF NOT EXISTS idx_osmline_parent_osm_id_with_hnr
ON location_property_osmline USING btree(parent_place_id)
INCLUDE (startnumber, endnumber) {{db.tablespace.search_index}}
WHERE startnumber is not null;
{% endif %}

---
CREATE INDEX IF NOT EXISTS idx_placex_housenumber
ON placex USING btree (parent_place_id)
INCLUDE (housenumber) {{db.tablespace.search_index}}
WHERE housenumber is not null;
---
CREATE INDEX IF NOT EXISTS idx_osmline_parent_osm_id_with_hnr
ON location_property_osmline USING btree(parent_place_id)
INCLUDE (startnumber, endnumber) {{db.tablespace.search_index}}
WHERE startnumber is not null;
{% endif %}
6 changes: 3 additions & 3 deletions lib-sql/tables.sql
Original file line number Diff line number Diff line change
Expand Up @@ -184,21 +184,21 @@ CREATE INDEX idx_placex_geometry_address_area_candidates ON placex

-- Usage: - POI is within building with housenumber
CREATE INDEX idx_placex_geometry_buildings ON placex
USING {{postgres.spgist_geom}} (geometry) {{db.tablespace.address_index}}
USING SPGIST (geometry) {{db.tablespace.address_index}}
WHERE address is not null and rank_search = 30
and ST_GeometryType(geometry) in ('ST_Polygon','ST_MultiPolygon');

-- Usage: - linking of similar named places to boundaries
-- - linking of place nodes with same type to boundaries
CREATE INDEX idx_placex_geometry_placenode ON placex
USING {{postgres.spgist_geom}} (geometry) {{db.tablespace.address_index}}
USING SPGIST (geometry) {{db.tablespace.address_index}}
WHERE osm_type = 'N' and rank_search < 26
and class = 'place' and type != 'postcode';

-- Usage: - is node part of a way?
-- - find parent of interpolation spatially
CREATE INDEX idx_placex_geometry_lower_rank_ways ON placex
USING {{postgres.spgist_geom}} (geometry) {{db.tablespace.address_index}}
USING SPGIST (geometry) {{db.tablespace.address_index}}
WHERE osm_type = 'W' and rank_search >= 26;

-- Usage: - linking place nodes by wikidata tag to boundaries
Expand Down
2 changes: 0 additions & 2 deletions lib-sql/tiger_import_finish.sql
Original file line number Diff line number Diff line change
Expand Up @@ -8,9 +8,7 @@
--index only on parent_place_id
CREATE INDEX IF NOT EXISTS idx_location_property_tiger_parent_place_id_imp
ON location_property_tiger_import (parent_place_id)
{% if postgres.has_index_non_key_column %}
INCLUDE (startnumber, endnumber, step)
{% endif %}
{{db.tablespace.aux_index}};
CREATE UNIQUE INDEX IF NOT EXISTS idx_location_property_tiger_place_id_imp
ON location_property_tiger_import (place_id) {{db.tablespace.aux_index}};
Expand Down
18 changes: 8 additions & 10 deletions src/nominatim_api/core.py
Original file line number Diff line number Diff line change
Expand Up @@ -138,19 +138,17 @@ def _on_sqlite_connect(dbapi_con: Any, _: Any) -> None:
async with engine.begin() as conn:
result = await conn.scalar(sa.text('SHOW server_version_num'))
server_version = int(result)
if server_version >= 110000:
await conn.execute(sa.text("SET jit_above_cost TO '-1'"))
await conn.execute(sa.text(
"SET max_parallel_workers_per_gather TO '0'"))
await conn.execute(sa.text("SET jit_above_cost TO '-1'"))
await conn.execute(sa.text(
"SET max_parallel_workers_per_gather TO '0'"))
except (PGCORE_ERROR, sa.exc.OperationalError):
server_version = 0

if server_version >= 110000:
@sa.event.listens_for(engine.sync_engine, "connect")
def _on_connect(dbapi_con: Any, _: Any) -> None:
cursor = dbapi_con.cursor()
cursor.execute("SET jit_above_cost TO '-1'")
cursor.execute("SET max_parallel_workers_per_gather TO '0'")
@sa.event.listens_for(engine.sync_engine, "connect")
def _on_connect(dbapi_con: Any, _: Any) -> None:
cursor = dbapi_con.cursor()
cursor.execute("SET jit_above_cost TO '-1'")
cursor.execute("SET max_parallel_workers_per_gather TO '0'")

self._property_cache['DB:server_version'] = server_version

Expand Down
11 changes: 2 additions & 9 deletions src/nominatim_db/db/sql_preprocessor.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@

import jinja2

from .connection import Connection, server_version_tuple, postgis_version_tuple
from .connection import Connection
from ..config import Configuration
from ..db.query_pool import QueryPool

Expand Down Expand Up @@ -69,14 +69,7 @@ def _setup_postgresql_features(conn: Connection) -> Dict[str, Any]:
""" Set up a dictionary with various optional Postgresql/Postgis features that
depend on the database version.
"""
pg_version = server_version_tuple(conn)
postgis_version = postgis_version_tuple(conn)
pg11plus = pg_version >= (11, 0, 0)
ps3 = postgis_version >= (3, 0)
return {
'has_index_non_key_column': pg11plus,
'spgist_geom': 'SPGIST' if pg11plus and ps3 else 'GIST'
}
return {}


class SQLPreprocessor:
Expand Down
107 changes: 32 additions & 75 deletions src/nominatim_db/tokenizer/icu_tokenizer.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@
from psycopg.types.json import Jsonb
from psycopg import sql as pysql

from ..db.connection import connect, Connection, Cursor, server_version_tuple, \
from ..db.connection import connect, Connection, Cursor, \
drop_tables, table_exists, execute_scalar
from ..config import Configuration
from ..db.sql_preprocessor import SQLPreprocessor
Expand Down Expand Up @@ -110,80 +110,37 @@ def update_statistics(self, config: Configuration, threads: int = 2) -> None:
cur.execute(pysql.SQL('SET max_parallel_workers_per_gather TO {}')
.format(pysql.Literal(min(threads, 6),)))

if server_version_tuple(conn) < (12, 0):
LOG.info('Computing word frequencies')
drop_tables(conn, 'word_frequencies', 'addressword_frequencies')
cur.execute("""CREATE TEMP TABLE word_frequencies AS
SELECT unnest(name_vector) as id, count(*)
FROM search_name GROUP BY id""")
cur.execute('CREATE INDEX ON word_frequencies(id)')
cur.execute("""CREATE TEMP TABLE addressword_frequencies AS
SELECT unnest(nameaddress_vector) as id, count(*)
FROM search_name GROUP BY id""")
cur.execute('CREATE INDEX ON addressword_frequencies(id)')
cur.execute("""
CREATE OR REPLACE FUNCTION word_freq_update(wid INTEGER,
INOUT info JSONB)
AS $$
DECLARE rec RECORD;
BEGIN
IF info is null THEN
info = '{}'::jsonb;
END IF;
FOR rec IN SELECT count FROM word_frequencies WHERE id = wid
LOOP
info = info || jsonb_build_object('count', rec.count);
END LOOP;
FOR rec IN SELECT count FROM addressword_frequencies WHERE id = wid
LOOP
info = info || jsonb_build_object('addr_count', rec.count);
END LOOP;
IF info = '{}'::jsonb THEN
info = null;
END IF;
END;
$$ LANGUAGE plpgsql IMMUTABLE;
""")
LOG.info('Update word table with recomputed frequencies')
drop_tables(conn, 'tmp_word')
cur.execute("""CREATE TABLE tmp_word AS
SELECT word_id, word_token, type, word,
word_freq_update(word_id, info) as info
FROM word
""")
drop_tables(conn, 'word_frequencies', 'addressword_frequencies')
else:
LOG.info('Computing word frequencies')
drop_tables(conn, 'word_frequencies')
cur.execute("""
CREATE TEMP TABLE word_frequencies AS
WITH word_freq AS MATERIALIZED (
SELECT unnest(name_vector) as id, count(*)
FROM search_name GROUP BY id),
addr_freq AS MATERIALIZED (
SELECT unnest(nameaddress_vector) as id, count(*)
FROM search_name GROUP BY id)
SELECT coalesce(a.id, w.id) as id,
(CASE WHEN w.count is null THEN '{}'::JSONB
ELSE jsonb_build_object('count', w.count) END
||
CASE WHEN a.count is null THEN '{}'::JSONB
ELSE jsonb_build_object('addr_count', a.count) END) as info
FROM word_freq w FULL JOIN addr_freq a ON a.id = w.id;
""")
cur.execute('CREATE UNIQUE INDEX ON word_frequencies(id) INCLUDE(info)')
cur.execute('ANALYSE word_frequencies')
LOG.info('Update word table with recomputed frequencies')
drop_tables(conn, 'tmp_word')
cur.execute("""CREATE TABLE tmp_word AS
SELECT word_id, word_token, type, word,
(CASE WHEN wf.info is null THEN word.info
ELSE coalesce(word.info, '{}'::jsonb) || wf.info
END) as info
FROM word LEFT JOIN word_frequencies wf
ON word.word_id = wf.id
""")
drop_tables(conn, 'word_frequencies')
LOG.info('Computing word frequencies')
drop_tables(conn, 'word_frequencies')
cur.execute("""
CREATE TEMP TABLE word_frequencies AS
WITH word_freq AS MATERIALIZED (
SELECT unnest(name_vector) as id, count(*)
FROM search_name GROUP BY id),
addr_freq AS MATERIALIZED (
SELECT unnest(nameaddress_vector) as id, count(*)
FROM search_name GROUP BY id)
SELECT coalesce(a.id, w.id) as id,
(CASE WHEN w.count is null THEN '{}'::JSONB
ELSE jsonb_build_object('count', w.count) END
||
CASE WHEN a.count is null THEN '{}'::JSONB
ELSE jsonb_build_object('addr_count', a.count) END) as info
FROM word_freq w FULL JOIN addr_freq a ON a.id = w.id;
""")
cur.execute('CREATE UNIQUE INDEX ON word_frequencies(id) INCLUDE(info)')
cur.execute('ANALYSE word_frequencies')
LOG.info('Update word table with recomputed frequencies')
drop_tables(conn, 'tmp_word')
cur.execute("""CREATE TABLE tmp_word AS
SELECT word_id, word_token, type, word,
(CASE WHEN wf.info is null THEN word.info
ELSE coalesce(word.info, '{}'::jsonb) || wf.info
END) as info
FROM word LEFT JOIN word_frequencies wf
ON word.word_id = wf.id
""")
drop_tables(conn, 'word_frequencies')

with conn.cursor() as cur:
cur.execute('SET max_parallel_workers_per_gather TO 0')
Expand Down
9 changes: 4 additions & 5 deletions src/nominatim_db/tools/check_database.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@
from textwrap import dedent

from ..config import Configuration
from ..db.connection import connect, Connection, server_version_tuple, \
from ..db.connection import connect, Connection, \
index_exists, table_exists, execute_scalar
from ..db import properties
from ..errors import UsageError
Expand Down Expand Up @@ -121,10 +121,9 @@ def _get_indexes(conn: Connection) -> List[str]:
if table_exists(conn, 'search_name'):
indexes.extend(('idx_search_name_nameaddress_vector',
'idx_search_name_name_vector',
'idx_search_name_centroid'))
if server_version_tuple(conn) >= (11, 0, 0):
indexes.extend(('idx_placex_housenumber',
'idx_osmline_parent_osm_id_with_hnr'))
'idx_search_name_centroid',
'idx_placex_housenumber',
'idx_osmline_parent_osm_id_with_hnr'))

# These won't exist if --no-updates import was used
if table_exists(conn, 'place'):
Expand Down
5 changes: 1 addition & 4 deletions src/nominatim_db/tools/database_import.py
Original file line number Diff line number Diff line change
Expand Up @@ -98,10 +98,7 @@ def setup_database_skeleton(dsn: str, rouser: Optional[str] = None) -> None:
with conn.cursor() as cur:
cur.execute('CREATE EXTENSION IF NOT EXISTS hstore')
cur.execute('CREATE EXTENSION IF NOT EXISTS postgis')

postgis_version = postgis_version_tuple(conn)
if postgis_version[0] >= 3:
cur.execute('CREATE EXTENSION IF NOT EXISTS postgis_raster')
cur.execute('CREATE EXTENSION IF NOT EXISTS postgis_raster')

conn.commit()

Expand Down
Loading

0 comments on commit 02364ce

Please sign in to comment.