Skip to content

Commit b8a4cce

Browse files
authored
Merge pull request #6 from govlt/streets-and-addresses
Import streets and addresses
2 parents a6b4151 + 53bcb84 commit b8a4cce

File tree

10 files changed

+708
-150
lines changed

10 files changed

+708
-150
lines changed

api/Dockerfile

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,8 @@
22
FROM ghcr.io/osgeo/gdal:ubuntu-full-3.9.0 as database-builder
33
WORKDIR /opt/database
44

5+
RUN apt-get update && apt-get install -y csvkit && rm -rf /var/lib/apt/lists/*
6+
57
COPY create-database.sh ./create-database.sh
68

79
RUN sh create-database.sh

api/create-database.sh

Lines changed: 43 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -24,30 +24,68 @@ echo "Importing counties data into SQLite"
2424
curl -f -o data-sources/counties.json https://www.registrucentras.lt/aduomenys/?byla=adr_gra_apskritys.json
2525
calculate_md5 data-sources/counties.json >> data-sources/checksums.txt
2626
ogr2ogr -f SQLite boundaries.sqlite data-sources/counties.json -dsco SPATIALITE=YES -lco FID=feature_id -lco GEOMETRY_NAME=geom \
27-
-sql "SELECT FID AS feature_id, APS_KODAS AS code, APS_PAV as name, APS_PLOTAS as area_ha FROM counties"
27+
-sql "SELECT FID AS feature_id, CAST(APS_KODAS AS integer(8)) AS code, APS_PAV as name, APS_PLOTAS as area_ha FROM counties"
2828
ogrinfo -sql "CREATE UNIQUE INDEX counties_code ON counties(code)" boundaries.sqlite
2929

3030
echo "Importing municipalities data into SQLite"
3131
curl -f -o data-sources/municipalities.json https://www.registrucentras.lt/aduomenys/?byla=adr_gra_savivaldybes.json
3232
calculate_md5 data-sources/municipalities.json >> data-sources/checksums.txt
3333
ogr2ogr -append -f SQLite boundaries.sqlite data-sources/municipalities.json -lco FID=feature_id -lco GEOMETRY_NAME=geom \
34-
-sql "SELECT FID AS feature_id, SAV_KODAS AS code, SAV_PAV as name, SAV_PLOTAS as area_ha, APS_KODAS as county_code FROM municipalities"
34+
-sql "SELECT FID AS feature_id, CAST(SAV_KODAS AS integer(8)) AS code, SAV_PAV as name, SAV_PLOTAS as area_ha, CAST(APS_KODAS AS integer(8)) as county_code FROM municipalities"
3535
ogrinfo -sql "CREATE UNIQUE INDEX municipalities_code ON municipalities(code)" boundaries.sqlite
36+
ogrinfo -sql "CREATE INDEX municipalities_county_code ON municipalities(county_code)" boundaries.sqlite
3637

3738
echo "Importing elderships data into SQLite"
3839
curl -f -o data-sources/elderships.json https://www.registrucentras.lt/aduomenys/?byla=adr_gra_seniunijos.json
3940
calculate_md5 data-sources/elderships.json >> data-sources/checksums.txt
4041
ogr2ogr -append -f SQLite boundaries.sqlite data-sources/elderships.json -lco FID=feature_id -lco GEOMETRY_NAME=geom \
41-
-sql "SELECT FID AS feature_id, SEN_KODAS AS code, SEN_PAV as name, SEN_PLOTAS as area_ha, SAV_KODAS AS municipality_code FROM elderships"
42+
-sql "SELECT FID AS feature_id, CAST(SEN_KODAS AS integer(8)) AS code, SEN_PAV as name, SEN_PLOTAS as area_ha, CAST(SAV_KODAS AS integer(8)) AS municipality_code FROM elderships"
4243
ogrinfo -sql "CREATE UNIQUE INDEX elderships_code ON elderships(code)" boundaries.sqlite
44+
ogrinfo -sql "CREATE INDEX elderships_municipality_code ON elderships(municipality_code)" boundaries.sqlite
4345

4446
echo "Importing residential areas data into SQLite"
4547
curl -f -o data-sources/residential_areas.json https://www.registrucentras.lt/aduomenys/?byla=adr_gra_gyvenamosios_vietoves.json
4648
calculate_md5 data-sources/residential_areas.json >> data-sources/checksums.txt
47-
# For some reason GYV_KODAS is numeric in RC specification, convert it to text
4849
ogr2ogr -append -f SQLite boundaries.sqlite data-sources/residential_areas.json -lco FID=feature_id -lco GEOMETRY_NAME=geom \
49-
-sql "SELECT FID AS feature_id, CAST(GYV_KODAS AS character(255)) AS code, GYV_PAV as name, PLOTAS as area_ha, SAV_KODAS AS municipality_code FROM residential_areas"
50+
-sql "SELECT FID AS feature_id, GYV_KODAS AS code, GYV_PAV as name, PLOTAS as area_ha, CAST(SAV_KODAS AS integer(8)) AS municipality_code FROM residential_areas"
5051
ogrinfo -sql "CREATE UNIQUE INDEX residential_areas_code ON residential_areas(code)" boundaries.sqlite
52+
ogrinfo -sql "CREATE INDEX residential_municipality_code ON residential_areas(municipality_code)" boundaries.sqlite
53+
54+
echo "Importing streets data into SQLite"
55+
curl -f -o data-sources/streets.json https://www.registrucentras.lt/aduomenys/?byla=adr_gra_gatves.json
56+
calculate_md5 data-sources/streets.json >> data-sources/checksums.txt
57+
ogr2ogr -append -f SQLite boundaries.sqlite data-sources/streets.json -lco FID=feature_id -lco GEOMETRY_NAME=geom \
58+
-sql "SELECT FID AS feature_id, GAT_KODAS AS code, GAT_PAV as name, GAT_PAV_PI AS full_name, GAT_ILGIS as length_m, GYV_KODAS AS residential_area_code FROM streets"
59+
ogrinfo -sql "CREATE UNIQUE INDEX streets_code ON streets(code)" boundaries.sqlite
60+
ogrinfo -sql "CREATE INDEX streets_residential_area_code ON streets(residential_area_code)" boundaries.sqlite
61+
62+
echo "Importing addresses data into SQLite"
63+
64+
curl -f -o data-sources/addresses-information.psv https://www.registrucentras.lt/aduomenys/?byla=adr_stat_lr.csv
65+
calculate_md5 data-sources/addresses-information.psv >> data-sources/checksums.txt
66+
67+
ogr2ogr -f GPKG data-sources/addresses.gpkg data-sources/addresses-information.psv -nln info
68+
69+
# The complete geojson data with all municipalities is updated only once a year. However,
70+
# when downloaded per municipality, it is updated every month. To ensure we have the latest data,
71+
# this step pulls data for each municipality individually.
72+
echo "Importing address points for each municipality"
73+
74+
curl -sf "https://www.registrucentras.lt/aduomenys/?byla=adr_savivaldybes.csv" | csvcut -d "|" -c "SAV_KODAS" | tail -n +2 | while read -r code; do
75+
echo "Converting https://www.registrucentras.lt/aduomenys/?byla=adr_gra_$code.json"
76+
curl -f -o "data-sources/addresses-$code.json" "https://www.registrucentras.lt/aduomenys/?byla=adr_gra_$code.json"
77+
calculate_md5 "data-sources/addresses-$code.json" >> data-sources/checksums.txt
78+
79+
ogr2ogr -append -f GPKG data-sources/addresses.gpkg "data-sources/addresses-$code.json" -nln points
80+
done
81+
82+
echo "Finishing addresses data import into SQLite"
83+
ogr2ogr -append -f SQLite boundaries.sqlite data-sources/addresses.gpkg -lco FID=feature_id -nln addresses \
84+
-sql "SELECT points.fid AS feature_id, points.geom, points.AOB_KODAS as code, CAST(info.sav_kodas AS integer(8)) AS municipality_code, points.gyv_kodas AS residential_area_code, points.gat_kodas AS street_code, info.nr AS plot_or_building_number, info.pasto_kodas AS postal_code, NULLIF(info.korpuso_nr, '') AS building_block_number FROM points INNER JOIN info USING (AOB_KODAS) ORDER BY AOB_KODAS"
85+
ogrinfo -sql "CREATE UNIQUE INDEX addresses_code ON addresses(code)" boundaries.sqlite
86+
ogrinfo -sql "CREATE INDEX addresses_municipality_code ON addresses(municipality_code)" boundaries.sqlite
87+
ogrinfo -sql "CREATE INDEX addresses_residential_area_code ON addresses(residential_area_code)" boundaries.sqlite
88+
ogrinfo -sql "CREATE INDEX addresses_street_code ON addresses(street_code)" boundaries.sqlite
5189

5290
echo "Finalizing SQLite database"
5391
ogrinfo boundaries.sqlite -sql "VACUUM"

api/database.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@
88

99
def _connect():
1010
sqlean.extensions.enable("unicode")
11-
conn = sqlean.connect("file:boundaries.sqlite?immutable=1", uri=True)
11+
conn = sqlean.connect("file:boundaries.sqlite?immutable=1", uri=True, check_same_thread=False)
1212
load_spatialite(conn)
1313
return conn
1414

api/filters.py

Lines changed: 254 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,254 @@
1+
from abc import ABC
2+
3+
from geoalchemy2.functions import ST_Intersects, ST_Transform, ST_GeomFromEWKT, ST_Contains, ST_IsValid
4+
from sqlalchemy import Select
5+
from sqlalchemy.orm import Session, InstrumentedAttribute
6+
from sqlalchemy.sql.functions import GenericFunction
7+
from sqlean import OperationalError
8+
9+
import database
10+
import models
11+
import schemas
12+
13+
14+
class BaseFilter(ABC):
    """Common building blocks for the per-layer search filters.

    Subclasses set ``model_class`` and extend :meth:`apply` with the request
    sections they understand. All conditions built here read their columns
    from ``self.model_class``.
    """

    # ORM model class (not an instance) whose ``geom``, ``name``,
    # ``feature_id`` and ``code`` attributes are used to build conditions.
    model_class: type[database.Base]

    def apply(
            self,
            request: schemas.BaseSearchRequest,
            db: Session,
            query: Select,
    ) -> Select:
        """Return ``query`` narrowed by the request's geometry filter, if any.

        :param request: search request; only ``request.geometry`` is read here.
        :param db: session used to validate the supplied geometry.
        :param query: select statement to extend.
        :raises InvalidFilterGeometry: when a supplied geometry is not valid.
        """
        if geometry_filter := request.geometry:
            query = self._apply_geometry_filter(
                geometry_filter=geometry_filter,
                db=db,
                query=query,
            )
        return query

    def _apply_general_boundaries_filter(
            self,
            general_boundaries_filter: schemas.GeneralBoundariesFilter,
            query: Select,
    ) -> Select:
        """Add name / feature id / code conditions against ``model_class``.

        Each criterion is optional; empty or missing ones add no condition.
        """
        model = self.model_class

        # Not every model is guaranteed to expose a ``name`` column, so the
        # name criterion is only applied when the attribute exists.
        if hasattr(model, 'name') and general_boundaries_filter.name:
            query = _filter_by_string_field(
                string_filter=general_boundaries_filter.name,
                query=query,
                string_field=model.name,
            )

        if feature_ids := general_boundaries_filter.feature_ids:
            query = query.filter(model.feature_id.in_(feature_ids))

        if codes := general_boundaries_filter.codes:
            query = query.filter(model.code.in_(codes))

        return query

    def _apply_geometry_filter(
            self,
            geometry_filter: schemas.GeometryFilter,
            db: Session,
            query: Select,
    ) -> Select:
        """Add one spatial condition per geometry representation supplied.

        ``ewkb``, ``ewkt`` and ``geojson`` are checked in that order; every
        non-empty one contributes its own condition, so supplying several
        narrows the query by all of them.

        :raises ValueError: when ``geometry_filter.method`` is not supported.
        :raises InvalidFilterGeometry: when a supplied geometry is not valid.
        """
        # Resolved up front so an unsupported method is rejected even when no
        # geometry value was supplied.
        filter_func_type = _get_filter_func(geometry_filter.method)
        geom_field = self.model_class.geom

        # (request field name, supplied value, SQL constructor for that format)
        geometry_sources = (
            ("ewkb", geometry_filter.ewkb, database.GeomFromEWKB),
            ("ewkt", geometry_filter.ewkt, ST_GeomFromEWKT),
            ("geojson", geometry_filter.geojson, database.GeomFromGeoJSON),
        )

        for field, geom_value, geom_from_func_type in geometry_sources:
            if not geom_value:
                continue
            query = _filter_by_geometry(
                db=db,
                query=query,
                field=field,
                geom_value=geom_value,
                filter_func_type=filter_func_type,
                geom_from_func_type=geom_from_func_type,
                geom_field=geom_field,
            )

        return query
96+
97+
98+
class CountiesFilter(BaseFilter):
    """Search filter for the counties layer.

    Runs the inherited base filter first, then the ``counties`` section of
    the request when one is present.

    NOTE(review): subclasses reach this ``apply`` through ``super()`` while
    overriding ``model_class``, so the ``counties`` criteria are then
    evaluated against the subclass's model columns -- confirm that is the
    intended behaviour.
    """

    model_class = models.Counties

    def apply(
            self,
            request: schemas.CountiesSearchRequest,
            db: Session,
            query: Select,
    ):
        """Return ``query`` narrowed by the base and counties criteria."""
        narrowed = super().apply(request, db, query)

        criteria = request.counties
        if not criteria:
            return narrowed

        return self._apply_general_boundaries_filter(
            general_boundaries_filter=criteria,
            query=narrowed,
        )
113+
114+
115+
class MunicipalitiesFilter(CountiesFilter):
    """Search filter for the municipalities layer.

    Runs the inherited counties-level filter first, then the
    ``municipalities`` section of the request when one is present.

    NOTE(review): the inherited criteria are built from ``self.model_class``,
    which is ``models.Municipalities`` here -- confirm parent-level criteria
    are meant to match this model's own columns.
    """

    model_class = models.Municipalities

    def apply(
            self,
            request: schemas.MunicipalitiesSearchRequest,
            db: Session,
            query: Select,
    ):
        """Return ``query`` narrowed by inherited and municipality criteria."""
        narrowed = super().apply(request, db, query)

        criteria = request.municipalities
        if not criteria:
            return narrowed

        return self._apply_general_boundaries_filter(
            general_boundaries_filter=criteria,
            query=narrowed,
        )
132+
133+
134+
class EldershipsFilter(MunicipalitiesFilter):
    """Search filter for the elderships layer.

    Runs the inherited municipalities-level filter first, then the
    ``elderships`` section of the request when one is present.
    """

    model_class = models.Elderships

    def apply(
            self,
            request: schemas.EldershipsSearchRequest,
            db: Session,
            query: Select,
    ):
        """Return ``query`` narrowed by inherited and eldership criteria."""
        narrowed = super().apply(request, db, query)

        criteria = request.elderships
        if not criteria:
            return narrowed

        return self._apply_general_boundaries_filter(
            general_boundaries_filter=criteria,
            query=narrowed,
        )
150+
151+
152+
class ResidentialAreasFilter(MunicipalitiesFilter):
    """Search filter for the residential areas layer.

    Runs the inherited municipalities-level filter first, then the
    ``residential_areas`` section of the request when one is present.
    """

    model_class = models.ResidentialAreas

    def apply(
            self,
            request: schemas.ResidentialAreasSearchRequest,
            db: Session,
            query: Select,
    ):
        """Return ``query`` narrowed by inherited and residential-area criteria."""
        narrowed = super().apply(request, db, query)

        criteria = request.residential_areas
        if not criteria:
            return narrowed

        return self._apply_general_boundaries_filter(
            general_boundaries_filter=criteria,
            query=narrowed,
        )
168+
169+
170+
class StreetsFilter(ResidentialAreasFilter):
    """Search filter for the streets layer.

    Runs the inherited residential-areas-level filter first, then the
    ``streets`` section of the request when one is present.
    """

    model_class = models.Streets

    def apply(
            self,
            request: schemas.StreetsSearchRequest,
            db: Session,
            query: Select,
    ):
        """Return ``query`` narrowed by inherited and street criteria."""
        narrowed = super().apply(request, db, query)

        criteria = request.streets
        if not criteria:
            return narrowed

        return self._apply_general_boundaries_filter(
            general_boundaries_filter=criteria,
            query=narrowed,
        )
187+
188+
189+
class AddressesFilter(StreetsFilter):
    """Search filter for the addresses layer.

    Only the inherited streets-level filter is applied; this class adds no
    address-specific criteria of its own.
    """

    model_class = models.Addresses

    def apply(
            self,
            request: schemas.AddressesSearchRequest,
            db: Session,
            query: Select,
    ):
        """Return ``query`` narrowed by the inherited criteria only."""
        return super().apply(request, db, query)
201+
202+
203+
def _is_valid_geometry(db: Session, geom: GenericFunction) -> bool:
    """Report whether ``ST_IsValid(geom)`` evaluates to 1 on ``db``.

    An ``OperationalError`` raised while executing the check is treated as
    "not valid" rather than propagated.
    """
    try:
        validity = db.execute(ST_IsValid(geom)).scalar()
    except OperationalError:
        return False
    return validity == 1
208+
209+
210+
def _filter_by_geometry(
        db: Session,
        query: Select,
        geom_value: str,
        field: str,
        geom_field: InstrumentedAttribute,
        filter_func_type: type[GenericFunction],
        geom_from_func_type: type[GenericFunction],
) -> Select:
    """Add a spatial condition comparing a request geometry with a column.

    :param db: session used to validate the geometry before filtering.
    :param query: select statement to extend.
    :param geom_value: serialized geometry taken from the request.
    :param field: name of the request field ``geom_value`` came from; only
        used for error reporting.
    :param geom_field: geometry column to compare against.
    :param filter_func_type: spatial predicate class to apply.
    :param geom_from_func_type: SQL function class that parses ``geom_value``.
    :returns: ``query`` with the predicate added as a WHERE condition.
    :raises InvalidFilterGeometry: when the geometry does not pass validation.
    """
    # NOTE(review): 3346 is presumably the SRID of the stored geometries --
    # confirm against the database import.
    geom = ST_Transform(geom_from_func_type(geom_value), 3346)
    if not _is_valid_geometry(db, geom):
        raise InvalidFilterGeometry(message="Invalid geometry", field=field, value=geom_value)

    # The request geometry is the predicate's first argument and the stored
    # column its second.
    return query.where(filter_func_type(geom, geom_field))
224+
225+
226+
def _get_filter_func(filter_method: schemas.GeometryFilterMethod) -> type(GenericFunction):
227+
match filter_method:
228+
case schemas.GeometryFilterMethod.intersects:
229+
return ST_Intersects
230+
case schemas.GeometryFilterMethod.contains:
231+
return ST_Contains
232+
case _:
233+
raise ValueError(f"Unknown geometry filter method: {filter_method}")
234+
235+
236+
def _filter_by_string_field(
        string_filter: schemas.StringFilter,
        query: Select,
        string_field: InstrumentedAttribute
) -> Select:
    """Add case-insensitive substring / prefix conditions on ``string_field``.

    ``string_filter.contains`` and ``string_filter.starts`` are independent:
    each non-empty one adds its own condition, and both may apply at once.
    """
    if substring := string_filter.contains:
        query = query.filter(string_field.icontains(substring))

    if prefix := string_filter.starts:
        query = query.filter(string_field.istartswith(prefix))

    return query
247+
248+
249+
class InvalidFilterGeometry(Exception):
    """Raised when a geometry supplied in a search filter is not valid.

    Carries the human-readable ``message``, the request ``field`` the
    geometry came from and the offending ``value``.
    """

    def __init__(self, message: str, field: str, value: str):
        # The message doubles as the exception's sole positional argument.
        super().__init__(message)
        self.message = message
        self.field = field
        self.value = value

0 commit comments

Comments
 (0)