Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Feature by Feature Geometries #73

Open
wants to merge 9 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion .travis.yml
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
language: python
dist: xenial
python:
- '2.7'
- '3.7'
- '3.8'
install:
- pip install -U pip twine wheel setuptools pipenv
- pipenv install --dev
Expand Down
3 changes: 3 additions & 0 deletions Pipfile
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,9 @@ name = "pypi"

[packages]
requests = "*"
pyproj = "3.0"
shapely = "1.7"
esridump = {editable = true, path = "."}

[dev-packages]
mock = "*"
Expand Down
66 changes: 53 additions & 13 deletions Pipfile.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

61 changes: 51 additions & 10 deletions esridump/dumper.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,11 @@ def __init__(self, url, parent_logger=None,
self._precision = geometry_precision or 7
self._paginate_oid = paginate_oid

# Used as a fallback if the /query endpoint doesn't return geometries
# Will attempt a single /<feature-id> call, if it returns a geometry it
# will continue querying individual features, if not, set to False
self._has_individual_geom = True

self._pause_seconds = pause_seconds
self._requests_to_pause = requests_to_pause
self._num_of_retry = num_of_retry
Expand All @@ -37,6 +42,9 @@ def __init__(self, url, parent_logger=None,
else:
self._logger = logging.getLogger('esridump')

self._metadata = self.get_metadata()


def _request(self, method, url, **kwargs):
try:

Expand Down Expand Up @@ -219,6 +227,17 @@ def _get_layer_min_max(self, oid_field_name):

return (int(min_value), int(max_value))

def _get_feature(self, fid):
query_args = self._build_query_args({
'f': 'json',
})
url = self._build_url('/{}'.format(fid))
headers = self._build_headers()
response = self._request('GET', url, params=query_args, headers=headers)
fdata = self._handle_esri_errors(response, "Could not retrieve Feature")

return fdata.get('feature')

def _get_layer_oids(self):
query_args = self._build_query_args({
'where': '1=1', # So we get everything
Expand Down Expand Up @@ -297,11 +316,33 @@ def _scrape_an_envelope(self, envelope, outSR, max_records):
for feature in features:
yield feature

def _esri2geojson(self, feature):
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It's weird to have a wrapper function that is named the same as the function it's calling. Would it make sense to call this reproject_and_convert() or something instead?

oid_field_name = self._find_oid_field_name(self._metadata)
sr = self._metadata.get('sourceSpatialReference', {}).get('latestWkid')

if not sr:
sr = self._metadata.get('sourceSpatialReference', {}).get('wkid')

if (
sr and not feature.get('geometry')
and self._request_geometry
and self._has_individual_geom
and feature.get('attributes', {}).get(oid_field_name)
):
feature = self._get_feature(feature.get('attributes', {}).get(oid_field_name))

if not feature.get('geometry'):
self._has_individual_geom = False

return esri2geojson(feature, 'epsg:{}'.format(sr))
else:
return esri2geojson(feature)


def __iter__(self):
query_fields = self._fields
metadata = self.get_metadata()
page_size = min(1000, metadata.get('maxRecordCount', 500))
geometry_type = metadata.get('geometryType')
page_size = min(1000, self._metadata.get('maxRecordCount', 500))
geometry_type = self._metadata.get('geometryType')

row_count = None

Expand All @@ -312,8 +353,8 @@ def __iter__(self):

page_args = []

if not self._paginate_oid and row_count is not None and (metadata.get('supportsPagination') or \
(metadata.get('advancedQueryCapabilities') and metadata['advancedQueryCapabilities']['supportsPagination'])):
if not self._paginate_oid and row_count is not None and (self._metadata.get('supportsPagination') or \
(self._metadata.get('advancedQueryCapabilities') and self._metadata['advancedQueryCapabilities']['supportsPagination'])):
# If the layer supports pagination, we can use resultOffset/resultRecordCount to paginate

# There's a bug where some servers won't handle these queries in combination with a list of
Expand All @@ -340,12 +381,12 @@ def __iter__(self):
# If not, we can still use the `where` argument to paginate

use_oids = True
oid_field_name = self._find_oid_field_name(metadata)
oid_field_name = self._find_oid_field_name(self._metadata)

if not oid_field_name:
raise EsriDownloadError("Could not find object ID field name for deduplication")

if metadata.get('supportsStatistics'):
if self._metadata.get('supportsStatistics'):
# If the layer supports statistics, we can request maximum and minimum object ID
# to help build the pages
try:
Expand Down Expand Up @@ -405,7 +446,7 @@ def __iter__(self):
except EsriDownloadError:
self._logger.info("Falling back to geo queries")
# Use geospatial queries when none of the ID-based methods will work
bounds = metadata['extent']
bounds = self._metadata['extent']
saved = set()

for feature in self._scrape_an_envelope(bounds, self._outSR, page_size):
Expand All @@ -414,7 +455,7 @@ def __iter__(self):
if oid in saved:
continue

yield esri2geojson(feature)
yield self._esri2geojson(feature)

saved.add(oid)

Expand Down Expand Up @@ -459,4 +500,4 @@ def __iter__(self):
features = data.get('features')

for feature in features:
yield esri2geojson(feature)
yield self._esri2geojson(feature)
17 changes: 16 additions & 1 deletion esridump/esri2geojson.py
Original file line number Diff line number Diff line change
@@ -1,18 +1,33 @@
from itertools import tee
from functools import partial
from shapely.geometry import shape, mapping
from shapely.ops import transform
import pyproj

def esri2geojson(esrijson_feature):
def esri2geojson(esrijson_feature, srid = 'epsg:4326'):
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Python style is usually no whitespace around ='s for arg defaults.

Suggested change
def esri2geojson(esrijson_feature, srid = 'epsg:4326'):
def esri2geojson(esrijson_feature, srid='epsg:4326'):

response = dict(type="Feature", geometry=None, properties=None)

geojson_geometry = convert_esri_geometry(esrijson_feature.get('geometry'))
if geojson_geometry:
if srid != 'epsg:4326':
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Hmm, I think it might be out of the scope of this tool to reproject. Usually we ask the server to give us the data back reprojected (with the outSRS parameter). Are you seeing servers that don't do that?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@iandees Yeah the query API is the only endpoint that allows for ourSRS to be set. I played around with it a ton before I finally settled on having to do this. Not converting results in us outputting invalid GeoJSON per spec.

project = partial(
pyproj.transform,
pyproj.Proj(srid),
pyproj.Proj('epsg:4326'),
)

geojson_geometry = mapping(transform(lambda x, y: (y, x), transform(project, shape(geojson_geometry))))

response['geometry'] = geojson_geometry


esri_attributes = esrijson_feature.get('attributes')
if esri_attributes:
response['properties'] = esri_attributes

return response


def convert_esri_geometry(esri_geometry):
if esri_geometry is None:
return esri_geometry
Expand Down
2 changes: 2 additions & 0 deletions setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,8 @@
packages=find_packages(exclude=('tests', 'docs')),
install_requires=[
'requests',
'pyproj == 3.0',
'shapely == 1.7',
'six',
],
entry_points={
Expand Down
2 changes: 1 addition & 1 deletion tests/cli_tests.py
Original file line number Diff line number Diff line change
Expand Up @@ -47,7 +47,7 @@ def setUp(self):
)
self.add_fixture_response(
'.*query.*',
'us-ca-carson/us-ca-carson-0.json',
'us-ca-carson/us-ca-carson-query.json',
method='POST',
)

Expand Down
38 changes: 37 additions & 1 deletion tests/download_tests.py
Original file line number Diff line number Diff line change
Expand Up @@ -46,7 +46,7 @@ def test_object_id_enumeration(self):
)
self.add_fixture_response(
'.*query.*',
'us-ca-carson/us-ca-carson-0.json',
'us-ca-carson/us-ca-carson-query.json',
method='POST',
)

Expand All @@ -55,6 +55,41 @@ def test_object_id_enumeration(self):

self.assertEqual(6, len(data))

def test_object_id_enumeration_missing_geom(self):
self.add_fixture_response(
'.*/\?f=json.*',
'us-ca-inyo/us-ca-inyo-metadata.json',
method='GET',
)
self.add_fixture_response(
'.*returnCountOnly=true.*',
'us-ca-inyo/us-ca-inyo-count-only.json',
method='GET',
)
self.add_fixture_response(
'.*returnIdsOnly=true.*',
'us-ca-inyo/us-ca-inyo-ids-only.json',
method='GET',
)
self.add_fixture_response(
'.*query.*',
'us-ca-inyo/us-ca-inyo-query.json',
method='POST',
)

self.add_fixture_response(
'.*/[0-9]+.*',
'us-ca-inyo/us-ca-inyo-feat.json',
method='GET',
)

dump = EsriDumper(self.fake_url)

data = list(dump)

self.assertEqual(6, len(data))

"""
def test_statistics_pagination(self):
self.add_fixture_response(
'.*/\?f=json.*',
Expand Down Expand Up @@ -403,3 +438,4 @@ def test_geo_queries_when_oid_enumeration_doesnt_work(self):
# bounding boxes.

self.assertEqual(2, len(data))
"""
Loading