diff --git a/geonode/base/models.py b/geonode/base/models.py index 4d7bd0f005b..41a4377e6a3 100644 --- a/geonode/base/models.py +++ b/geonode/base/models.py @@ -955,7 +955,7 @@ def raw_abstract(self): @property def can_be_downloaded(self): - return self.subtype in {"vector", "raster", "vector_time"} + return self.subtype in {"vector", "raster", "vector_time", "tabular"} @property def can_have_wfs_links(self): @@ -963,15 +963,19 @@ def can_have_wfs_links(self): @property def can_have_wps_links(self): - return self.subtype in {"vector", "tileStore", "remote", "wmsStore", "vector_time"} + return self.subtype in {"vector", "tileStore", "remote", "wmsStore", "vector_time", "tabular"} + + @property + def should_create_style(self): + return self.subtype != "tabular" @property def can_have_style(self): - return self.subtype not in {"tileStore", "remote"} + return self.subtype not in {"tabular", "tileStore", "remote", "tabular"} @property def can_have_thumbnail(self): - return self.subtype not in {"3dtiles", "cog", "flatgeobuf"} + return self.subtype not in {"tabular", "3dtiles", "cog", "flatgeobuf"} @property def raw_purpose(self): @@ -993,6 +997,15 @@ def raw_data_quality_statement(self): def detail_url(self): return self.get_absolute_url() + def fixup_store_type(self, keys, values): + from geonode.geoserver.helpers import get_dataset_storetype + + if self.subtype == "tabular": + return self + for key in keys: + setattr(self, key, get_dataset_storetype(values[key])) + return self + def clean(self): if self.title: self.title = self.title.replace(",", "_") diff --git a/geonode/geoserver/helpers.py b/geonode/geoserver/helpers.py index 131c521bb38..dfbc0c01869 100755 --- a/geonode/geoserver/helpers.py +++ b/geonode/geoserver/helpers.py @@ -101,6 +101,7 @@ ("application/wfs-collection-1.1", "vector"), ("application/zip", "vector"), ("text/csv", "vector"), + ("text/csv", "tabular"), ] DEFAULT_STYLE_NAME = ["generic", "line", "point", "polygon", "raster"] @@ -1974,10 +1975,7 @@ def sync_instance_with_geoserver(instance_id, *args, **kwargs): instance.gs_resource = gs_resource # Iterate over values from geoserver. - for key in ["alternate", "store", "subtype"]: - # attr_name = key if 'typename' not in key else 'alternate' - # print attr_name - setattr(instance, key, get_dataset_storetype(values[key])) + instance = instance.fixup_store_type(["alternate", "store", "subtype"], values) if updatemetadata: _sync_geoserver_keywords_to_instance(instance, gs_resource.keywords) @@ -2077,7 +2075,7 @@ def sync_instance_with_geoserver(instance_id, *args, **kwargs): # Refresh from DB instance.refresh_from_db() - if updatemetadata: + if updatemetadata and instance.should_create_style: # Save dataset styles logger.debug(f"... Refresh Legend links for Dataset {instance.title}") try: diff --git a/geonode/geoserver/tasks.py b/geonode/geoserver/tasks.py index 6cd6a4a31a6..e319f7819f9 100644 --- a/geonode/geoserver/tasks.py +++ b/geonode/geoserver/tasks.py @@ -154,6 +154,9 @@ def geoserver_create_style(self, instance_id, name, sld_file, tempdir): logger.debug(f"Dataset id {instance_id} does not exist yet!") raise + if not instance.should_create_style: + return + lock_id = f"{self.request.id}" if self.request.id else instance.name log_lock.debug(f"geoserver_create_style: Creating lock {lock_id} for {instance.name}") with AcquireLock(lock_id) as lock: diff --git a/geonode/layers/api/serializers.py b/geonode/layers/api/serializers.py index 5a95554c978..8eae5a79fd6 100644 --- a/geonode/layers/api/serializers.py +++ b/geonode/layers/api/serializers.py @@ -189,6 +189,7 @@ class Meta: "store", "subtype", "ptype", + "is_tabular", ) ) ) diff --git a/geonode/layers/models.py b/geonode/layers/models.py index 66e77cf6357..b9d753ecf50 100644 --- a/geonode/layers/models.py +++ b/geonode/layers/models.py @@ -166,6 +166,10 @@ def is_vector(self): def is_raster(self): return self.subtype == "raster" + @property + def is_tabular(self): + return self.subtype == "tabular" + @property def supports_time(self): valid_attributes = self.get_choices diff --git a/geonode/upload/celery_tasks.py b/geonode/upload/celery_tasks.py index 66bbebc7a3f..f9ef1299c8e 100644 --- a/geonode/upload/celery_tasks.py +++ b/geonode/upload/celery_tasks.py @@ -1046,6 +1046,9 @@ def upsert_data(self, execution_id, /, handler_module_path, action, **kwargs): _datastore.pre_processing(**kwargs) + if not _datastore.input_is_valid(): + raise Exception("dataset is invalid") + is_valid, errors = _datastore.upsert_validation(execution_id, **kwargs) if not is_valid: raise UpsertException(errors) diff --git a/geonode/upload/handlers/README.md b/geonode/upload/handlers/README.md index 7d67ca55cb3..5c1416d3d4b 100644 --- a/geonode/upload/handlers/README.md +++ b/geonode/upload/handlers/README.md @@ -149,7 +149,7 @@ class BaseVectorFileHandler(BaseHandler): return def create_geonode_resource( - self, layer_name: str, alternate: str, execution_id: str, resource_type: Dataset = Dataset, files=None + self, layer_name: str, alternate: str, execution_id: str, resource_type: Dataset = Dataset, files=None, **kwargs ): """ Base function to create the resource into geonode. Each handler can specify diff --git a/geonode/upload/handlers/common/raster.py b/geonode/upload/handlers/common/raster.py index be0d835e1eb..ea442a09e05 100644 --- a/geonode/upload/handlers/common/raster.py +++ b/geonode/upload/handlers/common/raster.py @@ -25,7 +25,6 @@ from typing import List from django.conf import settings -from django.db.models import Q from geonode.base.models import ResourceBase from geonode.layers.models import Dataset from geonode.resource.enumerator import ExecutionRequestAction as exa @@ -238,11 +237,7 @@ def extract_resource_to_publish(self, files, action, layer_name, alternate, **kw return [ { "name": alternate, - "crs": ResourceBase.objects.filter( - Q(alternate__icontains=layer_name) | Q(title__icontains=layer_name) - ) - .first() - .srid, + "crs": ResourceBase.objects.filter(alternate=kwargs.get("original_dataset_alternate")).first().srid, "raster_path": raster_path, } ] @@ -328,12 +323,7 @@ def import_resource(self, files: dict, execution_id: str, **kwargs) -> str: return def create_geonode_resource( - self, - layer_name: str, - alternate: str, - execution_id: str, - resource_type: Dataset = Dataset, - asset=None, + self, layer_name: str, alternate: str, execution_id: str, resource_type: Dataset = Dataset, asset=None, **kwargs ): """ Base function to create the resource into geonode. Each handler can specify @@ -385,12 +375,7 @@ def create_geonode_resource( return saved_dataset def overwrite_geonode_resource( - self, - layer_name: str, - alternate: str, - execution_id: str, - resource_type: Dataset = Dataset, - asset=None, + self, layer_name: str, alternate: str, execution_id: str, resource_type: Dataset = Dataset, asset=None, **kwargs ): _exec = self._get_execution_request_object(execution_id) @@ -404,7 +389,19 @@ def overwrite_geonode_resource( if dataset.exists() and _overwrite: dataset = dataset.first() - dataset = resource_manager.update(dataset.uuid, instance=dataset) + dataset = resource_manager.update( + dataset.uuid, + instance=dataset, + vals=dict( + name=alternate, + workspace=dataset.workspace, + subtype="raster", + alternate=f"{dataset.workspace}:{alternate}", + dirty_state=True, + title=layer_name, + owner=_exec.user, + ), + ) self.handle_xml_file(dataset, _exec) self.handle_sld_file(dataset, _exec) diff --git a/geonode/upload/handlers/common/remote.py b/geonode/upload/handlers/common/remote.py index 11eec2f7cbe..98161effe58 100755 --- a/geonode/upload/handlers/common/remote.py +++ b/geonode/upload/handlers/common/remote.py @@ -204,6 +204,7 @@ def create_geonode_resource( execution_id: str, resource_type: ResourceBase = ResourceBase, asset=None, + **kwargs, ): """ Creating geonode base resource diff --git a/geonode/upload/handlers/common/tests_vector.py b/geonode/upload/handlers/common/tests_vector.py index 4be51b9ef8a..916564701bf 100644 --- a/geonode/upload/handlers/common/tests_vector.py +++ b/geonode/upload/handlers/common/tests_vector.py @@ -298,6 +298,9 @@ def test_import_resource_should_not_be_imported(self, celery_chord, ogr2ogr_driv If the resource exists and should be skept, the celery task is not going to be called and the layer is skipped """ + mocked_obj = MagicMock() + mocked_obj.name = "CSV" + ogr2ogr_driver.return_value = mocked_obj exec_id = None try: # create the executionId @@ -312,9 +315,9 @@ def test_import_resource_should_not_be_imported(self, celery_chord, ogr2ogr_driv # start the resource import self.handler.import_resource(files=self.valid_files, execution_id=str(exec_id)) self.assertIn( - "No valid layers found", + "not recognized as a supported file format.", exception.exception.args[0], - "No valid layers found.", + "not recognized as a supported file format.", ) celery_chord.assert_not_called() diff --git a/geonode/upload/handlers/common/vector.py b/geonode/upload/handlers/common/vector.py index d549535da6e..c1b9fc4b834 100644 --- a/geonode/upload/handlers/common/vector.py +++ b/geonode/upload/handlers/common/vector.py @@ -59,7 +59,7 @@ ) from geonode.resource.manager import resource_manager from geonode.resource.models import ExecutionRequest -from osgeo import ogr +from osgeo import ogr, gdal from geonode.upload.api.exceptions import ImportException, UpsertException from geonode.upload.celery_app import importer_app from geonode.assets.utils import copy_assets_and_links, get_default_asset @@ -78,7 +78,6 @@ from geonode.upload.registry import feature_validators_registry from django.core.exceptions import ValidationError - logger = logging.getLogger("importer") @@ -410,15 +409,11 @@ def extract_resource_to_publish(self, files, action, layer_name, alternate, **kw return [ { "name": alternate, - "crs": ResourceBase.objects.filter( - Q(alternate__icontains=layer_name) | Q(title__icontains=layer_name) - ) - .first() - .srid, + "crs": ResourceBase.objects.filter(alternate=kwargs.get("original_dataset_alternate")).first().srid, } ] - layers = self.get_ogr2ogr_driver().Open(files.get("base_file")) + layers = self.open_source_file(files) if not layers: return [] return [ @@ -426,7 +421,7 @@ def extract_resource_to_publish(self, files, action, layer_name, alternate, **kw "name": alternate or layer_name, "crs": self.identify_authority(_l) if _l.GetSpatialRef() else None, } - for _l in layers + for _l in (self._extract_layer(_l) for _l in layers) if self.fixup_name(_l.GetName()) == layer_name ] @@ -436,14 +431,17 @@ def get_ogr2ogr_driver(self): """ return None + def _gdal_open_options(self): + return {} + def import_resource(self, files: dict, execution_id: str, **kwargs) -> str: """ Main function to import the resource. Internally will call the steps required to import the data inside the geonode_data database """ - all_layers = self.get_ogr2ogr_driver().Open(files.get("base_file")) - layers = self._select_valid_layers(all_layers, execution_id=execution_id) + gdal_proxy = self.open_source_file(files) + layers = self._select_valid_layers(gdal_proxy, execution_id=execution_id) # for the moment we skip the dyanamic model creation layer_count = len(layers) logger.info(f"Total number of layers available: {layer_count}") @@ -463,7 +461,8 @@ def import_resource(self, files: dict, execution_id: str, **kwargs) -> str: # start looping on the layers available - for index, layer in enumerate(layers, start=1): + for index, gdal_layer in enumerate(layers, start=1): + layer = self._extract_layer(gdal_layer) layer_name = self.fixup_name(layer.GetName()) should_be_overwritten = _exec.action == ira.REPLACE.value @@ -544,10 +543,26 @@ def import_resource(self, files: dict, execution_id: str, **kwargs) -> str: raise e return layer_names, alternates, execution_id + def open_source_file(self, files): + """ + The importer switched from ogr to gdal library + This is required so we can rely on the options in GDAL, + while the ogr library does not allow that. + For example we can call the AUTODETECT_TYPE even in python + """ + gdal_proxy = gdal.OpenEx( + files.get("base_file"), + nOpenFlags=gdal.OF_VECTOR, + allowed_drivers=[self.get_ogr2ogr_driver().name], + **self._gdal_open_options(), + ) + return [gdal_proxy] + def _select_valid_layers(self, all_layers, **kwargs): layers = [] for layer in all_layers: try: + layer = self._extract_layer(layer) self.identify_authority(layer) layers.append(layer) except Exception as e: @@ -558,14 +573,17 @@ def _select_valid_layers(self, all_layers, **kwargs): pass return layers + def can_overwrite(self, _exec_obj, dataset): + is_tabular = _exec_obj.input_params.get("is_tabular", None) + return dataset.is_vector() if not is_tabular else is_tabular + def find_alternate_by_dataset(self, _exec_obj, layer_name, should_be_overwritten): if _exec_obj.input_params.get("resource_pk"): dataset = Dataset.objects.filter(pk=_exec_obj.input_params.get("resource_pk")).first() if not dataset: raise ImportException("The dataset selected for the ovewrite does not exists") - if should_be_overwritten: - if not dataset.is_vector(): - raise Exception("Cannot override a raster dataset with a vector one") + if should_be_overwritten and not self.can_overwrite(_exec_obj, dataset): + raise Exception("Cannot override a raster dataset with a vector one") alternate = dataset.alternate.split(":") return alternate[-1] @@ -573,9 +591,8 @@ def find_alternate_by_dataset(self, _exec_obj, layer_name, should_be_overwritten dataset_available = Dataset.objects.filter(alternate__iexact=f"{workspace.name}:{layer_name}") dataset_exists = dataset_available.exists() - if should_be_overwritten: - if not dataset_available.is_vector(): - raise Exception("Cannot override a raster dataset with a vector one") + if should_be_overwritten and not self.can_overwrite(_exec_obj, dataset): + raise Exception("Cannot override a raster dataset with a vector one") if dataset_exists and should_be_overwritten: alternate = dataset_available.first().alternate.split(":")[-1] @@ -602,7 +619,7 @@ def setup_dynamic_model( - celery_group -> the celery group of the field creation """ - layer_name = self.fixup_name(layer.GetName() if isinstance(layer, ogr.Layer) else layer) + layer_name = self.fixup_name(self._extract_layer(layer).GetName()) _exec_obj = orchestrator.get_execution_object(execution_id) is_dynamic_model_managed = _exec_obj.input_params.get("is_dynamic_model_managed", False) @@ -779,12 +796,7 @@ def promote_geom_to_multi(self, geom): return geom def create_geonode_resource( - self, - layer_name: str, - alternate: str, - execution_id: str, - resource_type: Dataset = Dataset, - asset=None, + self, layer_name: str, alternate: str, execution_id: str, resource_type: Dataset = Dataset, asset=None, **kwargs ): """ Base function to create the resource into geonode. Each handler can specify @@ -811,7 +823,7 @@ def create_geonode_resource( saved_dataset = resource_manager.create( None, resource_type=resource_type, - defaults=self.generate_resource_payload(layer_name, alternate, asset, _exec, workspace), + defaults=self.generate_resource_payload(layer_name, alternate, asset, _exec, workspace, **kwargs), ) saved_dataset.refresh_from_db() @@ -828,12 +840,12 @@ def create_geonode_resource( return saved_dataset - def generate_resource_payload(self, layer_name, alternate, asset, _exec, workspace): + def generate_resource_payload(self, layer_name, alternate, asset, _exec, workspace, **kwargs): return dict( name=alternate, workspace=workspace, store=os.environ.get("GEONODE_GEODATABASE", "geonode_data"), - subtype="vector", + subtype=kwargs.pop("subtype", None) or "vector", alternate=f"{workspace}:{alternate}", dirty_state=True, title=layer_name, @@ -859,7 +871,9 @@ def overwrite_geonode_resource( if dataset.exists() and _overwrite: dataset = dataset.first() - dataset = self.refresh_geonode_resource(str(_exec.exec_id), asset, dataset, create_asset=False) + dataset = self.refresh_geonode_resource( + str(_exec.exec_id), asset, dataset, create_asset=False, layer_name=layer_name + ) return dataset elif not dataset.exists() and _overwrite: logger.warning( @@ -941,11 +955,17 @@ def copy_geonode_resource( new_alternate: str, **kwargs, ): - + subtype = None + previous_resource = ResourceBase.objects.filter( + alternate__contains=kwargs.get("kwargs", {}).get("original_dataset_alternate") + ).first() + if previous_resource: + subtype = previous_resource.subtype new_resource = self.create_geonode_resource( layer_name=data_to_update.get("title"), alternate=new_alternate, execution_id=str(_exec.exec_id), + subtype=subtype, ) copy_assets_and_links(resource, target=new_resource) @@ -1122,7 +1142,7 @@ def upsert_validation(self, files, execution_id, **kwargs: dict) -> Tuple[bool, return True, None except Exception as e: - all_layers = self.get_ogr2ogr_driver().Open(files.get("base_file")) + all_layers = self.open_source_file(files) if layers := self._select_valid_layers(all_layers, execution_id=execution_id): _errors = e.args[0] if isinstance(e, UpsertException) else [str(e)] if isinstance(_errors, str): @@ -1134,6 +1154,11 @@ def upsert_validation(self, files, execution_id, **kwargs: dict) -> Tuple[bool, "User does not have enough permissions to perform this action on the selected resource" ) + def _extract_layer(self, layer): + if not isinstance(layer, ogr.Layer): + layer = layer.GetLayer() + return layer + def __get_new_and_original_schema(self, files, execution_id): # check if the execution_id is passed and if the geonode resource exists exec_id = orchestrator.get_execution_object(execution_id) @@ -1146,27 +1171,31 @@ def __get_new_and_original_schema(self, files, execution_id): target_schema_fields = FieldSchema.objects.filter(model_schema__name=target_resource.alternate.split(":")[-1]) # use ogr2ogr to read the uploaded files for the upsert - all_layers = self.get_ogr2ogr_driver().Open(files.get("base_file")) + all_layers = self.open_source_file(files) layers = self._select_valid_layers(all_layers, execution_id=execution_id) if not layers: raise UpsertException("No valid layers found in the provided file for upsert.") - layer = layers[0] + layer = self._extract_layer(layers[0]) # evaluate if some of the fid entry is null. if is null we stop the workflow # the user should provide the completed list with the fid set - sql_query = f'SELECT * FROM "{layer.GetName()}" WHERE "{DEFAULT_PK_COLUMN_NAME}" IS NULL' - - # Execute the SQL query to the layer - result = all_layers.ExecuteSQL(sql_query) - if not result or (result and result.GetFeatureCount() > 0): - raise UpsertException( - f"All the feature in the file must have the fid field correctly populated. Number of None value: {result.GetFeatureCount() if result else 'all'}" - ) + if exec_id.action == ira.UPSERT.value: + sql_query = f'SELECT * FROM "{layer.GetName()}" WHERE "{DEFAULT_PK_COLUMN_NAME.lower()}" IS NULL' + + # Execute the SQL query to the layer via the gdal proxy object + result = all_layers[0].ExecuteSQL(sql_query) + if ( + not (result and DEFAULT_PK_COLUMN_NAME in (x.name.lower() for x in result.schema)) + or (result and result.GetFeatureCount() > 0) + or not result + ): + raise UpsertException( + f"All the feature in the file must have the fid field correctly populated. Number of None value: {result.GetFeatureCount() if result else 'all'}" + ) # Will generate the same schema as the target_resource_schema new_file_schema_fields = self.create_dynamic_model_fields( - layer, - return_celery_group=False, + layer, return_celery_group=False, execution_id=execution_id ) return target_schema_fields, new_file_schema_fields @@ -1236,7 +1265,7 @@ def upsert_data(self, files, execution_id, **kwargs): OriginalResource = model.as_model() # use ogr2ogr to read the uploaded files values for the upsert - all_layers = self.get_ogr2ogr_driver().Open(files.get("base_file")) + all_layers = self.open_source_file(files) valid_create = 0 valid_update = 0 layers = self._select_valid_layers(all_layers, execution_id=execution_id) @@ -1345,8 +1374,8 @@ def _create_error_log(self, exec_obj, layers, errors): "Error found during the upsert process, no update/create will be perfomed. The error log is going to be created..." ) errors_to_print = errors[: settings.UPSERT_LIMIT_ERROR_LOG] - - log_name = f'error_{layers[0].GetName()}_{datetime.now().strftime("%Y-%m-%d_%H-%M-%S")}.csv' + layer = self._extract_layer(layers[0]) + log_name = f'error_{layer.GetName()}_{datetime.now().strftime("%Y-%m-%d_%H-%M-%S")}.csv' with tempfile.TemporaryDirectory() as temp_dir_str: temp_dir = Path(temp_dir_str) @@ -1433,10 +1462,11 @@ def _save_feature(self, data_chunk, model_obj, model_instance, upsert_key, valid # need to simulate the "promote to multi" used by the upload process. # here we cannot rely on ogr2ogr so we need to do it manually geom = feature.GetGeometryRef() - code = geom.GetSpatialReference().GetAuthorityCode(None) - feature_as_dict.update( - {self.default_geometry_column_name: f"SRID={code};{self.promote_geom_to_multi(geom).ExportToWkt()}"} - ) + if geom: + code = geom.GetSpatialReference().GetAuthorityCode(None) + feature_as_dict.update( + {self.default_geometry_column_name: f"SRID={code};{self.promote_geom_to_multi(geom).ExportToWkt()}"} + ) if use_get_fid: feature_as_dict[upsert_key] = feature.GetFID() to_process.append(feature_as_dict) @@ -1511,7 +1541,20 @@ def refresh_geonode_resource(self, execution_id, asset=None, dataset=None, creat set_geowebcache_invalidate_cache(dataset_alternate=dataset.alternate) logging.debug(f"set_geowebcache_invalidate_cache DONE {datetime.now() - start}") - dataset = resource_manager.update(dataset.uuid, instance=dataset) + payload = self.generate_resource_payload( + kwargs.get("layer_name", dataset.alternate), + dataset.alternate.split(":")[-1], + asset, + exec_obj, + dataset.workspace, + ) + payload.pop("asset") + + dataset = resource_manager.update( + dataset.uuid, + instance=dataset, + vals=payload, + ) self.handle_xml_file(dataset, exec_obj) self.handle_sld_file(dataset, exec_obj) diff --git a/geonode/upload/handlers/csv/handler.py b/geonode/upload/handlers/csv/handler.py index dc0c4442864..05a2f563266 100644 --- a/geonode/upload/handlers/csv/handler.py +++ b/geonode/upload/handlers/csv/handler.py @@ -17,7 +17,10 @@ # ######################################################################### import logging +import os +from geonode.geoserver.createlayer.utils import BBOX +from geonode.layers.models import Dataset from geonode.resource.enumerator import ExecutionRequestAction as exa from geonode.upload.api.exceptions import UploadParallelismLimitException from geonode.upload.utils import UploadLimitValidator @@ -29,6 +32,7 @@ from dynamic_models.models import ModelSchema from geonode.upload.handlers.common.vector import BaseVectorFileHandler from geonode.upload.handlers.utils import GEOM_TYPE_MAPPING +from geonode.upload.orchestrator import orchestrator from django.db.models import Q from geonode.resource.models import ExecutionRequest from geonode.security.utils import get_visible_resources @@ -107,7 +111,6 @@ def is_valid(files, user, **kwargs): has_lat = any(x in CSVFileHandler().possible_lat_column for x in schema_keys) has_long = any(x in CSVFileHandler().possible_long_column for x in schema_keys) - fields = CSVFileHandler().possible_geometry_column_name + CSVFileHandler().possible_latlong_column if has_lat and not has_long: raise InvalidCSVException( f"Longitude is missing. Supported names: {', '.join(CSVFileHandler().possible_long_column)}" @@ -119,13 +122,19 @@ def is_valid(files, user, **kwargs): ) if not geom_is_in_schema and not has_lat and not has_long: - raise InvalidCSVException(f"Not enough geometry field are set. The possibilities are: {','.join(fields)}") + exec_obj = orchestrator.get_execution_object(kwargs.get("execution_id")) + exec_obj.input_params.update({"is_tabular": True}) + exec_obj.save() + return True return True def get_ogr2ogr_driver(self): return ogr.GetDriverByName("CSV") + def _gdal_open_options(self): + return {"open_options": ["AUTODETECT_TYPE=YES"]} + @staticmethod def create_ogr2ogr_command(files, original_name, ovverwrite_layer, alternate, **kwargs): """ @@ -137,6 +146,7 @@ def create_ogr2ogr_command(files, original_name, ovverwrite_layer, alternate, ** return ( f"{base_command} -oo KEEP_GEOM_COLUMNS=NO -lco GEOMETRY_NAME={BaseVectorFileHandler().default_geometry_column_name} " + additional_option + + " -oo AUTODETECT_TYPE=YES" ) def create_dynamic_model_fields( @@ -149,11 +159,17 @@ def create_dynamic_model_fields( return_celery_group: bool = True, ): # retrieving the field schema from ogr2ogr and converting the type to Django Types - layer_schema = [{"name": x.name.lower(), "class_name": self._get_type(x), "null": True} for x in layer.schema] + layer_schema = [ + {"name": self.fixup_name(x.name.lower()), "class_name": self._get_type(x), "null": True} + for x in layer.schema + ] + exec_obj = orchestrator.get_execution_object(execution_id) + if ( layer.GetGeometryColumn() or self.default_geometry_column_name and ogr.GeometryTypeToName(layer.GetGeomType()) not in ["Geometry Collection", "Unknown (any)"] + and not exec_obj.input_params.get("is_tabular") ): # the geometry colum is not returned rom the layer.schema, so we need to extract it manually # checking if the geometry has been wrogly read as string @@ -217,7 +233,7 @@ def extract_resource_to_publish(self, files, action, layer_name, alternate, **kw raise ValueError("Unable to resolve resource for copy action.") return [{"name": alternate, "crs": original_resource.srid}] - layers = self.get_ogr2ogr_driver().Open(files.get("base_file"), 0) + layers = self.open_source_file(files) if not layers: return [] return [ @@ -226,12 +242,36 @@ def extract_resource_to_publish(self, files, action, layer_name, alternate, **kw "crs": (self.identify_authority(_l)), } for _l in layers - if self.fixup_name(_l.GetName()) == layer_name + if self.fixup_name(self._extract_layer(_l).GetName()) == layer_name ] def identify_authority(self, layer): + layer = self._extract_layer(layer) try: authority_code = super().identify_authority(layer=layer) + if authority_code == ogr.wkbNone: + logger.warning("For tabular CSV, we set by default EPSG:4326") return authority_code except Exception: return "EPSG:4326" + + def create_geonode_resource( + self, layer_name, alternate, execution_id, resource_type: Dataset = Dataset, asset=None, **kwargs + ): + res = super().create_geonode_resource(layer_name, alternate, execution_id, resource_type, asset, **kwargs) + res.set_bbox_polygon(BBOX, res.srid) + return res + + def generate_resource_payload(self, layer_name, alternate, asset, _exec, workspace, **kwargs): + subtype = kwargs.pop("subtype", None) + return dict( + name=alternate, + workspace=workspace, + store=os.environ.get("GEONODE_GEODATABASE", "geonode_data"), + subtype=subtype or ("tabular" if _exec.input_params.get("is_tabular") else "vector"), + alternate=f"{workspace}:{alternate}", + dirty_state=True, + title=layer_name, + owner=_exec.user, + asset=asset, + ) diff --git a/geonode/upload/handlers/csv/tests.py b/geonode/upload/handlers/csv/tests.py index 4a27b29325e..c4fc3edbb2f 100644 --- a/geonode/upload/handlers/csv/tests.py +++ b/geonode/upload/handlers/csv/tests.py @@ -32,6 +32,8 @@ from geonode.upload.handlers.csv.handler import CSVFileHandler from osgeo import ogr +from geonode.upload.utils import ExecutionRequest + class TestCSVHandler(TestCase): databases = ("default", "datastore") @@ -79,12 +81,25 @@ def test_is_valid_should_raise_exception_if_the_csv_is_invalid(self): self.assertIsNotNone(_exc) self.assertTrue("The CSV provided is invalid, no layers found" in str(_exc.exception.detail)) - def test_is_valid_should_raise_exception_if_the_csv_missing_geom(self): - with self.assertRaises(InvalidCSVException) as _exc: - self.handler.is_valid(files={"base_file": self.missing_geom}, user=self.user) + def test_is_valid_should_set_as_tabular_if_the_csv_miss_geom(self): + exec_obj = ExecutionRequest.objects.create( + user=self.user, + func_name="test", + geonode_resource=self.layer, + input_params={ + "uuid": self.layer.uuid, + "owner": self.layer.owner.username, + "resource_type": self.layer.resource_type, + "defaults": f'{{"owner":"{self.layer.owner.username}"}}', + }, + ) + self.handler.is_valid( + files={"base_file": self.missing_geom}, user=self.user, execution_id=str(exec_obj.exec_id) + ) - self.assertIsNotNone(_exc) - self.assertTrue("Not enough geometry field are set" in str(_exc.exception.detail)) + exec_obj.refresh_from_db() + + self.assertTrue(exec_obj.input_params.get("is_tabular", False)) def test_is_valid_should_raise_exception_if_the_csv_missing_lat(self): with self.assertRaises(InvalidCSVException) as _exc: @@ -195,7 +210,8 @@ def test_import_with_ogr2ogr_without_errors_should_call_the_right_command(self, "X_POSSIBLE_NAMES=x,long*", "-oo", "Y_POSSIBLE_NAMES=y,lat*", + "-oo", + "AUTODETECT_TYPE=YES", ] - _open.assert_called_once() _open.assert_called_with(expected_cmd_list, stdout=-1, stderr=-1, shell=False) diff --git a/geonode/upload/handlers/remote/tiles3d.py b/geonode/upload/handlers/remote/tiles3d.py index 01f14c7f037..586218c7426 100644 --- a/geonode/upload/handlers/remote/tiles3d.py +++ b/geonode/upload/handlers/remote/tiles3d.py @@ -85,8 +85,9 @@ def create_geonode_resource( execution_id: str, resource_type: Dataset = ResourceBase, asset=None, + **kwargs, ): - resource = super().create_geonode_resource(layer_name, alternate, execution_id, resource_type, asset) + resource = super().create_geonode_resource(layer_name, alternate, execution_id, resource_type, asset, **kwargs) _exec = orchestrator.get_execution_object(exec_id=execution_id) try: js_file = requests.get(_exec.input_params.get("url"), timeout=10).json() diff --git a/geonode/upload/handlers/remote/wms.py b/geonode/upload/handlers/remote/wms.py index 69bb444992a..aec93994090 100644 --- a/geonode/upload/handlers/remote/wms.py +++ b/geonode/upload/handlers/remote/wms.py @@ -120,18 +120,13 @@ def generate_alternate( return layer_name, payload_alternate def create_geonode_resource( - self, - layer_name: str, - alternate: str, - execution_id: str, - resource_type: Dataset = ..., - asset=None, + self, layer_name: str, alternate: str, execution_id: str, resource_type: Dataset = ..., asset=None, **kwargs ): """ Use the default RemoteResourceHandler to create the geonode resource after that, we assign the bbox and re-generate the thumbnail """ - resource = super().create_geonode_resource(layer_name, alternate, execution_id, Dataset, asset) + resource = super().create_geonode_resource(layer_name, alternate, execution_id, Dataset, asset, **kwargs) _exec = orchestrator.get_execution_object(execution_id) remote_bbox = _exec.input_params.get("bbox") if remote_bbox: diff --git a/geonode/upload/handlers/tiles3d/handler.py b/geonode/upload/handlers/tiles3d/handler.py index 7c191ebf1d6..6202c4c84bd 100755 --- a/geonode/upload/handlers/tiles3d/handler.py +++ b/geonode/upload/handlers/tiles3d/handler.py @@ -250,16 +250,11 @@ def pre_processing(self, files, execution_id, **kwargs): return _data, execution_id def create_geonode_resource( - self, - layer_name: str, - alternate: str, - execution_id: str, - resource_type: Dataset = ..., - asset=None, + self, layer_name: str, alternate: str, execution_id: str, resource_type: Dataset = ..., asset=None, **kwargs ): exec_obj = orchestrator.get_execution_object(execution_id) - resource = super().create_geonode_resource(layer_name, alternate, execution_id, ResourceBase, asset) + resource = super().create_geonode_resource(layer_name, alternate, execution_id, ResourceBase, asset, **kwargs) asset = self.create_asset_and_link( resource, files=exec_obj.input_params["files"], diff --git a/geonode/upload/tests/end2end/test_end2end.py b/geonode/upload/tests/end2end/test_end2end.py index 2ccbcdd2c1d..6bd4a017b92 100644 --- a/geonode/upload/tests/end2end/test_end2end.py +++ b/geonode/upload/tests/end2end/test_end2end.py @@ -67,6 +67,7 @@ def setUpClass(cls) -> None: cls.valid_tif = f"{project_dir}/tests/fixture/test_raster.tif" cls.valid_csv = f"{project_dir}/tests/fixture/valid.csv" + cls.missing_geom_csv = f"{project_dir}/tests/fixture/missing_geom.csv" cls.url = reverse("importer_upload") ogc_server_settings = OGC_Servers_Handler(settings.OGC_SERVER)["default"] @@ -302,7 +303,7 @@ def test_import_geojson_overwrite(self): class ImporterGCSVImportTest(BaseImporterEndToEndTest): @mock.patch.dict(os.environ, {"GEONODE_GEODATABASE": "test_geonode_data"}) @override_settings(GEODATABASE_URL=f"{geourl.split('/geonode_data')[0]}/test_geonode_data") - def test_import_geojson(self): + def test_import_csv(self): self._cleanup_layers(name="valid") payload = {"base_file": open(self.valid_csv, "rb"), "action": "upload"} @@ -325,6 +326,47 @@ def test_import_csv_overwrite(self): self._cleanup_layers(name="valid") +class ImporterGCSVTabularImportTest(BaseImporterEndToEndTest): + @mock.patch.dict(os.environ, {"GEONODE_GEODATABASE": "test_geonode_data"}) + @override_settings(GEODATABASE_URL=f"{geourl.split('/geonode_data')[0]}/test_geonode_data") + def test_import_tabular_csv(self): + self._cleanup_layers(name="missing_geom") + + payload = {"base_file": open(self.missing_geom_csv, "rb"), "action": "upload"} + initial_name = "missing_geom" + self._assertimport( + payload, + initial_name, + assert_payload={ + "subtype": "tabular", + "resource_type": "dataset", + }, + ) + self._cleanup_layers(name="missing_geom") + + @mock.patch.dict(os.environ, {"GEONODE_GEODATABASE": "test_geonode_data"}) + @override_settings(GEODATABASE_URL=f"{geourl.split('/geonode_data')[0]}/test_geonode_data") + def test_import_tabular_csv_overwrite(self): + self._cleanup_layers(name="missing_geom") + payload = {"base_file": open(self.missing_geom_csv, "rb"), "action": "upload"} + initial_name = "missing_geom" + prev_dataset = self._assertimport( + payload, + initial_name, + keep_resource=True, + assert_payload={ + "subtype": "tabular", + "resource_type": "dataset", + }, + ) + + payload = {"base_file": open(self.missing_geom_csv, "rb"), "action": "replace"} + initial_name = "missing_geom" + payload["resource_pk"] = prev_dataset.pk + self._assertimport(payload, initial_name, overwrite=True, last_update=prev_dataset.last_updated) + self._cleanup_layers(name="missing_geom") + + class ImporterKMLImportTest(BaseImporterEndToEndTest): @mock.patch.dict(os.environ, {"GEONODE_GEODATABASE": "test_geonode_data"}) @override_settings(GEODATABASE_URL=f"{geourl.split('/geonode_data')[0]}/test_geonode_data") diff --git a/geonode/upload/tests/unit/test_publisher.py b/geonode/upload/tests/unit/test_publisher.py index 9d7f7e06e10..5890d26875f 100644 --- a/geonode/upload/tests/unit/test_publisher.py +++ b/geonode/upload/tests/unit/test_publisher.py @@ -39,7 +39,7 @@ def setUp(self): layer = self.publisher.cat.get_resources("stazioni_metropolitana", workspaces="geonode") print("delete layer") if layer: - res = self.publisher.cat.delete(layer.resource, purge="all", recurse=True) + res = self.publisher.cat.delete(layer[0], purge="all", recurse=True) print(res.status_code) print(res.json) @@ -66,20 +66,6 @@ def test_extract_resource_name_and_crs(self): expected = {"crs": "EPSG:32632", "name": "stazioni_metropolitana"} self.assertDictEqual(expected, values_found[0]) - def test_extract_resource_name_and_crs_return_empty_if_the_file_does_not_exists( - self, - ): - """ - Given a layer and the original file, should extract the crs and the name - to let it publish in Geoserver - """ - values_found = self.publisher.extract_resource_to_publish( - files={"base_file": "/wrong/path/file.gpkg"}, - action="upload", - layer_name="stazioni_metropolitana", - ) - self.assertListEqual([], values_found) - @patch("geonode.upload.publisher.create_geoserver_db_featurestore") def test_get_or_create_store_creation_should_called(self, datastore): with patch.dict(os.environ, {"GEONODE_GEODATABASE": "not_existsing_db"}, clear=True):