Improve node descriptions and make some smaller code improvements

koettert · koettert · commit 6bcc36fb9c26 · 2025-05-07T17:11:46.000+02:00
diff --git a/knime_extension/src/nodes/io.py b/knime_extension/src/nodes/io.py
@@ -52,17 +52,54 @@ def validate_path(path: str) -> None:
 
 
 def clean_dataframe(df):
+    """
+    Cleans the given DataFrame by resetting its index and removing specific columns.
+
+    This function resets the index of the DataFrame, dropping the old index,
+    and removes the columns "<Row Key>" and "<RowID>" if they exist in the DataFrame.
+
+    Args:
+        df (pandas.DataFrame): The input DataFrame to be cleaned.
+
+    Returns:
+        pandas.DataFrame: A cleaned DataFrame with the index reset and specified columns removed.
+    """
     df = df.reset_index(drop=True)
     columns_to_drop = ["<Row Key>", "<RowID>"]
     return df.drop(columns=[col for col in columns_to_drop if col in df.columns])
 
 
 def check_overwrite(fileurl, existing_file):
+    """
+    Checks if a file already exists and raises an error if overwriting is not allowed.
+    Args:
+        fileurl (str): The path to the file to check.
+        existing_file (str): The name of the selected `ExistingFile` option (the overwrite policy).
+            Overwriting is not allowed when it equals `ExistingFile.FAIL.name`.
+    Raises:
+        knext.InvalidParametersError: If the file exists and the overwrite policy is set to FAIL.
+    """
+    import os
+
+    if existing_file == ExistingFile.FAIL.name and os.path.exists(fileurl):
+        raise knext.InvalidParametersError("File already exists.")
+
+
+def check_outdir(fileurl):
+    """
+    Ensures that the directory for the given file path exists. If the directory
+    does not exist, it is created.
+    Args:
+        fileurl (str): The file path for which the directory should be checked
+                       and created if necessary.
+    Raises:
+        OSError: If the directory cannot be created due to an operating system error.
+    """
     import os
 
-    if existing_file == ExistingFile.FAIL.name:
-        if os.path.exists(fileurl):
-            raise knext.InvalidParametersError("File already exists.")
+    output_dir = os.path.dirname(fileurl)
+    if output_dir and not os.path.exists(output_dir):
+        os.makedirs(output_dir, exist_ok=True)
 
 
 class _EncodingOptions(knext.EnumParameterOptions):
@@ -131,8 +168,7 @@ def get_default(cls):
 For more details on the limitations when reading these files see 
 [here.](https://gdal.org/drivers/vector/kml.html#kml-reading)
 
-Examples of standard local file paths are *C:\\KNIMEworkspace\\test.geojson* for Windows and
-*/KNIMEworkspace/test.shp* for Linux. The node can also load resources directly from a web URL, for example to 
+The node can load resources directly from a web URL, for example to 
 load a GeoJSON file from [geojson.xyz](http://geojson.xyz/) you would enter
 *http://d2ad6b4ur7yvpq.cloudfront.net/naturalearth-3.3.0/ne_110m_land.geojson*.
 
@@ -147,8 +183,8 @@ def get_default(cls):
 class GeoFileReaderNode:
     data_url = knext.LocalPathParameter(
         "Input file path",
-        "Select the file path for reading data.",
-        placeholder_text="Select input file path...",
+        "Select the file path or directly enter a remote URL for reading the data.",
+        placeholder_text="Select input file path or enter URL...",
         validator=validate_path,
     )
 
@@ -235,8 +271,6 @@ def execute(self, exec_context: knext.ExecutionContext):
     short_description="Write single layer GeoFile.",
     description="""This node writes the data in the format of [Shapefile](https://en.wikipedia.org/wiki/Shapefile), 
     [GeoJSON](https://geojson.org/), or [GeoParquet](https://github.com/opengeospatial/geoparquet).
-Examples of standard local file paths are *C:\\KNIMEworkspace\\test.shp* for Windows and
-*/KNIMEworkspace/test.geojson* for Linux. 
 
 The file extension e.g. *.shp*, *.geojson*,  or *.parquet* is appended automatically
 depending on the selected file format if not specified.""",
@@ -293,7 +327,7 @@ class GeoFileWriterNode:
 
     encoding = knext.EnumParameter(
         label="Encoding",
-        description="Select the encoding for reading the data file.",
+        description="Select the encoding for saving the data file.",
         default_value=_EncodingOptions.get_default().name,
         enum=_EncodingOptions,
         since_version="1.4.0",
@@ -311,17 +345,10 @@ def execute(self, exec_context: knext.ExecutionContext, input_1):
             0.4, "Writing file (This might take a while without progress changes)"
         )
 
-        import os
-
-        output_dir = os.path.dirname(self.data_url)
-        if output_dir and not os.path.exists(output_dir):
-            os.makedirs(output_dir, exist_ok=True)
-
+        check_outdir(self.data_url)
         gdf = gp.GeoDataFrame(input_1.to_pandas(), geometry=self.geo_col)
-        if "<Row Key>" in gdf.columns:
-            gdf = gdf.drop(columns="<Row Key>")
-        if "<RowID>" in gdf.columns:
-            gdf = gdf.drop(columns="<RowID>")
+        gdf = clean_dataframe(gdf)
+
         if self.dataformat == "Shapefile":
             fileurl = knut.ensure_file_extension(self.data_url, ".shp")
             check_overwrite(fileurl, self.existing_file)
@@ -389,8 +416,8 @@ def execute(self, exec_context: knext.ExecutionContext, input_1):
 You can also enter the number of the layer to read starting at 0. The node will output the names of all layers as 
 second output table, which can be used to revise the name of the target layer.
 
-Examples of standard local file paths are *C:\\KNIMEworkspace\\test.gpkg* for Windows and
-*/KNIMEworkspace/test.gpkg* for Linux. The node can also load resources directly from a web URL.
+The node can load resources directly from a web URL, e.g. 
+*https://github.com/INSPIRE-MIF/gp-geopackage-encodings/raw/refs/heads/main/examples/GE-gpkg-template.gpkg*.
 
 **Note:** For larger files the node progress might not change for a time until the file is successfully read.
     """,
@@ -402,8 +429,8 @@ def execute(self, exec_context: knext.ExecutionContext, input_1):
 class GeoPackageReaderNode:
     data_url = knext.LocalPathParameter(
         "Input file path",
-        "Select the file path for reading data.",
-        placeholder_text="Select input file path...",
+        "Select the file path or directly enter a remote URL for reading the data.",
+        placeholder_text="Select input file path or enter URL...",
         validator=validate_path,
     )
 
@@ -480,8 +507,6 @@ def _get_layer(self, layerlist):
     short_description="Write GeoPackage layer.",
     description="""This node writes the data as new [Geopackage](https://www.geopackage.org/) file or 
 as layer into an existing file.
-Examples of standard local file paths are *C:\\KNIMEworkspace\\test.gpkg* for Windows and
-*/KNIMEworkspace/test.gpkg* for Linux. 
 
 **Note:** If file and layer already exist, the layer will be overwritten without a warning!
     """,
@@ -515,7 +540,7 @@ class GeoPackageWriterNode:
 
     encoding = knext.EnumParameter(
         label="Encoding",
-        description="Select the encoding for reading the data file.",
+        description="Select the encoding for saving the data file.",
         default_value=_EncodingOptions.get_default().name,
         enum=_EncodingOptions,
         since_version="1.4.0",
@@ -544,12 +569,10 @@ def execute(self, exec_context: knext.ExecutionContext, input_1):
         exec_context.set_progress(
             0.4, "Writing file (This might take a while without progress changes)"
         )
-        import os
 
         check_overwrite(self.data_url, self.existing_file)
-        output_dir = os.path.dirname(self.data_url)
-        if output_dir and not os.path.exists(output_dir):
-            os.makedirs(output_dir, exist_ok=True)
+
+        check_outdir(self.data_url)
 
         gdf = gp.GeoDataFrame(input_1.to_pandas(), geometry=self.geo_col)
         gdf = gdf.reset_index(drop=True)