Skip to content

Commit 6bcc36f

Browse files
committed
Improve node description and some smaller code improvements
1 parent 71f3912 commit 6bcc36f

File tree

1 file changed

+54
-31
lines changed
  • knime_extension/src/nodes

1 file changed

+54
-31
lines changed

knime_extension/src/nodes/io.py

Lines changed: 54 additions & 31 deletions
Original file line numberDiff line numberDiff line change
@@ -52,17 +52,54 @@ def validate_path(path: str) -> None:
5252

5353

5454
def clean_dataframe(df):
    """
    Return a copy of the DataFrame with a fresh index and without row-key columns.

    The index is replaced by a default integer index (the old index is
    discarded, not kept as a column), and the columns "<Row Key>" and
    "<RowID>" are removed if they are present. The input DataFrame itself is
    not modified.

    Args:
        df (pandas.DataFrame): The input DataFrame to be cleaned.

    Returns:
        pandas.DataFrame: A new DataFrame with the index reset and the
        row-key columns removed.
    """
    columns_to_drop = ["<Row Key>", "<RowID>"]
    # errors="ignore" skips labels that are absent, so no manual membership
    # filter is needed and a frame without these columns passes through.
    return df.reset_index(drop=True).drop(columns=columns_to_drop, errors="ignore")
5870

5971

6072
def check_overwrite(fileurl, existing_file):
    """
    Checks if a file already exists and raises an error if overwriting is not allowed.

    Args:
        fileurl (str): The path to the file to check.
        existing_file (str | Enum): The overwrite policy, given either as the
            *name* of an `ExistingFile` member (the form this function has
            always compared against) or as the member itself.

    Raises:
        knext.InvalidParametersError: If the overwrite policy is FAIL and the
            file already exists.
    """
    import os

    # The comparison is done on the member name. Accept an enum member as
    # well so that passing `ExistingFile.FAIL` does not silently disable the
    # check; plain strings have no `name` attribute and are used as-is.
    policy = getattr(existing_file, "name", existing_file)

    # Policy is tested first so the file system is only touched when needed.
    if policy == ExistingFile.FAIL.name and os.path.exists(fileurl):
        raise knext.InvalidParametersError("File already exists.")
86+
87+
88+
def check_outdir(fileurl):
    """
    Ensures that the directory for the given file path exists. If the directory
    does not exist, it is created together with any missing parent directories.

    Args:
        fileurl (str): The file path for which the directory should be checked
            and created if necessary. A path without a directory component
            (e.g. "out.shp") is left alone.

    Raises:
        OSError: If the directory cannot be created due to an operating system
            error, e.g. missing permissions, or because the directory path
            already exists as a regular file (FileExistsError).
    """
    import os

    output_dir = os.path.dirname(fileurl)
    if not output_dir:
        # Bare file name: the target is the current working directory.
        return

    # exist_ok=True already makes this a no-op for an existing directory, so
    # no separate existence check is needed (which would also leave a window
    # between checking and creating).
    os.makedirs(output_dir, exist_ok=True)
66103

67104

68105
class _EncodingOptions(knext.EnumParameterOptions):
@@ -131,8 +168,7 @@ def get_default(cls):
131168
For more details on the limitations when reading these files see
132169
[here.](https://gdal.org/drivers/vector/kml.html#kml-reading)
133170
134-
Examples of standard local file paths are *C:\\KNIMEworkspace\\test.geojson* for Windows and
135-
*/KNIMEworkspace/test.shp* for Linux. The node can also load resources directly from a web URL, for example to
171+
The node can load resources directly from a web URL, for example to
136172
load a GeoJSON file from [geojson.xyz](http://geojson.xyz/) you would enter
137173
*http://d2ad6b4ur7yvpq.cloudfront.net/naturalearth-3.3.0/ne_110m_land.geojson*.
138174
@@ -147,8 +183,8 @@ def get_default(cls):
147183
class GeoFileReaderNode:
148184
data_url = knext.LocalPathParameter(
149185
"Input file path",
150-
"Select the file path for reading data.",
151-
placeholder_text="Select input file path...",
186+
"Select the file path or directly enter a remote URL for reading the data.",
187+
placeholder_text="Select input file path or enter URL...",
152188
validator=validate_path,
153189
)
154190

@@ -235,8 +271,6 @@ def execute(self, exec_context: knext.ExecutionContext):
235271
short_description="Write single layer GeoFile.",
236272
description="""This node writes the data in the format of [Shapefile](https://en.wikipedia.org/wiki/Shapefile),
237273
[GeoJSON](https://geojson.org/), or [GeoParquet](https://github.com/opengeospatial/geoparquet).
238-
Examples of standard local file paths are *C:\\KNIMEworkspace\\test.shp* for Windows and
239-
*/KNIMEworkspace/test.geojson* for Linux.
240274
241275
The file extension e.g. *.shp*, *.geojson*, or *.parquet* is appended automatically
242276
depending on the selected file format if not specified.""",
@@ -293,7 +327,7 @@ class GeoFileWriterNode:
293327

294328
encoding = knext.EnumParameter(
295329
label="Encoding",
296-
description="Select the encoding for reading the data file.",
330+
description="Select the encoding for saving the data file.",
297331
default_value=_EncodingOptions.get_default().name,
298332
enum=_EncodingOptions,
299333
since_version="1.4.0",
@@ -311,17 +345,10 @@ def execute(self, exec_context: knext.ExecutionContext, input_1):
311345
0.4, "Writing file (This might take a while without progress changes)"
312346
)
313347

314-
import os
315-
316-
output_dir = os.path.dirname(self.data_url)
317-
if output_dir and not os.path.exists(output_dir):
318-
os.makedirs(output_dir, exist_ok=True)
319-
348+
check_outdir(self.data_url)
320349
gdf = gp.GeoDataFrame(input_1.to_pandas(), geometry=self.geo_col)
321-
if "<Row Key>" in gdf.columns:
322-
gdf = gdf.drop(columns="<Row Key>")
323-
if "<RowID>" in gdf.columns:
324-
gdf = gdf.drop(columns="<RowID>")
350+
gdf = clean_dataframe(gdf)
351+
325352
if self.dataformat == "Shapefile":
326353
fileurl = knut.ensure_file_extension(self.data_url, ".shp")
327354
check_overwrite(fileurl, self.existing_file)
@@ -389,8 +416,8 @@ def execute(self, exec_context: knext.ExecutionContext, input_1):
389416
You can also enter the number of the layer to read starting at 0. The node will output the names of all layers as
390417
second output table, which can be used to revise the name of the target layer.
391418
392-
Examples of standard local file paths are *C:\\KNIMEworkspace\\test.gpkg* for Windows and
393-
*/KNIMEworkspace/test.gpkg* for Linux. The node can also load resources directly from a web URL.
419+
The node can load resources directly from a web URL e.g.
420+
*https://github.com/INSPIRE-MIF/gp-geopackage-encodings/raw/refs/heads/main/examples/GE-gpkg-template.gpkg*.
394421
395422
**Note:** For larger files the node progress might not change for a time until the file is successfully read.
396423
""",
@@ -402,8 +429,8 @@ def execute(self, exec_context: knext.ExecutionContext, input_1):
402429
class GeoPackageReaderNode:
403430
data_url = knext.LocalPathParameter(
404431
"Input file path",
405-
"Select the file path for reading data.",
406-
placeholder_text="Select input file path...",
432+
"Select the file path or directly enter a remote URL for reading the data.",
433+
placeholder_text="Select input file path or enter URL...",
407434
validator=validate_path,
408435
)
409436

@@ -480,8 +507,6 @@ def _get_layer(self, layerlist):
480507
short_description="Write GeoPackage layer.",
481508
description="""This node writes the data as new [Geopackage](https://www.geopackage.org/) file or
482509
as layer into an existing file.
483-
Examples of standard local file paths are *C:\\KNIMEworkspace\\test.gpkg* for Windows and
484-
*/KNIMEworkspace/test.gpkg* for Linux.
485510
486511
**Note:** If file and layer already exist, the layer will be overwritten without a warning!
487512
""",
@@ -515,7 +540,7 @@ class GeoPackageWriterNode:
515540

516541
encoding = knext.EnumParameter(
517542
label="Encoding",
518-
description="Select the encoding for reading the data file.",
543+
description="Select the encoding for saving the data file.",
519544
default_value=_EncodingOptions.get_default().name,
520545
enum=_EncodingOptions,
521546
since_version="1.4.0",
@@ -544,12 +569,10 @@ def execute(self, exec_context: knext.ExecutionContext, input_1):
544569
exec_context.set_progress(
545570
0.4, "Writing file (This might take a while without progress changes)"
546571
)
547-
import os
548572

549573
check_overwrite(self.data_url, self.existing_file)
550-
output_dir = os.path.dirname(self.data_url)
551-
if output_dir and not os.path.exists(output_dir):
552-
os.makedirs(output_dir, exist_ok=True)
574+
575+
check_outdir(self.data_url)
553576

554577
gdf = gp.GeoDataFrame(input_1.to_pandas(), geometry=self.geo_col)
555578
gdf = gdf.reset_index(drop=True)

0 commit comments

Comments
 (0)