Skip to content

Commit 87a1bb7

Browse files
committed
Add an option to treat empty CSV columns as nulls, not empty strings
1 parent 3cb09d0 commit 87a1bb7

13 files changed

+3320
-3
lines changed

CHANGELOG.md

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,10 @@
1+
## 1.30.1
2+
3+
* Ensure that per-feature minzoom and maxzoom are integers
4+
* Report compression errors in tippecanoe-decode
5+
* Add the ability to specify the file format with -L{"format":"…"}
6+
* Add an option to treat empty CSV columns as nulls, not empty strings
7+
18
## 1.30.0
29

310
* Add a filter extension to allow filtering individual attributes

Makefile

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -178,20 +178,24 @@ join-test: tile-join
178178
rm -f tests/join-population/macarthur-6-9.mbtiles.json.check tests/join-population/macarthur-6-9.mbtiles
179179
./tippecanoe -q -f -d10 -D10 -Z9 -z11 -o tests/join-population/macarthur2.mbtiles -l macarthur tests/join-population/macarthur2.json
180180
./tile-join --quiet --force -o tests/join-population/joined.mbtiles -x GEOID10 -c tests/join-population/population.csv tests/join-population/tabblock_06001420.mbtiles
181+
./tile-join --quiet --force -o tests/join-population/joined-null.mbtiles --empty-csv-columns-are-null -x GEOID10 -c tests/join-population/population.csv tests/join-population/tabblock_06001420.mbtiles
181182
./tile-join --quiet --force --no-tile-stats -o tests/join-population/joined-no-tile-stats.mbtiles -x GEOID10 -c tests/join-population/population.csv tests/join-population/tabblock_06001420.mbtiles
182183
./tile-join -q -f -i -o tests/join-population/joined-i.mbtiles -x GEOID10 -c tests/join-population/population.csv tests/join-population/tabblock_06001420.mbtiles
183184
./tile-join -q -f -o tests/join-population/merged.mbtiles tests/join-population/tabblock_06001420.mbtiles tests/join-population/macarthur.mbtiles tests/join-population/macarthur2.mbtiles
184185
./tile-join -q -f -c tests/join-population/windows.csv -o tests/join-population/windows.mbtiles tests/join-population/macarthur.mbtiles
185186
./tippecanoe-decode -x generator --maximum-zoom=11 --minimum-zoom=4 tests/join-population/joined.mbtiles > tests/join-population/joined.mbtiles.json.check
187+
./tippecanoe-decode -x generator --maximum-zoom=11 --minimum-zoom=4 tests/join-population/joined-null.mbtiles > tests/join-population/joined-null.mbtiles.json.check
186188
./tippecanoe-decode -x generator --maximum-zoom=11 --minimum-zoom=4 tests/join-population/joined-no-tile-stats.mbtiles > tests/join-population/joined-no-tile-stats.mbtiles.json.check
187189
./tippecanoe-decode -x generator tests/join-population/joined-i.mbtiles > tests/join-population/joined-i.mbtiles.json.check
188190
./tippecanoe-decode -x generator tests/join-population/merged.mbtiles > tests/join-population/merged.mbtiles.json.check
189191
./tippecanoe-decode -x generator tests/join-population/windows.mbtiles > tests/join-population/windows.mbtiles.json.check
190192
cmp tests/join-population/joined.mbtiles.json.check tests/join-population/joined.mbtiles.json
193+
cmp tests/join-population/joined-null.mbtiles.json.check tests/join-population/joined-null.mbtiles.json
191194
cmp tests/join-population/joined-no-tile-stats.mbtiles.json.check tests/join-population/joined-no-tile-stats.mbtiles.json
192195
cmp tests/join-population/joined-i.mbtiles.json.check tests/join-population/joined-i.mbtiles.json
193196
cmp tests/join-population/merged.mbtiles.json.check tests/join-population/merged.mbtiles.json
194197
cmp tests/join-population/windows.mbtiles.json.check tests/join-population/windows.mbtiles.json
198+
rm -f tests/join-population/joined-null.mbtiles tests/join-population/joined-null.mbtiles.json.check
195199
./tile-join -q -f -l macarthur -n "macarthur name" -N "macarthur description" -A "macarthur attribution" -o tests/join-population/just-macarthur.mbtiles tests/join-population/merged.mbtiles
196200
./tile-join -q -f -L macarthur -o tests/join-population/no-macarthur.mbtiles tests/join-population/merged.mbtiles
197201
./tippecanoe-decode -x generator tests/join-population/just-macarthur.mbtiles > tests/join-population/just-macarthur.mbtiles.json.check
@@ -253,8 +257,11 @@ join-filter-test:
253257
json-tool-test: tippecanoe-json-tool
254258
./tippecanoe-json-tool -e GEOID10 tests/join-population/tabblock_06001420.json | sort > tests/join-population/tabblock_06001420.json.sort
255259
./tippecanoe-json-tool -c tests/join-population/population.csv tests/join-population/tabblock_06001420.json.sort > tests/join-population/tabblock_06001420.json.sort.joined
260+
./tippecanoe-json-tool --empty-csv-columns-are-null -c tests/join-population/population.csv tests/join-population/tabblock_06001420.json.sort > tests/join-population/tabblock_06001420-null.json.sort.joined
256261
cmp tests/join-population/tabblock_06001420.json.sort.joined tests/join-population/tabblock_06001420.json.sort.joined.standard
262+
cmp tests/join-population/tabblock_06001420-null.json.sort.joined tests/join-population/tabblock_06001420-null.json.sort.joined.standard
257263
rm -f tests/join-population/tabblock_06001420.json.sort tests/join-population/tabblock_06001420.json.sort.joined
264+
rm -f tests/join-population/tabblock_06001420-null.json.sort.joined
258265

259266
allow-existing-test:
260267
# Make a tileset
@@ -283,6 +290,11 @@ csv-test:
283290
./tippecanoe-decode -x generator tests/csv/out.mbtiles > tests/csv/out.mbtiles.json.check
284291
cmp tests/csv/out.mbtiles.json.check tests/csv/out.mbtiles.json
285292
rm -f tests/csv/out.mbtiles.json.check tests/csv/out.mbtiles
293+
# Reading from named CSV, with nulls
294+
./tippecanoe -q --empty-csv-columns-are-null -zg -f -o tests/csv/out-null.mbtiles tests/csv/ne_110m_populated_places_simple.csv
295+
./tippecanoe-decode -x generator tests/csv/out-null.mbtiles > tests/csv/out-null.mbtiles.json.check
296+
cmp tests/csv/out-null.mbtiles.json.check tests/csv/out-null.mbtiles.json
297+
rm -f tests/csv/out-null.mbtiles.json.check tests/csv/out-null.mbtiles
286298
# Same, but specifying csv with -L format
287299
./tippecanoe -q -zg -f -o tests/csv/out.mbtiles -L'{"file":"", "format":"csv", "layer":"ne_110m_populated_places_simplecsv"}' < tests/csv/ne_110m_populated_places_simple.csv
288300
./tippecanoe-decode -x generator tests/csv/out.mbtiles > tests/csv/out.mbtiles.json.check

README.md

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -214,6 +214,7 @@ resolution is obtained than by using a smaller _maxzoom_ or _detail_.
214214
that are dropped, coalesced-as-needed, or clustered. The _operation_ may be
215215
`sum`, `product`, `mean`, `max`, `min`, `concat`, or `comma`
216216
to specify how the named _attribute_ is accumulated onto the attribute of the same name in a feature that does survive.
217+
* `-pe` or `--empty-csv-columns-are-null`: Treat empty CSV columns as nulls rather than as empty strings.
217218

218219
### Filtering features by attributes
219220

@@ -561,6 +562,7 @@ The options are:
561562
* `-i` or `--if-matched`: Only include features that matched the CSV.
562563
* `-j` *filter* or `--feature-filter`=*filter*: Check features against a per-layer filter (as defined in the [Mapbox GL Style Specification](https://www.mapbox.com/mapbox-gl-js/style-spec/#types-filter)) and only include those that match. Any features in layers that have no filter specified will be passed through. Filters for the layer `"*"` apply to all layers.
563564
* `-J` *filter-file* or `--feature-filter-file`=*filter-file*: Like `-j`, but read the filter from a file.
565+
* `-pe` or `--empty-csv-columns-are-null`: Treat empty CSV columns as nulls rather than as empty strings.
564566

565567
### Setting or disabling tile size limits
566568

@@ -686,6 +688,7 @@ something better.
686688
The formatting makes excessive use of `\u` quoting so that it follows JSON string rules but will still
687689
be sorted correctly by tools that just do ASCII comparisons.
688690
* `-c` *file.csv* or `--csv=`*file.csv*: Join attributes from the named sorted CSV file, using its first column as the join key. Geometries will be passed through even if they do not match the CSV; CSV lines that do not match a geometry will be discarded.
691+
* `-pe` or `--empty-csv-columns-are-null`: Treat empty CSV columns as nulls rather than as empty strings.
689692

690693
### Example
691694

geocsv.cpp

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@
88
#include "text.hpp"
99
#include "csv.hpp"
1010
#include "milo/dtoa_milo.h"
11+
#include "options.hpp"
1112

1213
void parse_geocsv(std::vector<struct serialization_state> &sst, std::string fname, int layer, std::string layername) {
1314
FILE *f;
@@ -97,6 +98,9 @@ void parse_geocsv(std::vector<struct serialization_state> &sst, std::string fnam
9798
serial_val sv;
9899
if (is_number(line[i])) {
99100
sv.type = mvt_double;
101+
} else if (line[i].size() == 0 && prevent[P_EMPTY_CSV_COLUMNS]) {
102+
sv.type = mvt_null;
103+
line[i] = "null";
100104
} else {
101105
sv.type = mvt_string;
102106
}

jsontool.cpp

Lines changed: 18 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,7 @@ const char *extract = NULL;
1818
FILE *csvfile = NULL;
1919
std::vector<std::string> header;
2020
std::vector<std::string> fields;
21+
int pe = false;
2122

2223
std::string buffered;
2324
int buffered_type = -1;
@@ -318,9 +319,11 @@ void join_csv(json_object *j) {
318319
} else if (is_number(v)) {
319320
attr_type = JSON_NUMBER;
320321
}
322+
} else if (pe) {
323+
attr_type = JSON_NULL;
321324
}
322325

323-
{
326+
if (attr_type != JSON_NULL) {
324327
// This knows more about the structure of JSON objects than it ought to
325328

326329
json_object *ko = (json_object *) malloc(sizeof(json_object));
@@ -433,6 +436,8 @@ int main(int argc, char **argv) {
433436
{"wrap", no_argument, 0, 'w'},
434437
{"extract", required_argument, 0, 'e'},
435438
{"csv", required_argument, 0, 'c'},
439+
{"empty-csv-columns-are-null", no_argument, &pe, 1},
440+
{"prevent", required_argument, 0, 'p'},
436441

437442
{0, 0, 0, 0},
438443
};
@@ -453,6 +458,9 @@ int main(int argc, char **argv) {
453458

454459
while ((i = getopt_long(argc, argv, getopt_str.c_str(), long_options, NULL)) != -1) {
455460
switch (i) {
461+
case 0:
462+
break;
463+
456464
case 'w':
457465
wrap = true;
458466
break;
@@ -465,6 +473,15 @@ int main(int argc, char **argv) {
465473
csv = optarg;
466474
break;
467475

476+
case 'p':
477+
if (strcmp(optarg, "e") == 0) {
478+
pe = true;
479+
} else {
480+
fprintf(stderr, "%s: Unknown option for -p%s\n", argv[0], optarg);
481+
exit(EXIT_FAILURE);
482+
}
483+
break;
484+
468485
default:
469486
fprintf(stderr, "Unexpected option -%c\n", i);
470487
exit(EXIT_FAILURE);

main.cpp

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2517,6 +2517,7 @@ int main(int argc, char **argv) {
25172517
{"attribute-type", required_argument, 0, 'T'},
25182518
{"attribute-description", required_argument, 0, 'Y'},
25192519
{"accumulate-attribute", required_argument, 0, 'E'},
2520+
{"empty-csv-columns-are-null", no_argument, &prevent[P_EMPTY_CSV_COLUMNS], 1},
25202521

25212522
{"Filtering features by attributes", 0, 0, 0},
25222523
{"feature-filter-file", required_argument, 0, 'J'},

man/tippecanoe.1

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -244,6 +244,8 @@ If the type is \fB\fCint\fR and the original attribute was floating\-point, it i
244244
that are dropped, coalesced\-as\-needed, or clustered. The \fIoperation\fP may be
245245
\fB\fCsum\fR, \fB\fCproduct\fR, \fB\fCmean\fR, \fB\fCmax\fR, \fB\fCmin\fR, \fB\fCconcat\fR, or \fB\fCcomma\fR
246246
to specify how the named \fIattribute\fP is accumulated onto the attribute of the same name in a feature that does survive.
247+
.IP \(bu 2
248+
\fB\fC\-pe\fR or \fB\fC\-\-empty\-csv\-columns\-are\-null\fR: Treat empty CSV columns as nulls rather than as empty strings.
247249
.RE
248250
.SS Filtering features by attributes
249251
.RS
@@ -675,6 +677,8 @@ The options are:
675677
\fB\fC\-j\fR \fIfilter\fP or \fB\fC\-\-feature\-filter\fR=\fIfilter\fP: Check features against a per\-layer filter (as defined in the Mapbox GL Style Specification \[la]https://www.mapbox.com/mapbox-gl-js/style-spec/#types-filter\[ra]) and only include those that match. Any features in layers that have no filter specified will be passed through. Filters for the layer \fB\fC"*"\fR apply to all layers.
676678
.IP \(bu 2
677679
\fB\fC\-J\fR \fIfilter\-file\fP or \fB\fC\-\-feature\-filter\-file\fR=\fIfilter\-file\fP: Like \fB\fC\-j\fR, but read the filter from a file.
680+
.IP \(bu 2
681+
\fB\fC\-pe\fR or \fB\fC\-\-empty\-csv\-columns\-are\-null\fR: Treat empty CSV columns as nulls rather than as empty strings.
678682
.RE
679683
.SS Setting or disabling tile size limits
680684
.RS
@@ -829,6 +833,8 @@ The formatting makes excessive use of \fB\fC\\u\fR quoting so that it follows JS
829833
be sorted correctly by tools that just do ASCII comparisons.
830834
.IP \(bu 2
831835
\fB\fC\-c\fR \fIfile.csv\fP or \fB\fC\-\-csv=\fR\fIfile.csv\fP: Join attributes from the named sorted CSV file, using its first column as the join key. Geometries will be passed through even if they do not match the CSV; CSV lines that do not match a geometry will be discarded.
836+
.IP \(bu 2
837+
\fB\fC\-pe\fR or \fB\fC\-\-empty\-csv\-columns\-are\-null\fR: Treat empty CSV columns as nulls rather than as empty strings.
832838
.RE
833839
.SS Example
834840
.PP

options.hpp

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -37,6 +37,7 @@
3737
#define P_TILE_STATS ((int) 'g')
3838
#define P_USE_SOURCE_POLYGON_WINDING ((int) 'w')
3939
#define P_REVERSE_SOURCE_POLYGON_WINDING ((int) 'W')
40+
#define P_EMPTY_CSV_COLUMNS ((int) 'e')
4041

4142
extern int prevent[256];
4243
extern int additional[256];

tests/csv/out-null.mbtiles.json

Lines changed: 502 additions & 0 deletions
Large diffs are not rendered by default.

0 commit comments

Comments
 (0)