diff --git a/CHANGELOG.md b/CHANGELOG.md index 4d0ad8cd..c97e9d10 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,4 +1,8 @@ # Changelog +# 7.030 + * [issue-408](https://github.com/techascent/tech.ml.dataset/issues/408) - xlsx files with numeric column names now load. + * dtype-next upgrade fixing a few issues, most notably [issue-99](https://github.com/cnuernber/dtype-next/issues/99). + # 7.029 * large parquet files now load - slowly as loading can't be parallelized - without holding onto more memory than they should. diff --git a/src/tech/v3/dataset/io/context.clj b/src/tech/v3/dataset/io/context.clj index c5952f08..c90dbd20 100644 --- a/src/tech/v3/dataset/io/context.clj +++ b/src/tech/v3/dataset/io/context.clj @@ -90,8 +90,12 @@ colparser-compute-fn (reify Function (apply [this col-idx] (let [colname (col-idx->colname col-idx) - colname (if (empty? colname) + colname (cond + (number? colname) + colname + (empty? colname) (make-colname col-idx) + :else (utils/remove-zero-width-spaces colname)) colname (if (and ensure-unique-column-names? (get colname->idx colname)) diff --git a/test/tech/v3/libs/fastexcel_test.clj b/test/tech/v3/libs/fastexcel_test.clj index f9de2b12..c1aedbdb 100644 --- a/test/tech/v3/libs/fastexcel_test.clj +++ b/test/tech/v3/libs/fastexcel_test.clj @@ -92,3 +92,8 @@ (is (some? (ds/column ds "column::2"))) (is (some? (ds/column ds "column::4"))) (is (some? (ds/column ds "column-1::6")))))) + + +(deftest number-colname + (let [ds (ds/->dataset "test/data/number_column.xlsx")] + (is (= (first (ds/column-names ds)) 0.0))))