diff --git a/.gitignore b/.gitignore
index 4fa0acd..9a62847 100644
--- a/.gitignore
+++ b/.gitignore
@@ -17,4 +17,6 @@ pom.xml.asc
 .lsp/
 settings.xml
 settings2.xml
-.DS_Store
\ No newline at end of file
+.DS_Store
+.idea
+*.iml
diff --git a/CHANGELOG.md b/CHANGELOG.md
index 3dc2200..460e2aa 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -1,5 +1,11 @@
 # Changelog
 
+0.0.7 / 2023-10-27
+------------------
+- Changed
+  - Fix parsing of xlsx files without xl/sharedStrings.xml
+  - Fix parsing of cells with number and inlineStr data types
+
 0.0.6 / 2023-09-16
 ------------------
 - Changed
diff --git a/project.clj b/project.clj
index 0c76fa0..bd9308c 100644
--- a/project.clj
+++ b/project.clj
@@ -1,4 +1,4 @@
-(defproject com.github.kbosompem/bb-excel "0.0.6"
+(defproject com.github.kbosompem/bb-excel "0.0.7"
   :description "A Simple Clojure/Babashka Library for Reading Data from Excel Files"
   :url "https://github.com/kbosompem/bb-excel"
   :license {:name "EPL-2.0"
diff --git a/src/bb_excel/core.clj b/src/bb_excel/core.clj
index d3c71fc..6d96875 100644
--- a/src/bb_excel/core.clj
+++ b/src/bb_excel/core.clj
@@ -138,10 +138,18 @@
               (assoc :x row)
               (assoc :y col))]
     (cond
+      ;; Possible data types well explained here https://stackoverflow.com/a/18346273
       (= (:t u) "s") (dissoc (assoc-in u [:d] (dict (read-string (:d u)))) :t)
       (= (:t u) "str") (dissoc u :t)
+      (= (:t u) "inlineStr") (dissoc u :t)
       (= (:t u) "b") (dissoc (assoc-in u [:d] (if (= "1" (:d u)) true false)) :t)
       (= (:t u) "e") (assoc-in u [:d] (error-codes (:d u)))
+      ;; Explicitly typed numbers: when a percent/date/time style applies, fall
+      ;; through to the style branches below. Otherwise parse the text, using
+      ;; parse-double as a fallback because parse-long returns nil for "1.5".
+      (and (= (:t u) "n")
+           (not-any? #(style-check u styles %) [pcts dates times]))
+      (assoc u :d (or (parse-long (:d u)) (parse-double (:d u))))
       (style-check u styles pcts) (assoc-in u [:d] (num2pct (:d u)))
       (style-check u styles dates) (assoc-in u [:d] (num2date (:d u)))
       (style-check u styles times) (assoc-in u [:d] (num2time (:d u)))
@@ -169,13 +177,14 @@
 (defn get-unique-strings
   "Get dictionary of all unique strings in the Excel spreadsheet"
   [^ZipFile zipfile]
-  (let [wb (.getEntry zipfile (str "xl/sharedStrings.xml"))
-        ins (.getInputStream zipfile wb)
-        x (parse-str (slurp ins))]
-    (->>
-     (filter #((:text-part tags) (:tag %)) (xml-seq x))
-     (map get-cell-text)
-     (zipmap (range)))))
+  (if-let [wb (.getEntry zipfile "xl/sharedStrings.xml")]
+    (let [ins (.getInputStream zipfile wb)
+          x (parse-str (slurp ins))]
+      (->>
+       (filter #((:text-part tags) (:tag %)) (xml-seq x))
+       (map get-cell-text)
+       (zipmap (range))))
+    {}))
 
 (defn get-styles
   "Get styles"
diff --git a/test/core_test.clj b/test/core_test.clj
index ddc75e9..682b2b0 100644
--- a/test/core_test.clj
+++ b/test/core_test.clj
@@ -49,6 +49,11 @@
       (is (= '({:_r 10 :A "9" :B "TextData"})
              (get-range (get-sheet "test/data/Types.xlsx" "Sheet1") "A10:B10")))))
 
+(deftest corner-cases-test
+  (testing "Without shared files"
+    (is (= '({:_r 1, :A 1})
+           (get-sheet "test/data/without_sharedfiles.xlsx" 1)))))
+
 (comment
   (run-tests)
 
diff --git a/test/data/without_sharedfiles.xlsx b/test/data/without_sharedfiles.xlsx
new file mode 100644
index 0000000..f52a2f0
Binary files /dev/null and b/test/data/without_sharedfiles.xlsx differ