From 82da723ce35d081899bbd3c90a353beedae6f2a1 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Lu=C3=A3=20Bida=20Vacaro?=
Date: Thu, 19 Oct 2023 16:05:33 -0300
Subject: [PATCH] fix(FTP): remove DBF from content if DBC is present (#168)

---
 .gitignore             |  1 +
 pysus/data/__init__.py | 17 ++++++++++++-----
 pysus/ftp/__init__.py  | 10 ++++++++++
 3 files changed, 23 insertions(+), 5 deletions(-)

diff --git a/.gitignore b/.gitignore
index 6914fce0..db4575bc 100644
--- a/.gitignore
+++ b/.gitignore
@@ -25,6 +25,7 @@ pyvenv.cfg
 # *.DBF
 *.pickle
 *.parquet
+.virtual_documents

 # Byte-compiled / optimized / DLL files
 __pycache__/
diff --git a/pysus/data/__init__.py b/pysus/data/__init__.py
index e10e523d..48b2354e 100644
--- a/pysus/data/__init__.py
+++ b/pysus/data/__init__.py
@@ -1,4 +1,6 @@
 import os
+import struct
+import logging
 from datetime import datetime
 from pathlib import Path

@@ -109,9 +111,12 @@ def dbf_to_parquet(dbf: str, _pbar=None) -> str:
             chunk_df = pd.DataFrame(chunk)
             table = pa.Table.from_pandas(chunk_df.applymap(decode_column))
             pq.write_to_dataset(table, root_path=str(parquet))
-    except Exception as exc:
-        parquet.absolute().unlink()
-        raise exc
+    except struct.error as err:
+        if _pbar:
+            _pbar.close()
+        Path(path).unlink()
+        parquet.rmdir()
+        raise err

     if _pbar:
         _pbar.update(approx_final_size - _pbar.n)
@@ -138,14 +143,16 @@ def str_to_int(string: str):
         # spaces as well
         if str(string).replace(" ", "").isnumeric():
             return int(string.replace(" ", ""))
+        return string

     def str_to_date(string: str):
         if isinstance(string, str):
             try:
                 return datetime.strptime(string, "%Y%m%d").date()
-            except Exception:
+            except ValueError:
                 # Ignore errors, bad value
-                pass
+                return string
+        return string

     map_column_func(["DT_NOTIFIC", "DT_SIN_PRI"], str_to_date)
     map_column_func(["CODMUNRES", "SEXO"], str_to_int)
diff --git a/pysus/ftp/__init__.py b/pysus/ftp/__init__.py
index 7c6f3811..a6245b3a 100644
--- a/pysus/ftp/__init__.py
+++ b/pysus/ftp/__init__.py
@@ -389,6 +389,16 @@ def line_file_parser(file_line):
     finally:
         ftp.close()

+    upper_names = [n.upper() for n in content]
+    to_remove = []
+    for name in content:
+        if ".DBF" in name.upper():
+            if name.upper().replace(".DBF", ".DBC") in upper_names:
+                to_remove.append(name)
+
+    for name in to_remove:
+        del content[name]
+
     return content
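
Illustration (not part of the patch): a minimal, standalone sketch of the filtering rule the pysus/ftp/__init__.py hunk introduces — a .DBF entry is dropped from the listing whenever a .DBC with the same name is also present. The helper name `drop_dbf_when_dbc_present` and the dict-shaped `content` argument are assumptions made for this example only.

    # Hypothetical helper mirroring the patch's filtering logic; `content`
    # is assumed to be a dict keyed by file name, as in the FTP listing.
    def drop_dbf_when_dbc_present(content: dict) -> dict:
        upper_names = [n.upper() for n in content]
        to_remove = [
            name
            for name in content
            if ".DBF" in name.upper()
            and name.upper().replace(".DBF", ".DBC") in upper_names
        ]
        return {name: info for name, info in content.items() if name not in to_remove}

    # Example: SIM.DBF is dropped because SIM.DBC is also listed.
    listing = {"SIM.DBC": 1024, "SIM.DBF": 4096, "README.TXT": 12}
    assert drop_dbf_when_dbc_present(listing) == {"SIM.DBC": 1024, "README.TXT": 12}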