diff --git a/.gitignore b/.gitignore index c5d53a6..ce48c8c 100644 --- a/.gitignore +++ b/.gitignore @@ -5,3 +5,4 @@ dist _version.py .idea/ venv/ +tmp/ diff --git a/dune_client/file.py b/dune_client/file.py index 2e69b77..75f1611 100644 --- a/dune_client/file.py +++ b/dune_client/file.py @@ -6,8 +6,9 @@ import logging import os.path from enum import Enum +from os.path import exists from pathlib import Path -from typing import TextIO +from typing import TextIO, Callable, List, Tuple # ndjson missing types: https://github.com/rhgrant10/ndjson/issues/10 import ndjson # type: ignore @@ -60,14 +61,17 @@ def load(self, file: TextIO) -> list[DuneRecord]: return list(ndjson.reader(file)) raise ValueError(f"Unrecognized FileType {self} for {file.name}") - def write(self, out_file: TextIO, data: list[DuneRecord]) -> None: + def write( + self, out_file: TextIO, data: list[DuneRecord], skip_headers: bool = False + ) -> None: """Writes `data` to `out_file`""" logger.debug(f"writing results to file {out_file.name}") if self == FileType.CSV: headers = data[0].keys() data_tuple = [tuple(rec.values()) for rec in data] dict_writer = csv.DictWriter(out_file, headers, lineterminator="\n") - dict_writer.writeheader() + if not skip_headers: + dict_writer.writeheader() writer = csv.writer(out_file, lineterminator="\n") writer.writerows(data_tuple) @@ -106,13 +110,77 @@ def _filepath(self, name: str, ftype: FileType) -> str: """Internal method for building absolute path.""" return os.path.join(self.path, name + str(ftype)) - def _write(self, data: list[DuneRecord], name: str, ftype: FileType) -> None: - # TODO - use @skip_empty decorator here. Couldn't get the types to work. + def _write(self, data: List[DuneRecord], name: str, ftype: FileType) -> None: + # The following three lines are duplicated in _append, due to python version compatibility + # https://github.com/cowprotocol/dune-client/issues/45 + # We will continue to support python < 3.10 until ~3.13, this issue will remain open. if len(data) == 0: logger.info(f"Nothing to write to {name}... skipping") - return + return None with open(self._filepath(name, ftype), "w", encoding=self.encoding) as out_file: ftype.write(out_file, data) + return None + + def _assert_matching_keys( + self, keys: Tuple[str, ...], fname: str, ftype: FileType + ) -> None: + with open(fname, "r", encoding=self.encoding) as file: + if ftype == FileType.CSV: + # Check matching headers. + headers = file.readline() + existing_keys = headers.strip().split(",") + elif ftype == FileType.JSON: + single_object = json.loads(file.readline())[0] + existing_keys = single_object.keys() + elif ftype == FileType.NDJSON: + single_object = json.loads(file.readline()) + existing_keys = single_object.keys() + + key_tuple = tuple(existing_keys) + assert keys == key_tuple, f"{keys} != {key_tuple}" + + def _append(self, data: List[DuneRecord], name: str, ftype: FileType) -> None: + if len(data) == 0: + logger.info(f"Nothing to write to {name}... skipping") + return None + fname = self._filepath(name, ftype) + if not exists(fname): + logger.warning( + f"File {fname} does not exist, using write instead of append!" + ) + return self._write(data, name, ftype) + + # validate that the incoming content to be appended has the same schema + # The skip empty decorator ensures existence of data[0]! + self._assert_matching_keys(tuple(data[0].keys()), fname, ftype) + + if ftype == FileType.JSON: + # These are JSON lists, so we have to concatenate the data. + with open(fname, "r", encoding=self.encoding) as existing_file: + existing_data = ftype.load(existing_file) + return self._write(existing_data + data, name, ftype) + + with open(fname, "a+", encoding=self.encoding) as out_file: + return ftype.write(out_file, data, skip_headers=True) + + def append_csv(self, data: list[DuneRecord], name: str) -> None: + """Appends `data` to csv file `name`""" + # This is a special case because we want to skip headers when the file already exists + # Additionally, we may want to validate that the headers actually coincide. + self._append(data, name, FileType.CSV) + + def append_json(self, data: list[DuneRecord], name: str) -> None: + """ + Appends `data` to json file `name` + This is the least efficient of all, since we have to load the entire file, + concatenate the lists and then overwrite the file! + Other filetypes such as CSV and NDJSON can be directly appended to! + """ + self._append(data, name, FileType.JSON) + + def append_ndjson(self, data: list[DuneRecord], name: str) -> None: + """Appends `data` to ndjson file `name`""" + self._append(data, name, FileType.NDJSON) def write_csv(self, data: list[DuneRecord], name: str) -> None: """Writes `data` to csv file `name`""" @@ -154,3 +222,6 @@ def load_singleton( ) -> DuneRecord: """Loads and returns single entry by index (default 0)""" return self._load(name, self._parse_ftype(ftype))[index] + + +WriteLikeSignature = Callable[[FileIO, List[DuneRecord], str, FileType], None] diff --git a/tests/unit/test_file.py b/tests/unit/test_file.py index 88e37a0..7b20243 100644 --- a/tests/unit/test_file.py +++ b/tests/unit/test_file.py @@ -16,7 +16,7 @@ def cleanup(): def cleanup_files(func): - """This decorator can be used for testing methods outside of this class""" + """This decorator can be used for testing methods outside this class""" def wrapped_func(self): func(self) @@ -48,6 +48,30 @@ def test_invertible_write_and_load(self): f"Assert invertible failed on {ftype}", ) + def test_append_ok(self): + for ftype in FileType: + self.file_manager._write(self.dune_records, TEST_FILE, ftype) + self.file_manager._append(self.dune_records, TEST_FILE, ftype) + loaded_records = self.file_manager._load(TEST_FILE, ftype) + expected = self.dune_records + self.dune_records + self.assertEqual( + expected, + loaded_records, + f"test_append failed on {ftype}", + ) + + def test_append_calls_write_on_new_file(self): + for ftype in FileType: + with self.assertLogs(level="WARNING"): + self.file_manager._append(self.dune_records, TEST_FILE, ftype) + + def test_append_error(self): + invalid_records = [{}] # Empty dict has different keys than self.dune_records + for ftype in FileType: + self.file_manager._write(self.dune_records, TEST_FILE, ftype) + with self.assertRaises(AssertionError): + self.file_manager._append(invalid_records, TEST_FILE, ftype) + def test_load_singleton(self): for file_type in FileType: self.file_manager._write(self.dune_records, TEST_FILE, file_type)