diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml index d07a67dd..6433788c 100644 --- a/.github/workflows/tests.yml +++ b/.github/workflows/tests.yml @@ -21,9 +21,9 @@ jobs: fail-fast: false matrix: os: [ubuntu-20.04, windows-latest, macos-latest] - pyv: ['3.8', '3.9', '3.10', '3.11', '3.12'] + pyv: ['3.9', '3.10', '3.11', '3.12'] include: - - {os: ubuntu-latest, pyv: 'pypy3.8'} + - {os: ubuntu-latest, pyv: 'pypy3.9'} - {os: macos-14, pyv: '3.11'} steps: diff --git a/pyproject.toml b/pyproject.toml index 3c289fc6..e4540ea7 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -12,14 +12,13 @@ license = {text = "Apache-2.0"} authors = [{ name = "Iterative", email = "support@dvc.org" }] classifiers = [ "Programming Language :: Python :: 3", - "Programming Language :: Python :: 3.8", "Programming Language :: Python :: 3.9", "Programming Language :: Python :: 3.10", "Programming Language :: Python :: 3.11", "Programming Language :: Python :: 3.12", "Development Status :: 4 - Beta", ] -requires-python = ">=3.8" +requires-python = ">=3.9" dynamic = ["version"] dependencies = [ "funcy>=1.14", diff --git a/src/dvc_data/callbacks.py b/src/dvc_data/callbacks.py index 8a1abcdc..55dddfca 100644 --- a/src/dvc_data/callbacks.py +++ b/src/dvc_data/callbacks.py @@ -3,7 +3,7 @@ import re import sys from threading import RLock -from typing import Any, BinaryIO, ClassVar, Dict, Optional, Type, Union +from typing import Any, BinaryIO, ClassVar, Optional, Union import fsspec from tqdm import tqdm @@ -39,7 +39,7 @@ class Tqdm(tqdm): " [{elapsed}<{remaining}, {rate_fmt:>11}]" ) BAR_FMT_NOTOTAL = "{desc}{bar:b}|{postfix[info]}{n_fmt} [{elapsed}, {rate_fmt:>11}]" - BYTES_DEFAULTS: ClassVar[Dict[str, Any]] = { + BYTES_DEFAULTS: ClassVar[dict[str, Any]] = { "unit": "B", "unit_scale": True, "unit_divisor": 1024, @@ -146,7 +146,7 @@ def __init__( size: Optional[int] = None, value: int = 0, progress_bar: Optional["tqdm"] = None, - tqdm_cls: Optional[Type["tqdm"]] = None, + tqdm_cls: Optional[type["tqdm"]] = None, **tqdm_kwargs, ): tqdm_kwargs.pop("total", None) diff --git a/src/dvc_data/cli.py b/src/dvc_data/cli.py index 56e06693..c222131f 100644 --- a/src/dvc_data/cli.py +++ b/src/dvc_data/cli.py @@ -10,7 +10,6 @@ from itertools import accumulate from pathlib import Path from posixpath import relpath -from typing import List import click import typer @@ -580,7 +579,7 @@ def checkout( path: Path = typer.Argument(..., resolve_path=True), relink: bool = False, force: bool = False, - type: List[LinkEnum] = typer.Option(["copy"]), # noqa: A002 + type: list[LinkEnum] = typer.Option(["copy"]), # noqa: A002 ): odb = get_odb(type=[t.value for t in type]) oid = from_shortoid(odb, oid) diff --git a/src/dvc_data/fs.py b/src/dvc_data/fs.py index cbdefe1f..29451128 100644 --- a/src/dvc_data/fs.py +++ b/src/dvc_data/fs.py @@ -5,7 +5,7 @@ import posixpath import typing from collections import deque -from typing import Any, BinaryIO, NamedTuple, Optional, Tuple +from typing import Any, BinaryIO, NamedTuple, Optional from fsspec import AbstractFileSystem from fsspec.callbacks import DEFAULT_CALLBACK @@ -43,7 +43,7 @@ def join(cls, *parts: str) -> str: return posixpath.join(*parts) @classmethod - def parts(cls, path: str) -> Tuple[str, ...]: + def parts(cls, path: str) -> tuple[str, ...]: ret = [] while True: path, part = posixpath.split(path) @@ -77,10 +77,10 @@ def relpath(self, path: str, start: Optional[str] = None) -> str: start = "." return posixpath.relpath(self.abspath(path), start=self.abspath(start)) - def relparts(self, path: str, start: Optional[str] = None) -> Tuple[str, ...]: + def relparts(self, path: str, start: Optional[str] = None) -> tuple[str, ...]: return self.parts(self.relpath(path, start=start)) - def _get_key(self, path: str) -> Tuple[str, ...]: + def _get_key(self, path: str) -> tuple[str, ...]: path = self.abspath(path) if path == self.root_marker: return () @@ -125,7 +125,7 @@ def _cache_remote_file( fs: "FileSystem", path: "AnyFSPath", hash_info: Optional["HashInfo"], - ) -> Tuple["FileSystem", "AnyFSPath"]: + ) -> tuple["FileSystem", "AnyFSPath"]: from dvc_objects.fs.local import LocalFileSystem odb: "HashFileDB" = cache_storage.odb diff --git a/src/dvc_data/hashfile/__init__.py b/src/dvc_data/hashfile/__init__.py index 97b2b993..99668720 100644 --- a/src/dvc_data/hashfile/__init__.py +++ b/src/dvc_data/hashfile/__init__.py @@ -1,6 +1,7 @@ """DVC data.""" import logging -from typing import TYPE_CHECKING, Iterator, Union, cast +from collections.abc import Iterator +from typing import TYPE_CHECKING, Union, cast from .tree import Tree diff --git a/src/dvc_data/hashfile/_ignore.py b/src/dvc_data/hashfile/_ignore.py index 3ea4cee5..d5d1d1db 100644 --- a/src/dvc_data/hashfile/_ignore.py +++ b/src/dvc_data/hashfile/_ignore.py @@ -1,4 +1,5 @@ -from typing import TYPE_CHECKING, Any, Iterator +from collections.abc import Iterator +from typing import TYPE_CHECKING, Any from typing_extensions import Protocol diff --git a/src/dvc_data/hashfile/build.py b/src/dvc_data/hashfile/build.py index 5cb0ec72..297940d2 100644 --- a/src/dvc_data/hashfile/build.py +++ b/src/dvc_data/hashfile/build.py @@ -1,7 +1,7 @@ import hashlib import logging import os -from typing import TYPE_CHECKING, Any, Dict, Optional, Tuple, cast +from typing import TYPE_CHECKING, Any, Optional, cast from fsspec.callbacks import DEFAULT_CALLBACK, Callback @@ -45,7 +45,7 @@ def _upload_file( odb: "HashFileDB", upload_odb: "HashFileDB", callback: Optional[Callback] = None, -) -> Tuple[Meta, HashFile]: +) -> tuple[Meta, HashFile]: from dvc_objects.fs.utils import tmp_fname from .hash import HashStreamFile @@ -131,7 +131,7 @@ def _build_tree( # NOTE: we know for sure that root starts with path, so we can use # faster string manipulation instead of a more robust relparts() - rel_key: Tuple[Optional[Any], ...] = () + rel_key: tuple[Optional[Any], ...] = () if root != path: rel_key = tuple(root[len(path) + 1 :].split(fs.sep)) @@ -160,7 +160,7 @@ def _build_tree( return tree_meta, tree -_url_cache: Dict[str, str] = {} +_url_cache: dict[str, str] = {} def _make_staging_url(fs: "FileSystem", odb: "HashFileDB", path: Optional[str]): @@ -232,7 +232,7 @@ def build( upload: bool = False, dry_run: bool = False, **kwargs, -) -> Tuple["HashFileDB", "Meta", "HashFile"]: +) -> tuple["HashFileDB", "Meta", "HashFile"]: """Stage (prepare) objects from the given path for addition to an ODB. Returns at tuple of (object_store, object) where addition to the ODB can diff --git a/src/dvc_data/hashfile/checkout.py b/src/dvc_data/hashfile/checkout.py index dbefd92c..b607727c 100644 --- a/src/dvc_data/hashfile/checkout.py +++ b/src/dvc_data/hashfile/checkout.py @@ -1,6 +1,6 @@ import logging from itertools import chain -from typing import TYPE_CHECKING, List, Optional +from typing import TYPE_CHECKING, Optional from dvc_objects.fs.generic import test_links, transfer from fsspec.callbacks import DEFAULT_CALLBACK @@ -24,7 +24,7 @@ def __init__(self, path: str) -> None: class CheckoutError(Exception): - def __init__(self, paths: List[str]) -> None: + def __init__(self, paths: list[str]) -> None: self.paths = paths super().__init__("Checkout failed") diff --git a/src/dvc_data/hashfile/db/__init__.py b/src/dvc_data/hashfile/db/__init__.py index 9a5aff7f..c24a7147 100644 --- a/src/dvc_data/hashfile/db/__init__.py +++ b/src/dvc_data/hashfile/db/__init__.py @@ -3,7 +3,7 @@ import os from contextlib import suppress from copy import copy -from typing import TYPE_CHECKING, Callable, ClassVar, List, Optional, Union +from typing import TYPE_CHECKING, Callable, ClassVar, Optional, Union from dvc_objects.db import ObjectDB from dvc_objects.errors import ObjectFormatError @@ -49,7 +49,7 @@ def get_index(odb) -> "ObjectDBIndexBase": class HashFileDB(ObjectDB): DEFAULT_VERIFY = False - DEFAULT_CACHE_TYPES: ClassVar[List[str]] = ["copy"] + DEFAULT_CACHE_TYPES: ClassVar[list[str]] = ["copy"] CACHE_MODE: Optional[int] = None def __init__(self, fs: "FileSystem", path: str, read_only: bool = False, **config): @@ -72,9 +72,9 @@ def get(self, oid: str) -> HashFile: def add( self, - path: Union["AnyFSPath", List["AnyFSPath"]], + path: Union["AnyFSPath", list["AnyFSPath"]], fs: "FileSystem", - oid: Union[str, List[str]], + oid: Union[str, list[str]], hardlink: bool = False, callback: "Callback" = DEFAULT_CALLBACK, check_exists: bool = True, diff --git a/src/dvc_data/hashfile/db/index.py b/src/dvc_data/hashfile/db/index.py index 6b5d9f64..40a8c333 100644 --- a/src/dvc_data/hashfile/db/index.py +++ b/src/dvc_data/hashfile/db/index.py @@ -1,7 +1,8 @@ import logging import os from abc import ABC, abstractmethod -from typing import TYPE_CHECKING, Iterable, Iterator, Set +from collections.abc import Iterable, Iterator +from typing import TYPE_CHECKING from dvc_objects.errors import ObjectDBError @@ -44,7 +45,7 @@ def update(self, dir_hashes: Iterable[str], file_hashes: Iterable[str]) -> None: pass @abstractmethod - def intersection(self, hashes: Set[str]) -> Iterator[str]: + def intersection(self, hashes: set[str]) -> Iterator[str]: pass @@ -73,7 +74,7 @@ def clear(self) -> None: def update(self, dir_hashes: Iterable[str], file_hashes: Iterable[str]) -> None: pass - def intersection(self, hashes: Set[str]) -> Iterator[str]: + def intersection(self, hashes: set[str]) -> Iterator[str]: yield from [] @@ -131,6 +132,6 @@ def update(self, dir_hashes: Iterable[str], file_hashes: Iterable[str]) -> None: except Timeout as exc: raise ObjectDBError("Failed to update ODB index") from exc - def intersection(self, hashes: Set[str]) -> Iterator[str]: + def intersection(self, hashes: set[str]) -> Iterator[str]: """Iterate over values from `hashes` which exist in the index.""" yield from hashes.intersection(self.index.keys()) diff --git a/src/dvc_data/hashfile/db/local.py b/src/dvc_data/hashfile/db/local.py index 90b9f11f..d3f97eb4 100644 --- a/src/dvc_data/hashfile/db/local.py +++ b/src/dvc_data/hashfile/db/local.py @@ -2,7 +2,7 @@ import os import stat from functools import partial -from typing import ClassVar, List +from typing import ClassVar from dvc_objects.db import noop, wrap_iter from dvc_objects.errors import ObjectDBError, ObjectFormatError @@ -19,7 +19,7 @@ class LocalHashFileDB(HashFileDB): - DEFAULT_CACHE_TYPES: ClassVar[List[str]] = ["reflink", "copy"] + DEFAULT_CACHE_TYPES: ClassVar[list[str]] = ["reflink", "copy"] CACHE_MODE = 0o444 UNPACKED_DIR_SUFFIX = ".unpacked" diff --git a/src/dvc_data/hashfile/db/migrate.py b/src/dvc_data/hashfile/db/migrate.py index b5fe8635..543d658d 100644 --- a/src/dvc_data/hashfile/db/migrate.py +++ b/src/dvc_data/hashfile/db/migrate.py @@ -1,5 +1,5 @@ from functools import partial, wraps -from typing import TYPE_CHECKING, Any, Callable, Dict, List, NamedTuple, Tuple +from typing import TYPE_CHECKING, Any, Callable, NamedTuple from dvc_objects.executors import ThreadPoolExecutor from fsspec.callbacks import DEFAULT_CALLBACK @@ -14,8 +14,8 @@ class PreparedMigration(NamedTuple): src: "HashFileDB" dest: "HashFileDB" - paths: List[str] - oids: List[str] + paths: list[str] + oids: list[str] def migrate( @@ -65,7 +65,7 @@ def _hash_task( path: str, callback: "Callback" = DEFAULT_CALLBACK, **kwargs, -) -> Tuple[str, str]: +) -> tuple[str, str]: from dvc_data.hashfile.hash import hash_file func = _wrap_hash_file(callback, hash_file) @@ -79,7 +79,7 @@ def _hash_task( def _wrap_hash_file(callback: "Callback", fn: Callable): @wraps(fn) def func(path: str, *args, **kwargs): - kw: Dict[str, Any] = dict(kwargs) + kw: dict[str, Any] = dict(kwargs) with callback.branched(path, path) as child: res = fn(path, *args, callback=child, **kw) callback.relative_update() diff --git a/src/dvc_data/hashfile/db/reference.py b/src/dvc_data/hashfile/db/reference.py index 3c275356..65abb058 100644 --- a/src/dvc_data/hashfile/db/reference.py +++ b/src/dvc_data/hashfile/db/reference.py @@ -1,5 +1,5 @@ import logging -from typing import TYPE_CHECKING, Callable, Dict, List, Optional, Union +from typing import TYPE_CHECKING, Callable, Optional, Union from dvc_data.hashfile.obj import HashFile @@ -15,7 +15,7 @@ class ReferenceHashFileDB(HashFileDB): def __init__(self, fs: "FileSystem", path: str, **config): super().__init__(fs, path, **config) - self._obj_cache: Dict["str", "HashFile"] = {} + self._obj_cache: dict["str", "HashFile"] = {} def __hash__(self): return hash((self.fs.protocol, self.path, *self._obj_cache.keys())) @@ -31,9 +31,9 @@ def get(self, oid: str): def add( self, - path: Union["AnyFSPath", List["AnyFSPath"]], + path: Union["AnyFSPath", list["AnyFSPath"]], fs: "FileSystem", - oid: Union[str, List[str]], + oid: Union[str, list[str]], hardlink: bool = False, callback: Optional["Callback"] = None, check_exists: bool = True, diff --git a/src/dvc_data/hashfile/diff.py b/src/dvc_data/hashfile/diff.py index 77379c24..68d4d13c 100644 --- a/src/dvc_data/hashfile/diff.py +++ b/src/dvc_data/hashfile/diff.py @@ -1,5 +1,5 @@ import reprlib -from typing import TYPE_CHECKING, Dict, List, Optional, Tuple +from typing import TYPE_CHECKING, Optional from attrs import asdict, define, field @@ -19,7 +19,7 @@ @define(hash=True, order=True) class TreeEntry: in_cache: bool = field(default=False, eq=False) - key: Tuple[str, ...] = () + key: tuple[str, ...] = () meta: Optional["Meta"] = field(default=None, eq=False) oid: Optional["HashInfo"] = None @@ -55,16 +55,16 @@ def __bool__(self): @define class DiffResult: - added: List[Change] = field(factory=list, repr=reprlib.repr) - modified: List[Change] = field(factory=list, repr=reprlib.repr) - deleted: List[Change] = field(factory=list, repr=reprlib.repr) - unchanged: List[Change] = field(factory=list, repr=reprlib.repr) + added: list[Change] = field(factory=list, repr=reprlib.repr) + modified: list[Change] = field(factory=list, repr=reprlib.repr) + deleted: list[Change] = field(factory=list, repr=reprlib.repr) + unchanged: list[Change] = field(factory=list, repr=reprlib.repr) def __bool__(self): return bool(self.added or self.modified or self.deleted) @property - def stats(self) -> Dict[str, int]: + def stats(self) -> dict[str, int]: return {k: len(v) for k, v in asdict(self).items() if k != "unchanged"} diff --git a/src/dvc_data/hashfile/gc.py b/src/dvc_data/hashfile/gc.py index 293b5a6a..92022cfe 100644 --- a/src/dvc_data/hashfile/gc.py +++ b/src/dvc_data/hashfile/gc.py @@ -1,4 +1,5 @@ -from typing import TYPE_CHECKING, Iterable, Optional +from collections.abc import Iterable +from typing import TYPE_CHECKING, Optional if TYPE_CHECKING: from .db import HashFileDB diff --git a/src/dvc_data/hashfile/hash.py b/src/dvc_data/hashfile/hash.py index 7efaf62b..6d0b7b76 100644 --- a/src/dvc_data/hashfile/hash.py +++ b/src/dvc_data/hashfile/hash.py @@ -1,7 +1,7 @@ import hashlib import io import logging -from typing import TYPE_CHECKING, BinaryIO, Optional, Tuple, cast +from typing import TYPE_CHECKING, BinaryIO, Optional, cast from dvc_objects.fs import localfs from fsspec.callbacks import DEFAULT_CALLBACK, Callback @@ -135,7 +135,7 @@ def _hash_file( name: str, callback: "Callback" = DEFAULT_CALLBACK, info: Optional[dict] = None, -) -> Tuple["str", Meta]: +) -> tuple["str", Meta]: info = info or fs.info(path) meta = Meta.from_info(info, fs.protocol) @@ -188,7 +188,7 @@ def hash_file( state: Optional["StateBase"] = None, callback: Optional["Callback"] = None, info: Optional[dict] = None, -) -> Tuple["Meta", "HashInfo"]: +) -> tuple["Meta", "HashInfo"]: if state: meta, hash_info = state.get(path, fs, info=info) if hash_info and hash_info.name == name: diff --git a/src/dvc_data/hashfile/hash_info.py b/src/dvc_data/hashfile/hash_info.py index 08ba710f..4b125970 100644 --- a/src/dvc_data/hashfile/hash_info.py +++ b/src/dvc_data/hashfile/hash_info.py @@ -1,4 +1,4 @@ -from typing import Dict, Optional +from typing import Optional from attrs import define, field @@ -18,14 +18,14 @@ def __str__(self) -> str: return f"{self.name}: {self.value}" @classmethod - def from_dict(cls, d: Dict[str, str]) -> "HashInfo": + def from_dict(cls, d: dict[str, str]) -> "HashInfo": if not d: return cls() ((name, value),) = d.items() return cls(name, value) - def to_dict(self) -> Dict[str, str]: + def to_dict(self) -> dict[str, str]: if not self.value or not self.name: return {} return {self.name: self.value} diff --git a/src/dvc_data/hashfile/meta.py b/src/dvc_data/hashfile/meta.py index 08fa251f..c3ba27d6 100644 --- a/src/dvc_data/hashfile/meta.py +++ b/src/dvc_data/hashfile/meta.py @@ -1,4 +1,4 @@ -from typing import Any, ClassVar, Dict, List, Optional +from typing import Any, ClassVar, Optional from attrs import define, field, fields_dict from dvc_objects.fs.utils import is_exec @@ -18,7 +18,7 @@ class Meta: PARAM_MTIME: ClassVar[str] = "mtime" PARAM_REMOTE: ClassVar[str] = "remote" - fields: ClassVar[List[str]] + fields: ClassVar[list[str]] isdir: bool = False size: Optional[int] = None @@ -34,7 +34,7 @@ class Meta: remote: Optional[str] = field(default=None, eq=False) @classmethod - def from_info(cls, info: Dict[str, Any], protocol: Optional[str] = None) -> "Meta": + def from_info(cls, info: dict[str, Any], protocol: Optional[str] = None) -> "Meta": etag = info.get("etag") checksum = info.get("checksum") @@ -73,15 +73,15 @@ def from_info(cls, info: Dict[str, Any], protocol: Optional[str] = None) -> "Met ) @classmethod - def from_dict(cls, d: Dict[str, Any]) -> "Meta": + def from_dict(cls, d: dict[str, Any]) -> "Meta": kwargs = {} for field_ in cls.fields: if field_ in d: kwargs[field_] = d[field_] return cls(**kwargs) - def to_dict(self) -> Dict[str, Any]: - ret: Dict[str, Any] = {} + def to_dict(self) -> dict[str, Any]: + ret: dict[str, Any] = {} if self.isdir: ret[self.PARAM_ISDIR] = self.isdir diff --git a/src/dvc_data/hashfile/status.py b/src/dvc_data/hashfile/status.py index 59492170..765a7261 100644 --- a/src/dvc_data/hashfile/status.py +++ b/src/dvc_data/hashfile/status.py @@ -1,5 +1,6 @@ import logging -from typing import TYPE_CHECKING, Dict, Iterable, NamedTuple, Optional, Set +from collections.abc import Iterable +from typing import TYPE_CHECKING, NamedTuple, Optional from dvc_objects.fs import Schemes @@ -17,15 +18,15 @@ class StatusResult(NamedTuple): - exists: Set["HashInfo"] - missing: Set["HashInfo"] + exists: set["HashInfo"] + missing: set["HashInfo"] class CompareStatusResult(NamedTuple): - ok: Set["HashInfo"] - missing: Set["HashInfo"] - new: Set["HashInfo"] - deleted: Set["HashInfo"] + ok: set["HashInfo"] + missing: set["HashInfo"] + new: set["HashInfo"] + deleted: set["HashInfo"] def _indexed_dir_hashes( @@ -37,7 +38,7 @@ def _indexed_dir_hashes( dir_hashes = set(dir_objs.keys()) indexed_dirs = set(index.dir_hashes()) - indexed_dir_exists: Set[str] = set() + indexed_dir_exists: set[str] = set() if indexed_dirs: hashes = QueryingProgress( odb.list_oids_exists(indexed_dirs, jobs=jobs), @@ -108,8 +109,8 @@ def status( # noqa: C901, PLR0912 if cache_odb is None: cache_odb = odb - hash_infos: Dict[str, "HashInfo"] = {} - dir_objs: Dict[str, Optional["HashFile"]] = {} + hash_infos: dict[str, "HashInfo"] = {} + dir_objs: dict[str, Optional["HashFile"]] = {} for hash_info in obj_ids: assert hash_info.value if hash_info.isdir: @@ -129,8 +130,8 @@ def status( # noqa: C901, PLR0912 # assume memfs staged objects already exist return StatusResult(set(hash_infos.values()), set()) - hashes: Set[str] = set(hash_infos.keys()) - exists: Set[str] = set() + hashes: set[str] = set(hash_infos.keys()) + exists: set[str] = set() logger.debug("Collecting status from '%s'", odb.path) if index and hashes: diff --git a/src/dvc_data/hashfile/transfer.py b/src/dvc_data/hashfile/transfer.py index 4d678ddd..42702ec2 100644 --- a/src/dvc_data/hashfile/transfer.py +++ b/src/dvc_data/hashfile/transfer.py @@ -1,17 +1,13 @@ import errno import logging from collections import defaultdict +from collections.abc import Iterable from typing import ( TYPE_CHECKING, Any, Callable, - Dict, - Iterable, - List, NamedTuple, Optional, - Set, - Tuple, ) from fsspec.callbacks import DEFAULT_CALLBACK @@ -32,8 +28,8 @@ class TransferResult(NamedTuple): - transferred: Set["HashInfo"] - failed: Set["HashInfo"] + transferred: set["HashInfo"] + failed: set["HashInfo"] def _log_exception(oid: str, exc: BaseException): @@ -69,14 +65,14 @@ def _do_transfer( dest_index: Optional["ObjectDBIndexBase"] = None, cache_odb: Optional["HashFileDB"] = None, **kwargs: Any, -) -> Set["HashInfo"]: +) -> set["HashInfo"]: """Do object transfer. Returns: Set containing any hash_infos which failed to transfer. """ dir_ids, file_ids = split(lambda hash_info: hash_info.isdir, obj_ids) - failed_ids: Set["HashInfo"] = set() + failed_ids: set["HashInfo"] = set() succeeded_dir_objs = [] all_file_ids = set(file_ids) @@ -145,8 +141,8 @@ def _add( dest: "HashFileDB", hash_infos: Iterable["HashInfo"], **kwargs, -) -> Set["HashInfo"]: - failed: Set["HashInfo"] = set() +) -> set["HashInfo"]: + failed: set["HashInfo"] = set() if not hash_infos: return failed @@ -154,7 +150,7 @@ def _error(oid: str, exc: BaseException): _log_exception(oid, exc) failed.add(HashInfo(src.hash_name, oid)) - fs_map: Dict["FileSystem", List[Tuple[str, str]]] = defaultdict(list) + fs_map: dict["FileSystem", list[tuple[str, str]]] = defaultdict(list) for hash_info in hash_infos: assert hash_info.value obj = src.get(hash_info.value) diff --git a/src/dvc_data/hashfile/tree.py b/src/dvc_data/hashfile/tree.py index 53ad7acc..57235979 100644 --- a/src/dvc_data/hashfile/tree.py +++ b/src/dvc_data/hashfile/tree.py @@ -1,7 +1,8 @@ import json import logging import posixpath -from typing import TYPE_CHECKING, Any, Dict, Final, Iterable, Optional, Tuple +from collections.abc import Iterable +from typing import TYPE_CHECKING, Any, Final, Optional from dvc_objects.errors import ObjectFormatError @@ -54,8 +55,8 @@ def __init__(self): self.path = None # type: ignore[assignment] self.hash_info = None # type: ignore[assignment] self.oid = None # type: ignore[assignment] - self._dict: Dict[ - Tuple[str, ...], Tuple[Optional["Meta"], Optional["HashInfo"]] + self._dict: dict[ + tuple[str, ...], tuple[Optional["Meta"], Optional["HashInfo"]] ] = {} @cached_property @@ -66,7 +67,7 @@ def _trie(self) -> "Trie": def add( self, - key: Tuple[str, ...], + key: tuple[str, ...], meta: Optional["Meta"], oid: Optional["HashInfo"], ): @@ -74,8 +75,8 @@ def add( self._dict[key] = (meta, oid) def get( - self, key: Tuple[str, ...], default=None - ) -> Optional[Tuple[Optional["Meta"], Optional["HashInfo"]]]: + self, key: tuple[str, ...], default=None + ) -> Optional[tuple[Optional["Meta"], Optional["HashInfo"]]]: return self._dict.get(key, default) def digest(self, with_meta: bool = False, name: str = DEFAULT_ALGORITHM): @@ -132,7 +133,7 @@ def as_dict(self): def as_list(self, with_meta: bool = False): from operator import itemgetter - def _hi_to_dict(hi: Optional["HashInfo"]) -> Dict[str, Any]: + def _hi_to_dict(hi: Optional["HashInfo"]) -> dict[str, Any]: if not hi: return {} if hi.name == "md5-dos2unix": @@ -211,7 +212,7 @@ def load(cls, odb, hash_info, hash_name: Optional[str] = None) -> "Tree": return tree - def filter(self, prefix: Tuple[str]) -> Optional["Tree"]: # noqa: A003 + def filter(self, prefix: tuple[str]) -> Optional["Tree"]: # noqa: A003 """Return a filtered copy of this tree that only contains entries inside prefix. @@ -232,7 +233,7 @@ def filter(self, prefix: Tuple[str]) -> Optional["Tree"]: # noqa: A003 pass return tree - def get_obj(self, odb, prefix: Tuple[str]) -> Optional[HashFile]: + def get_obj(self, odb, prefix: tuple[str]) -> Optional[HashFile]: """Return object at the specified prefix in this tree. Returns None if no object exists at the specified prefix. diff --git a/src/dvc_data/hashfile/utils.py b/src/dvc_data/hashfile/utils.py index 670c46cf..9656fc27 100644 --- a/src/dvc_data/hashfile/utils.py +++ b/src/dvc_data/hashfile/utils.py @@ -1,7 +1,7 @@ import errno import hashlib import json -from typing import TYPE_CHECKING, Optional, Tuple +from typing import TYPE_CHECKING, Optional if TYPE_CHECKING: from dvc_objects.fs.base import AnyFSPath, FileSystem @@ -15,7 +15,7 @@ def to_nanoseconds(ts: float) -> int: def get_mtime_and_size( path: "AnyFSPath", fs: "FileSystem", ignore: Optional["Ignore"] = None -) -> Tuple[str, int]: +) -> tuple[str, int]: if not fs.isdir(path): base_stat = fs.info(path) size = base_stat["size"] diff --git a/src/dvc_data/index/build.py b/src/dvc_data/index/build.py index 4b1bef63..289a7185 100644 --- a/src/dvc_data/index/build.py +++ b/src/dvc_data/index/build.py @@ -1,5 +1,6 @@ +from collections.abc import Iterable from itertools import chain -from typing import TYPE_CHECKING, Any, Dict, Iterable, Optional, Tuple +from typing import TYPE_CHECKING, Any, Optional from dvc_objects.fs.local import LocalFileSystem @@ -18,7 +19,7 @@ def build_entry( path: str, fs: "FileSystem", - info: Optional[Dict[str, Any]] = None, + info: Optional[dict[str, Any]] = None, compute_hash: Optional[bool] = False, state: Optional["StateBase"] = None, hash_name: str = DEFAULT_ALGORITHM, @@ -56,11 +57,11 @@ def build_entries( for root, dirs, files in walk_iter: if root == path: - root_key: Tuple[str, ...] = () + root_key: tuple[str, ...] = () else: root_key = fs.relparts(root, path) - entries: Iterable[Tuple[str, Optional[Dict]]] + entries: Iterable[tuple[str, Optional[dict]]] if detail: entries = chain(dirs.items(), files.items()) else: diff --git a/src/dvc_data/index/checkout.py b/src/dvc_data/index/checkout.py index 7db797a1..f8067e5f 100644 --- a/src/dvc_data/index/checkout.py +++ b/src/dvc_data/index/checkout.py @@ -2,16 +2,11 @@ import os import stat from collections import defaultdict +from collections.abc import Collection, Iterable, Iterator from typing import ( TYPE_CHECKING, Callable, - Collection, - Dict, - Iterable, - Iterator, - List, Optional, - Tuple, ) from attrs import define, field @@ -61,7 +56,7 @@ def _check_versioning(paths: Iterable["AnyFSPath"], fs: "FileSystem"): def _delete_files( - entries: List["DataIndexEntry"], + entries: list["DataIndexEntry"], path: str, fs: "FileSystem", ): @@ -82,12 +77,12 @@ def _create_files( # noqa: C901, PLR0912, PLR0913 storage: str = "cache", onerror=None, state: Optional["StateBase"] = None, - links: Optional[List[str]] = None, + links: Optional[list[str]] = None, ): if index is None: return - by_storage: Dict["Storage", List[Tuple["DataIndexEntry", str, str]]] = defaultdict( + by_storage: dict["Storage", list[tuple["DataIndexEntry", str, str]]] = defaultdict( list ) for entry in entries: @@ -199,7 +194,7 @@ def _chmod_files(entries, path, fs): class Diff: old: Optional["BaseDataIndex"] = field(default=None) new: Optional["BaseDataIndex"] = field(default=None) - changes: Dict["DataIndexKey", "Change"] = field(factory=dict) + changes: dict["DataIndexKey", "Change"] = field(factory=dict) files_delete: list = field(factory=list) dirs_delete: list = field(factory=list) files_create: list = field(factory=list) @@ -349,7 +344,7 @@ def apply( storage: str = "cache", onerror: Optional[Callable] = None, state: Optional["StateBase"] = None, - links: Optional[List[str]] = None, + links: Optional[list[str]] = None, ) -> None: if onerror is None: onerror = _onerror_noop @@ -390,7 +385,7 @@ def _prune_existing_versions( jobs: Optional[int] = None, ) -> Iterator["DataIndexEntry"]: assert fs.version_aware - query_vers: Dict[str, "DataIndexEntry"] = {} + query_vers: dict[str, "DataIndexEntry"] = {} jobs = jobs or fs.jobs for entry in entries: diff --git a/src/dvc_data/index/collect.py b/src/dvc_data/index/collect.py index c1e12340..33553f84 100644 --- a/src/dvc_data/index/collect.py +++ b/src/dvc_data/index/collect.py @@ -1,5 +1,5 @@ import logging -from typing import TYPE_CHECKING, Dict, List, Optional, Tuple +from typing import TYPE_CHECKING, Optional from fsspec.callbacks import DEFAULT_CALLBACK @@ -78,10 +78,10 @@ def collect( # noqa: C901, PLR0912 cache_index=None, cache_key=None, push: bool = False, -) -> List["DataIndex"]: +) -> list["DataIndex"]: from fsspec.utils import tokenize - storage_by_fs: Dict[Tuple[str, str], StorageInfo] = {} + storage_by_fs: dict[tuple[str, str], StorageInfo] = {} skip = set() if cache_index is None: diff --git a/src/dvc_data/index/diff.py b/src/dvc_data/index/diff.py index 3e335a5d..d57290bb 100644 --- a/src/dvc_data/index/diff.py +++ b/src/dvc_data/index/diff.py @@ -1,5 +1,6 @@ from collections import deque -from typing import TYPE_CHECKING, Any, Callable, Deque, Iterable, Optional, Tuple +from collections.abc import Iterable +from typing import TYPE_CHECKING, Any, Callable, Optional from attrs import define from fsspec.callbacks import DEFAULT_CALLBACK, Callback @@ -171,7 +172,7 @@ def _diff( # noqa: C901, PLR0912 roots: Optional[Iterable["DataIndexKey"]] = None, ): roots = roots or [()] - todo: Deque[Tuple[dict, dict, bool]] = deque() + todo: deque[tuple[dict, dict, bool]] = deque() for root in roots: old_root_items = {} diff --git a/src/dvc_data/index/fetch.py b/src/dvc_data/index/fetch.py index 76aab166..d4ac9ccb 100644 --- a/src/dvc_data/index/fetch.py +++ b/src/dvc_data/index/fetch.py @@ -1,6 +1,6 @@ import logging from functools import partial -from typing import TYPE_CHECKING, Optional, Set +from typing import TYPE_CHECKING, Optional from dvc_objects.fs.local import LocalFileSystem from fsspec.callbacks import DEFAULT_CALLBACK @@ -166,7 +166,7 @@ def _on_error(failed, oid, exc): diff = compare(old, filtered) cache.fs.makedirs(cache.fs.parent(cache.path), exist_ok=True) - failed_keys: Set["DataIndexKey"] = set() + failed_keys: set["DataIndexKey"] = set() apply( diff, cache.path, diff --git a/src/dvc_data/index/index.py b/src/dvc_data/index/index.py index 068e3dfc..120541c5 100644 --- a/src/dvc_data/index/index.py +++ b/src/dvc_data/index/index.py @@ -2,15 +2,12 @@ import logging import os from abc import ABC, abstractmethod +from collections.abc import Iterator, MutableMapping from typing import ( TYPE_CHECKING, Any, Callable, - Dict, - Iterator, - MutableMapping, Optional, - Tuple, cast, ) @@ -35,7 +32,7 @@ logger = logging.getLogger(__name__) -DataIndexKey = Tuple[str, ...] +DataIndexKey = tuple[str, ...] @attrs.define(hash=True) @@ -54,7 +51,7 @@ def isdir(self) -> bool: return False @classmethod - def from_dict(cls, d: Dict[str, Dict]) -> "DataIndexEntry": + def from_dict(cls, d: dict[str, dict]) -> "DataIndexEntry": ret = cls() meta = d.get("meta") @@ -69,8 +66,8 @@ def from_dict(cls, d: Dict[str, Dict]) -> "DataIndexEntry": return ret - def to_dict(self) -> Dict[str, Any]: - ret: Dict[str, Any] = {} + def to_dict(self) -> dict[str, Any]: + ret: dict[str, Any] = {} if self.meta: ret["meta"] = self.meta.to_dict() @@ -163,7 +160,7 @@ def get_key(self, entry: "DataIndexEntry") -> "DataIndexKey": pass @abstractmethod - def get(self, entry: "DataIndexEntry") -> Tuple["FileSystem", str]: + def get(self, entry: "DataIndexEntry") -> tuple["FileSystem", str]: pass def exists(self, entry: "DataIndexEntry") -> bool: @@ -207,7 +204,7 @@ def get_key(self, entry: "DataIndexEntry") -> "DataIndexKey": return self.odb._oid_parts(entry.hash_info.value) - def get(self, entry: "DataIndexEntry") -> Tuple["FileSystem", str]: + def get(self, entry: "DataIndexEntry") -> tuple["FileSystem", str]: if not entry.hash_info: raise ValueError @@ -279,7 +276,7 @@ def get_key(self, entry: "DataIndexEntry") -> "DataIndexKey": assert entry.key[: len(self.prefix)] == self.prefix return entry.key[len(self.prefix) :] - def get(self, entry: "DataIndexEntry") -> Tuple["FileSystem", str]: + def get(self, entry: "DataIndexEntry") -> tuple["FileSystem", str]: assert entry.key is not None assert entry.key[: len(self.prefix)] == self.prefix path = self.fs.join(self.path, *entry.key[len(self.prefix) :]) @@ -425,7 +422,7 @@ def get_remote_odb(self, entry: "DataIndexEntry") -> "HashFileDB": def get_storage( self, entry: "DataIndexEntry", typ: str - ) -> Tuple["FileSystem", str]: + ) -> tuple["FileSystem", str]: info = self[entry.key] storage = getattr(info, typ) if not storage: @@ -433,13 +430,13 @@ def get_storage( return storage.get(entry) - def get_data(self, entry: "DataIndexEntry") -> Tuple["FileSystem", str]: + def get_data(self, entry: "DataIndexEntry") -> tuple["FileSystem", str]: return self.get_storage(entry, "data") - def get_cache(self, entry: "DataIndexEntry") -> Tuple["FileSystem", str]: + def get_cache(self, entry: "DataIndexEntry") -> tuple["FileSystem", str]: return self.get_storage(entry, "cache") - def get_remote(self, entry: "DataIndexEntry") -> Tuple["FileSystem", str]: + def get_remote(self, entry: "DataIndexEntry") -> tuple["FileSystem", str]: return self.get_storage(entry, "remote") def cache_exists(self, entry: "DataIndexEntry", **kwargs) -> bool: @@ -465,7 +462,7 @@ def iteritems( self, prefix: Optional[DataIndexKey] = None, shallow: bool = False, - ) -> Iterator[Tuple[DataIndexKey, DataIndexEntry]]: + ) -> Iterator[tuple[DataIndexKey, DataIndexEntry]]: pass @abstractmethod @@ -483,7 +480,7 @@ def delete_node(self, key: DataIndexKey) -> None: @abstractmethod def longest_prefix( self, key: DataIndexKey - ) -> Tuple[Optional[DataIndexKey], Optional[DataIndexEntry]]: + ) -> tuple[Optional[DataIndexKey], Optional[DataIndexEntry]]: pass def _get_meta(self, key, entry): @@ -745,7 +742,7 @@ def shortest_prefix(self, *args, **kwargs): def longest_prefix( self, key: DataIndexKey - ) -> Tuple[Optional[DataIndexKey], Optional[DataIndexEntry]]: + ) -> tuple[Optional[DataIndexKey], Optional[DataIndexEntry]]: return self._trie.longest_prefix(key) def traverse(self, *args, **kwargs) -> Any: @@ -755,7 +752,7 @@ def iteritems( self, prefix: Optional[DataIndexKey] = None, shallow: bool = False, - ) -> Iterator[Tuple[DataIndexKey, DataIndexEntry]]: + ) -> Iterator[tuple[DataIndexKey, DataIndexEntry]]: if prefix: item = self._trie.longest_prefix(prefix) if item: diff --git a/src/dvc_data/index/push.py b/src/dvc_data/index/push.py index ab102eb0..50e91751 100644 --- a/src/dvc_data/index/push.py +++ b/src/dvc_data/index/push.py @@ -1,6 +1,6 @@ import logging from functools import partial -from typing import TYPE_CHECKING, Any, Optional, Set +from typing import TYPE_CHECKING, Any, Optional from fsspec.callbacks import DEFAULT_CALLBACK @@ -101,7 +101,7 @@ def push( ) data.fs.makedirs(data.fs.parent(data.path), exist_ok=True) - failed_keys: Set["DataIndexKey"] = set() + failed_keys: set["DataIndexKey"] = set() if data.fs.version_aware: desc = f"Checking status of existing versions in {data.path!r}" diff --git a/src/dvc_data/index/save.py b/src/dvc_data/index/save.py index 84df9148..ecf0e7e7 100644 --- a/src/dvc_data/index/save.py +++ b/src/dvc_data/index/save.py @@ -1,5 +1,5 @@ from collections import defaultdict -from typing import TYPE_CHECKING, Dict, List, Optional, Tuple +from typing import TYPE_CHECKING, Optional from fsspec.callbacks import DEFAULT_CALLBACK @@ -89,7 +89,7 @@ def build_tree( index: "BaseDataIndex", prefix: "DataIndexKey", name: str = DEFAULT_ALGORITHM, -) -> Tuple["Meta", Tree]: +) -> tuple["Meta", Tree]: tree_meta = Meta(size=0, nfiles=0, isdir=True) assert tree_meta.size is not None assert tree_meta.nfiles is not None @@ -132,8 +132,8 @@ def _save_dir_entry( if TYPE_CHECKING: - _ODBMap = Dict["HashFileDB", "_FSMap"] - _FSMap = Dict["FileSystem", List[Tuple[str, str]]] + _ODBMap = dict["HashFileDB", "_FSMap"] + _FSMap = dict["FileSystem", list[tuple[str, str]]] def save( @@ -144,7 +144,7 @@ def save( storage: str = "data", **kwargs, ) -> int: - dir_entries: List["DataIndexKey"] = [] + dir_entries: list["DataIndexKey"] = [] transferred = 0 odb_map: "_ODBMap" = {} diff --git a/src/dvc_data/index/view.py b/src/dvc_data/index/view.py index cc33e390..88847e63 100644 --- a/src/dvc_data/index/view.py +++ b/src/dvc_data/index/view.py @@ -1,5 +1,6 @@ from collections import deque -from typing import TYPE_CHECKING, Any, Callable, Iterator, Optional, Tuple +from collections.abc import Iterator +from typing import TYPE_CHECKING, Any, Callable, Optional from .index import BaseDataIndex, DataIndex, DataIndexEntry, DataIndexKey @@ -56,7 +57,7 @@ def _iteritems( prefix: Optional[DataIndexKey] = None, shallow: bool = False, ensure_loaded: bool = False, - ) -> Iterator[Tuple[DataIndexKey, DataIndexEntry]]: + ) -> Iterator[tuple[DataIndexKey, DataIndexEntry]]: # NOTE: iteration is implemented using traverse and not iter/iteritems # since it supports skipping subtrie traversal for prefixes that are # not in the view. @@ -92,7 +93,7 @@ def _load_dir_keys( prefix: DataIndexKey, entry: Optional[DataIndexEntry], shallow: Optional[bool] = False, - ) -> Iterator[Tuple[DataIndexKey, DataIndexEntry]]: + ) -> Iterator[tuple[DataIndexKey, DataIndexEntry]]: # NOTE: traverse() will not enter subtries that have been added # in-place during traversal. So for dirs which we load in-place, we # need to iterate over the new keys ourselves. @@ -112,7 +113,7 @@ def iteritems( self, prefix: Optional[DataIndexKey] = None, shallow: bool = False, - ) -> Iterator[Tuple[DataIndexKey, DataIndexEntry]]: + ) -> Iterator[tuple[DataIndexKey, DataIndexEntry]]: return self._iteritems(prefix=prefix, shallow=shallow, ensure_loaded=True) def traverse(self, node_factory: Callable, **kwargs) -> Any: @@ -142,7 +143,7 @@ def delete_node(self, key: DataIndexKey) -> None: def longest_prefix( self, key: DataIndexKey - ) -> Tuple[Optional[DataIndexKey], Optional[DataIndexEntry]]: + ) -> tuple[Optional[DataIndexKey], Optional[DataIndexEntry]]: if self.filter_fn(key): return self._index.longest_prefix(key) return (None, None)