diff --git a/coretex/__init__.py b/coretex/__init__.py index 079d56dd..12a51abb 100644 --- a/coretex/__init__.py +++ b/coretex/__init__.py @@ -34,3 +34,4 @@ # Use this only from .entities import * from ._task import currentTaskRun, initializeRTask, TaskRunWorker +from ._folder_manager import folder_manager diff --git a/coretex/_folder_manager.py b/coretex/_folder_manager.py new file mode 100644 index 00000000..a23ac1f9 --- /dev/null +++ b/coretex/_folder_manager.py @@ -0,0 +1,188 @@ +# Copyright (C) 2023 Coretex LLC + +# This file is part of Coretex.ai + +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU Affero General Public License as +# published by the Free Software Foundation, either version 3 of the +# License, or (at your option) any later version. + +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Affero General Public License for more details. + +# You should have received a copy of the GNU Affero General Public License +# along with this program. If not, see <https://www.gnu.org/licenses/>. 
class FolderManager:

    """
        Used for handling everything related to local storage
        when working with Coretex

        Contains
        --------
        samplesFolder : Path
            folder where samples are stored
        modelsFolder : Path
            folder where models are stored
        temp : Path
            folder where temp files and folders are stored,
            this is deleted when the run has finished executing
        datasetsFolder : Path
            folder where datasets are stored (samples are symlinked for datasets)
        cache : Path
            folder where cache module stores items
        logs : Path
            folder where node and run logs are stored
        environments : Path
            folder where node stores python environments
        runsLogDirectory : Path
            "runs" subfolder of the logs folder
        coretexpylibLogs : Path
            "coretexpylib" subfolder of the logs folder
    """

    def __init__(self, storagePath: Union[Path, str]):
        """
            Creates (if missing) all of the storage folders
            under the provided root

            Parameters
            ----------
            storagePath : Union[Path, str]
                root of the local storage, "~" is expanded
                to the home directory of the current user
        """

        # Path() accepts both str and Path so no isinstance check is needed
        self._root = Path(storagePath).expanduser()

        self.samplesFolder = self._createFolder("samples")
        self.modelsFolder = self._createFolder("models")
        self.datasetsFolder = self._createFolder("datasets")
        self.cache = self._createFolder("cache")
        self.logs = self._createFolder("logs")
        self.environments = self._createFolder("environments")
        self.temp = self._createFolder("temp")
        self._artifactsFolder = self._createFolder("artifacts")

        self.runsLogDirectory = self.logs / "runs"
        self.runsLogDirectory.mkdir(parents = True, exist_ok = True)

        self.coretexpylibLogs = self.logs / "coretexpylib"
        self.coretexpylibLogs.mkdir(parents = True, exist_ok = True)

    def _createFolder(self, name: str) -> Path:
        """
            Creates the folder with the provided name inside
            of the storage root, does nothing if it already exists

            Parameters
            ----------
            name : str
                name of the folder relative to the storage root

            Returns
            -------
            Path -> path to the folder

            Raises
            ------
            FileExistsError -> if the path exists but is not a directory
        """

        path = self._root / name

        # exist_ok makes a separate exists() check redundant and it
        # still fails loudly if a regular file occupies the path
        path.mkdir(parents = True, exist_ok = True)

        return path

    def createTempFolder(self, name: str) -> Path:
        """
            Creates temp folder which is deleted once
            the run has finished executing

            Parameters
            ----------
            name : str
                name of the folder

            Returns
            -------
            Path -> path to the created folder

            Raises
            ------
            FileExistsError -> if the temp folder already exists

            Example
            -------
            >>> from coretex import folder_manager
            \b
            >>> dummyFolderPath = folder_manager.createTempFolder("dummyTempFolder")
            >>> print(dummyFolderPath)
            "/Users/X/.coretex/temp/dummyTempFolder"
        """

        tempFolderPath = self.temp / name

        if tempFolderPath.exists():
            # Path is attached to the error, same as in tempFile
            raise FileExistsError(tempFolderPath)

        tempFolderPath.mkdir()
        return tempFolderPath

    def getArtifactsFolder(self, taskRunId: int) -> Path:
        """
            Retrieves the path to where the artifacts are stored
            for the specified TaskRun. The folder itself
            is not created by this method

            Parameters
            ----------
            taskRunId : int
                id of the TaskRun

            Returns
            -------
            Path -> path to the TaskRun artifacts local storage

            Example
            -------
            >>> from coretex import folder_manager
            \b
            >>> artifactsFolderPath = folder_manager.getArtifactsFolder(1023)
            >>> print(artifactsFolderPath)
            "/Users/X/.coretex/artifacts/1023"
        """

        return self._artifactsFolder / str(taskRunId)

    def clearDirectory(self, path: Path) -> None:
        """
            Deletes everything inside of the directory, the directory
            itself is kept. If the directory does not exist
            it is created, so it always exists and is empty afterwards

            Parameters
            ----------
            path : Path
                directory which should be emptied

            Raises
            ------
            NotADirectoryError -> if path exists but is not a directory
        """

        if not path.exists():
            path.mkdir(parents = True, exist_ok = True)
            return

        # Only the children are removed: shutil.rmtree refuses to run
        # on a symlink, so deleting "path" itself would fail if the
        # storage folder is symlinked to another location
        for element in path.iterdir():
            if element.is_dir() and not element.is_symlink():
                shutil.rmtree(element)
            else:
                # Files and symlinks (the link is removed, not its target)
                element.unlink()

    def clearTempFiles(self) -> None:
        """
            Deletes all temp files and folders (including artifacts)
        """

        self.clearDirectory(self.temp)
        self.clearDirectory(self._artifactsFolder)

    def getRunLogsDir(self, taskRunId: int) -> Path:
        """
            Retrieves the logs folder of the specified TaskRun,
            the folder is created if it does not exist

            Parameters
            ----------
            taskRunId : int
                id of the TaskRun

            Returns
            -------
            Path -> path to the TaskRun logs folder
        """

        taskRunLogsDir = self.runsLogDirectory / str(taskRunId)
        taskRunLogsDir.mkdir(parents = True, exist_ok = True)

        return taskRunLogsDir

    @contextmanager
    def tempFile(self, name: Optional[str] = None) -> Iterator[Path]:
        """
            Returns a path to temporary file and deletes
            it if it exists once the context is exited.
            The file itself is not created by this method

            Parameters
            ----------
            name : Optional[str]
                Name of the file. If not specified a random uuid4
                will be generated and used as the name

            Returns
            -------
            Iterator[Path] -> path to the file

            Raises
            ------
            FileExistsError -> if something already exists at that path
        """

        if name is None:
            name = str(uuid.uuid4())

        path = self.temp / name
        if path.exists():
            raise FileExistsError(path)

        try:
            yield path
        finally:
            # The caller may never have created the file
            path.unlink(missing_ok = True)
import folder_manager +from ..._folder_manager import folder_manager from ...utils import createFileHandler diff --git a/coretex/bioinformatics/ctx_qiime2/utils.py b/coretex/bioinformatics/ctx_qiime2/utils.py index 191c3d2e..4e0a04ca 100644 --- a/coretex/bioinformatics/ctx_qiime2/utils.py +++ b/coretex/bioinformatics/ctx_qiime2/utils.py @@ -23,7 +23,7 @@ import shutil import gzip -from ... import folder_manager +from ..._folder_manager import folder_manager from ...entities import TaskRun, CustomSample, CustomDataset from ...networking import NetworkRequestError diff --git a/coretex/cache.py b/coretex/cache.py index c41282bd..d3845b51 100644 --- a/coretex/cache.py +++ b/coretex/cache.py @@ -25,7 +25,7 @@ import requests -from . import folder_manager +from ._folder_manager import folder_manager class CacheException(Exception): diff --git a/coretex/cli/commands/task.py b/coretex/cli/commands/task.py index 35e6d278..64c9161c 100644 --- a/coretex/cli/commands/task.py +++ b/coretex/cli/commands/task.py @@ -22,7 +22,7 @@ from ..modules.user import initializeUserSession from ..modules.utils import onBeforeCommandExecute from ..modules.project_utils import getProject -from ... 
import folder_manager +from ..._folder_manager import FolderManager from ..._task import TaskRunWorker, executeRunLocally, readTaskConfig, runLogger from ...configuration import loadConfig from ...entities import TaskRun, TaskRunStatus @@ -45,7 +45,8 @@ def run(path: str, name: Optional[str], description: Optional[str], snapshot: bo parameters = readTaskConfig() # clearing temporary files in case that node was manually killed before - folder_manager.clearTempFiles() + folderManager = FolderManager(config["storagePath"]) + folderManager.clearTempFiles() selectedProject = getProject(project, config) if selectedProject is None: @@ -87,4 +88,4 @@ def run(path: str, name: Optional[str], description: Optional[str], snapshot: bo else: taskRun.updateStatus(TaskRunStatus.completedWithSuccess) - folder_manager.clearTempFiles() + folderManager.clearTempFiles() diff --git a/coretex/entities/dataset/image_dataset/synthetic_image_generator.py b/coretex/entities/dataset/image_dataset/synthetic_image_generator.py index 4f8766b2..09c367f0 100644 --- a/coretex/entities/dataset/image_dataset/synthetic_image_generator.py +++ b/coretex/entities/dataset/image_dataset/synthetic_image_generator.py @@ -32,7 +32,7 @@ from .base import BaseImageDataset from ...sample import ImageSample, AnnotatedImageSampleData from ...annotation import CoretexSegmentationInstance, CoretexImageAnnotation, BBox -from .... import folder_manager +from ...._folder_manager import folder_manager ANNOTATION_NAME = "annotations.json" diff --git a/coretex/entities/dataset/network_dataset.py b/coretex/entities/dataset/network_dataset.py index 558a21e4..31eaea9f 100644 --- a/coretex/entities/dataset/network_dataset.py +++ b/coretex/entities/dataset/network_dataset.py @@ -31,7 +31,7 @@ from ..tag import EntityTagType, Taggable from ..sample import NetworkSample from ..utils import isEntityNameValid -from ... 
import folder_manager +from ..._folder_manager import folder_manager from ...codable import KeyDescriptor from ...networking import NetworkObject, \ fileChunkUpload, networkManager, NetworkRequestError diff --git a/coretex/entities/dataset/sequence_dataset/sequence_dataset.py b/coretex/entities/dataset/sequence_dataset/sequence_dataset.py index b2f83137..db328e51 100644 --- a/coretex/entities/dataset/sequence_dataset/sequence_dataset.py +++ b/coretex/entities/dataset/sequence_dataset/sequence_dataset.py @@ -24,7 +24,7 @@ from .base import BaseSequenceDataset from ..network_dataset import NetworkDataset, _chunkSampleImport, _encryptedSampleImport from ...sample import SequenceSample, CustomSample -from .... import folder_manager +from ...._folder_manager import folder_manager from ....codable import KeyDescriptor from ....cryptography import getProjectKey from ....utils import file as file_utils diff --git a/coretex/entities/model/model.py b/coretex/entities/model/model.py index 85a2994d..e228aca3 100644 --- a/coretex/entities/model/model.py +++ b/coretex/entities/model/model.py @@ -26,7 +26,7 @@ from ..tag import Taggable, EntityTagType from ..utils import isEntityNameValid -from ... import folder_manager +from ..._folder_manager import folder_manager from ...networking import networkManager, NetworkObject, ChunkUploadSession, MAX_CHUNK_SIZE, NetworkRequestError from ...codable import KeyDescriptor diff --git a/coretex/entities/sample/network_sample.py b/coretex/entities/sample/network_sample.py index eda3d9c2..bae1533a 100644 --- a/coretex/entities/sample/network_sample.py +++ b/coretex/entities/sample/network_sample.py @@ -26,7 +26,7 @@ from .sample import Sample from ..project import ProjectType -from ... 
import folder_manager +from ..._folder_manager import folder_manager from ...codable import KeyDescriptor from ...networking import NetworkObject, networkManager, NetworkRequestError, \ fileChunkUpload, MAX_CHUNK_SIZE, FileData diff --git a/coretex/entities/task_run/artifact.py b/coretex/entities/task_run/artifact.py index 4b3db090..3eeacce3 100644 --- a/coretex/entities/task_run/artifact.py +++ b/coretex/entities/task_run/artifact.py @@ -20,7 +20,7 @@ from enum import IntEnum from pathlib import Path -from ... import folder_manager +from ..._folder_manager import folder_manager from ...codable import Codable, KeyDescriptor from ...networking import networkManager, FileData from ...utils import guessMimeType diff --git a/coretex/entities/task_run/task_run.py b/coretex/entities/task_run/task_run.py index 52bb366e..314bd6f1 100644 --- a/coretex/entities/task_run/task_run.py +++ b/coretex/entities/task_run/task_run.py @@ -35,7 +35,7 @@ from ..dataset import Dataset, LocalDataset, NetworkDataset from ..project import ProjectType from ..model import Model -from ... import folder_manager +from ..._folder_manager import folder_manager from ...codable import KeyDescriptor from ...networking import networkManager, NetworkObject, NetworkRequestError, FileData diff --git a/coretex/entities/task_run/utils.py b/coretex/entities/task_run/utils.py index c1a26da9..ae549669 100644 --- a/coretex/entities/task_run/utils.py +++ b/coretex/entities/task_run/utils.py @@ -26,7 +26,7 @@ from ..dataset import Dataset, LocalDataset, NetworkDataset, LocalCustomDataset, \ CustomDataset, LocalImageDataset, ImageDataset from ..project import ProjectType -from ... 
import folder_manager +from ..._folder_manager import folder_manager def getDatasetType(type_: ProjectType, isLocal: bool) -> Type[Dataset]: diff --git a/coretex/folder_manager.py b/coretex/folder_manager.py deleted file mode 100644 index 002a4c9a..00000000 --- a/coretex/folder_manager.py +++ /dev/null @@ -1,196 +0,0 @@ -# Copyright (C) 2023 Coretex LLC - -# This file is part of Coretex.ai - -# This program is free software: you can redistribute it and/or modify -# it under the terms of the GNU Affero General Public License as -# published by the Free Software Foundation, either version 3 of the -# License, or (at your option) any later version. - -# This program is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -# GNU Affero General Public License for more details. - -# You should have received a copy of the GNU Affero General Public License -# along with this program. If not, see . 
- -from typing import Iterator, Optional -from pathlib import Path -from contextlib import contextmanager - -import os -import shutil -import uuid - -from .utils import file as file_utils - - -""" - Used for handling everything related to local storage - when working with Coretex - - Contains - -------- - samplesFolder : str - folder where samples are stored - modelsFolder : str - folder where models are stored - temp : str - folder where temp files and folders are stored, - this is deleted when the run has finished executing - - datasetsFolder : Path - folder where datasets are stored (samples are symlinked for datasets) - cache : Path - folder where cache module stores items - logs : Path - folder where node and run logs are stored - environments : Path - folder where node stores python environments -""" - - -def _createFolder(name: str) -> Path: - path = _root / name - - if not path.exists(): - path.mkdir(parents = True, exist_ok = True) - - return path - - -_root = Path(os.environ["CTX_STORAGE_PATH"]).expanduser() - -samplesFolder = _createFolder("samples") -modelsFolder = _createFolder("models") -datasetsFolder = _createFolder("datasets") -cache = _createFolder("cache") -logs = _createFolder("logs") -environments = _createFolder("environments") -temp = _createFolder("temp") -_artifactsFolder = _createFolder("artifacts") - -runsLogDirectory = logs / "runs" -runsLogDirectory.mkdir(exist_ok = True) - -coretexpylibLogs = logs / "coretexpylib" -coretexpylibLogs.mkdir(exist_ok = True) - - -def createTempFolder(name: str) -> Path: - """ - Creates temp folder which is deleted once - the run has finished executing - - Parameters - ---------- - name : str - name of the folder - - Returns - ------- - Path -> path to the created folder - - Raises - ------ - FileExistsError -> if the temp folder already exists - - Example - ------- - >>> from coretex import folder_manager - \b - >>> dummyFolderPath = folder_manager.createTempFolder("dummyTempFolder") - >>> 
print(dummyFolderPath) - "/Users/X/.coretex/temp/dummyTempFolder" - """ - - tempFolderPath = temp / name - - if tempFolderPath.exists(): - raise FileExistsError - - tempFolderPath.mkdir() - return tempFolderPath - - -def getArtifactsFolder(taskRunId: int) -> Path: - """ - Retrieves the path to where the artifacts are stored - for the specified TaskRuns - - Parameters - ---------- - taskRunId : int - id of the TaskRun - - Returns - ------- - Path -> path to the TaskRun artifacts local storage - - Example - ------- - >>> from coretex.folder_management import FolderManager - \b - >>> artifactsFolderPath = FolderManager.instance().getArtifactsFolder(1023) - >>> print(artifactsFolderPath) - Path("/Users/bogdanbm/.coretex/artifacts/1023") - - """ - - return _artifactsFolder / str(taskRunId) - - -def clearDirectory(path: Path) -> None: - for element in file_utils.walk(path): - if element.is_file(): - element.unlink() - - if element.is_dir(): - shutil.rmtree(element) - - -def clearTempFiles() -> None: - """ - Deletes all temp files and folders (including artifacts) - """ - - clearDirectory(temp) - clearDirectory(_artifactsFolder) - - -def getRunLogsDir(taskRunId: int) -> Path: - taskRunLogsDir = runsLogDirectory / str(taskRunId) - taskRunLogsDir.mkdir(parents = True, exist_ok = True) - - return taskRunLogsDir - - -@contextmanager -def tempFile(name: Optional[str] = None) -> Iterator[Path]: - """ - Returns a path to temporary file and deletes - it if it exists once the context is exited. - - Parameters - ---------- - name : Optional[str] - Name of the file. 
If not specified a random uuid4 - will be generated and used as the name - - Returns - ------- - Iterator[Path] -> path to the file - """ - - if name is None: - name = str(uuid.uuid4()) - - path = temp / name - if path.exists(): - raise FileExistsError(path) - - try: - yield path - finally: - path.unlink(missing_ok = True) diff --git a/coretex/utils/inference.py b/coretex/utils/inference.py index 19b0933a..7fb34acc 100644 --- a/coretex/utils/inference.py +++ b/coretex/utils/inference.py @@ -10,7 +10,7 @@ import ezkl import numpy as np -from .. import folder_manager +from .._folder_manager import folder_manager async def genWitness(inputPath: Path, circuit: Path, witnessPath: Path) -> None: