Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

CTX-6421: Fixed bug where temp folder is not cleaned after run is finished. (refactor of folder_manager) #239

Merged
merged 11 commits into from
Aug 5, 2024
1 change: 1 addition & 0 deletions coretex/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,3 +34,4 @@
# Use this only
from .entities import *
from ._task import currentTaskRun, initializeRTask, TaskRunWorker
from ._folder_manager import folder_manager
188 changes: 188 additions & 0 deletions coretex/_folder_manager.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,188 @@
# Copyright (C) 2023 Coretex LLC

# This file is part of Coretex.ai

# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU Affero General Public License as
# published by the Free Software Foundation, either version 3 of the
# License, or (at your option) any later version.

# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU Affero General Public License for more details.

# You should have received a copy of the GNU Affero General Public License
# along with this program. If not, see <https://www.gnu.org/licenses/>.

from typing import Iterator, Optional, Union
from pathlib import Path
from contextlib import contextmanager

import os
import shutil
import uuid


class FolderManager:

    """
        Used for handling everything related to local storage
        when working with Coretex

        Contains
        --------
        samplesFolder : Path
            folder where samples are stored
        modelsFolder : Path
            folder where models are stored
        temp : Path
            folder where temp files and folders are stored,
            it is emptied every time clearTempFiles is called
        datasetsFolder : Path
            folder where datasets are stored (samples are symlinked for datasets)
        cache : Path
            folder where cache module stores items
        logs : Path
            folder where node and run logs are stored
        environments : Path
            folder where node stores python environments
        runsLogDirectory : Path
            "runs" subfolder of logs, parent of the per-run log folders
        coretexpylibLogs : Path
            "coretexpylib" subfolder of logs
    """

    def __init__(self, storagePath: Union[Path, str]):
        """
            Creates the storage folder structure (if it does not
            already exist) under the provided root path

            Parameters
            ----------
            storagePath : Union[Path, str]
                root of the local storage, "~" is expanded
        """

        if isinstance(storagePath, str):
            storagePath = Path(storagePath)

        self._root = storagePath.expanduser()

        self.samplesFolder = self._createFolder("samples")
        self.modelsFolder = self._createFolder("models")
        self.datasetsFolder = self._createFolder("datasets")
        self.cache = self._createFolder("cache")
        self.logs = self._createFolder("logs")
        self.environments = self._createFolder("environments")
        self.temp = self._createFolder("temp")
        self._artifactsFolder = self._createFolder("artifacts")

        self.runsLogDirectory = self.logs / "runs"
        self.runsLogDirectory.mkdir(exist_ok = True)

        self.coretexpylibLogs = self.logs / "coretexpylib"
        self.coretexpylibLogs.mkdir(exist_ok = True)

    def _createFolder(self, name: str) -> Path:
        """
            Creates (if missing) the folder with the provided
            name inside of the storage root and returns its path
        """

        path = self._root / name

        if not path.exists():
            path.mkdir(parents = True, exist_ok = True)

        return path

    def createTempFolder(self, name: str) -> Path:
        """
            Creates a folder inside of the temp folder, it is
            deleted the next time clearTempFiles is called

            Parameters
            ----------
            name : str
                name of the folder

            Returns
            -------
            Path -> path to the created folder

            Raises
            ------
            FileExistsError -> if the temp folder already exists

            Example
            -------
            >>> from coretex import folder_manager
            \b
            >>> dummyFolderPath = folder_manager.createTempFolder("dummyTempFolder")
            >>> print(dummyFolderPath)
            "/Users/X/.coretex/temp/dummyTempFolder"
        """

        tempFolderPath = self.temp / name

        if tempFolderPath.exists():
            # Include the offending path so the error is actionable,
            # same as what tempFile does
            raise FileExistsError(tempFolderPath)

        tempFolderPath.mkdir()
        return tempFolderPath

    def getArtifactsFolder(self, taskRunId: int) -> Path:
        """
            Retrieves the path to where the artifacts are stored
            for the specified TaskRun. The folder itself is not
            created by this method.

            Parameters
            ----------
            taskRunId : int
                id of the TaskRun

            Returns
            -------
            Path -> path to the TaskRun artifacts local storage

            Example
            -------
            >>> from coretex import folder_manager
            \b
            >>> artifactsFolderPath = folder_manager.getArtifactsFolder(1023)
            >>> print(artifactsFolderPath)
            "/Users/X/.coretex/artifacts/1023"
        """

        return self._artifactsFolder / str(taskRunId)

    def clearDirectory(self, path: Path) -> None:
        """
            Deletes everything inside of the directory, leaving
            an empty directory at the same path. If the directory
            does not exist it is only created.

            Parameters
            ----------
            path : Path
                directory which will be emptied
        """

        # The directory may already be gone (e.g. removed manually or by
        # an interrupted cleanup), that must not break the cleanup itself
        if path.exists():
            shutil.rmtree(path)

        path.mkdir(parents = True, exist_ok = True)

    def clearTempFiles(self) -> None:
        """
            Deletes all temp files and folders (including artifacts)
        """

        self.clearDirectory(self.temp)
        self.clearDirectory(self._artifactsFolder)

    def getRunLogsDir(self, taskRunId: int) -> Path:
        """
            Retrieves the path to the directory where logs of the
            specified TaskRun are stored, creating it if needed

            Parameters
            ----------
            taskRunId : int
                id of the TaskRun

            Returns
            -------
            Path -> path to the TaskRun logs directory
        """

        taskRunLogsDir = self.runsLogDirectory / str(taskRunId)
        taskRunLogsDir.mkdir(parents = True, exist_ok = True)

        return taskRunLogsDir

    @contextmanager
    def tempFile(self, name: Optional[str] = None) -> Iterator[Path]:
        """
            Returns a path to temporary file and deletes
            it if it exists once the context is exited.
            The file itself is not created by this method.

            Parameters
            ----------
            name : Optional[str]
                Name of the file. If not specified a random uuid4
                will be generated and used as the name

            Returns
            -------
            Iterator[Path] -> path to the file

            Raises
            ------
            FileExistsError -> if the path already exists
        """

        if name is None:
            name = str(uuid.uuid4())

        path = self.temp / name
        if path.exists():
            raise FileExistsError(path)

        try:
            yield path
        finally:
            # Runs on both normal exit and on exception inside of the context
            path.unlink(missing_ok = True)


# Module-level shared FolderManager instance, created when this module is imported.
# The storage root is read from the CTX_STORAGE_PATH environment variable;
# importing this module raises KeyError if that variable is not set.
folder_manager = FolderManager(os.environ["CTX_STORAGE_PATH"])
2 changes: 1 addition & 1 deletion coretex/_logger.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@

from datetime import datetime

from . import folder_manager
from ._folder_manager import folder_manager
from .logging import initializeLogger, LogSeverity
from .configuration import CONFIG_DIR

Expand Down
2 changes: 1 addition & 1 deletion coretex/_task/base_callback.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@
import signal

from .current_task_run import setCurrentTaskRun
from .. import folder_manager
from .._folder_manager import folder_manager
from ..entities import TaskRun


Expand Down
2 changes: 1 addition & 1 deletion coretex/_task/initialization.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@

from .remote import processRemote
from .current_task_run import setCurrentTaskRun
from .. import folder_manager
from .._folder_manager import folder_manager
from ..entities import TaskRun, TaskRunStatus
from ..logging import createFormatter, initializeLogger
from ..logging.severity import LogSeverity
Expand Down
2 changes: 1 addition & 1 deletion coretex/_task/worker/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@

import logging

from ... import folder_manager
from ..._folder_manager import folder_manager
from ...utils import createFileHandler


Expand Down
2 changes: 1 addition & 1 deletion coretex/bioinformatics/ctx_qiime2/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@
import shutil
import gzip

from ... import folder_manager
from ..._folder_manager import folder_manager
from ...entities import TaskRun, CustomSample, CustomDataset
from ...networking import NetworkRequestError

Expand Down
2 changes: 1 addition & 1 deletion coretex/cache.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@

import requests

from . import folder_manager
from ._folder_manager import folder_manager


class CacheException(Exception):
Expand Down
7 changes: 4 additions & 3 deletions coretex/cli/commands/task.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@
from ..modules.user import initializeUserSession
from ..modules.utils import onBeforeCommandExecute
from ..modules.project_utils import getProject
from ... import folder_manager
from ..._folder_manager import FolderManager
from ..._task import TaskRunWorker, executeRunLocally, readTaskConfig, runLogger
from ...configuration import loadConfig
from ...entities import TaskRun, TaskRunStatus
Expand All @@ -45,7 +45,8 @@ def run(path: str, name: Optional[str], description: Optional[str], snapshot: bo
parameters = readTaskConfig()

# clearing temporary files in case that node was manually killed before
folder_manager.clearTempFiles()
folderManager = FolderManager(config["storagePath"])
folderManager.clearTempFiles()

selectedProject = getProject(project, config)
if selectedProject is None:
Expand Down Expand Up @@ -87,4 +88,4 @@ def run(path: str, name: Optional[str], description: Optional[str], snapshot: bo
else:
taskRun.updateStatus(TaskRunStatus.completedWithSuccess)

folder_manager.clearTempFiles()
folderManager.clearTempFiles()
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,7 @@
from .base import BaseImageDataset
from ...sample import ImageSample, AnnotatedImageSampleData
from ...annotation import CoretexSegmentationInstance, CoretexImageAnnotation, BBox
from .... import folder_manager
from ...._folder_manager import folder_manager


ANNOTATION_NAME = "annotations.json"
Expand Down
2 changes: 1 addition & 1 deletion coretex/entities/dataset/network_dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@
from ..tag import EntityTagType, Taggable
from ..sample import NetworkSample
from ..utils import isEntityNameValid
from ... import folder_manager
from ..._folder_manager import folder_manager
from ...codable import KeyDescriptor
from ...networking import NetworkObject, \
fileChunkUpload, networkManager, NetworkRequestError
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@
from .base import BaseSequenceDataset
from ..network_dataset import NetworkDataset, _chunkSampleImport, _encryptedSampleImport
from ...sample import SequenceSample, CustomSample
from .... import folder_manager
from ...._folder_manager import folder_manager
from ....codable import KeyDescriptor
from ....cryptography import getProjectKey
from ....utils import file as file_utils
Expand Down
2 changes: 1 addition & 1 deletion coretex/entities/model/model.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@

from ..tag import Taggable, EntityTagType
from ..utils import isEntityNameValid
from ... import folder_manager
from ..._folder_manager import folder_manager
from ...networking import networkManager, NetworkObject, ChunkUploadSession, MAX_CHUNK_SIZE, NetworkRequestError
from ...codable import KeyDescriptor

Expand Down
2 changes: 1 addition & 1 deletion coretex/entities/sample/network_sample.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@

from .sample import Sample
from ..project import ProjectType
from ... import folder_manager
from ..._folder_manager import folder_manager
from ...codable import KeyDescriptor
from ...networking import NetworkObject, networkManager, NetworkRequestError, \
fileChunkUpload, MAX_CHUNK_SIZE, FileData
Expand Down
2 changes: 1 addition & 1 deletion coretex/entities/task_run/artifact.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@
from enum import IntEnum
from pathlib import Path

from ... import folder_manager
from ..._folder_manager import folder_manager
from ...codable import Codable, KeyDescriptor
from ...networking import networkManager, FileData
from ...utils import guessMimeType
Expand Down
2 changes: 1 addition & 1 deletion coretex/entities/task_run/task_run.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,7 @@
from ..dataset import Dataset, LocalDataset, NetworkDataset
from ..project import ProjectType
from ..model import Model
from ... import folder_manager
from ..._folder_manager import folder_manager
from ...codable import KeyDescriptor
from ...networking import networkManager, NetworkObject, NetworkRequestError, FileData

Expand Down
2 changes: 1 addition & 1 deletion coretex/entities/task_run/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@
from ..dataset import Dataset, LocalDataset, NetworkDataset, LocalCustomDataset, \
CustomDataset, LocalImageDataset, ImageDataset
from ..project import ProjectType
from ... import folder_manager
from ..._folder_manager import folder_manager


def getDatasetType(type_: ProjectType, isLocal: bool) -> Type[Dataset]:
Expand Down
Loading
Loading