Skip to content

Commit

Permalink
Merge branch 'develop' into CTX-5485-v1
Browse files Browse the repository at this point in the history
# Conflicts:
#	coretex/cli/commands/node.py
#	coretex/cli/modules/update.py
#	main.py
  • Loading branch information
Bogdan Tintor committed Aug 29, 2024
2 parents da82d1c + ab1fadc commit 9013f1f
Show file tree
Hide file tree
Showing 33 changed files with 1,005 additions and 624 deletions.
39 changes: 19 additions & 20 deletions Pipfile
Original file line number Diff line number Diff line change
Expand Up @@ -4,27 +4,26 @@ verify_ssl = true
name = "pypi"

[packages]
requests = "*"
inflection = "*"
pillow = "10.2.0"
requests = ">=2.28.0"
inflection = ">=0.5.1"
pillow = "*"
numpy = "<2"
scikit-image = "*"
shapely = "*"
typed-argument-parser = "*"
typing-extensions = "*"
psutil = "*"
py3nvml = "*"
watchdog = "*"
gitpython = "*"
pyyaml = "*"
click = "*"
tabulate = "*"
inquirer = "*"
docker = "*"
cryptography = "*"
pycryptodome = "*"
onnxruntime = "*"
ezkl = "*"
scikit-image = ">=0.21.0"
shapely = ">=2.0.5"
typed-argument-parser = ">=1.10.1"
typing-extensions = ">=4.12.2"
psutil = ">=6.0.0"
py3nvml = ">=0.2.7"
watchdog = ">=4.0.1"
gitpython = ">=3.1.43"
pyyaml = ">=6.0.1"
click = ">=8.1.7"
tabulate = ">=0.9.0"
inquirer = ">=3.3.0"
cryptography = ">=43.0.0"
pycryptodome = ">=3.20.0"
onnxruntime = ">=1.16.3"
ezkl = ">=12.0.1"

[dev-packages]
mypy = "*"
Expand Down
793 changes: 406 additions & 387 deletions Pipfile.lock

Large diffs are not rendered by default.

7 changes: 1 addition & 6 deletions coretex/_task/run_logger/run_logger.py
Original file line number Diff line number Diff line change
Expand Up @@ -39,12 +39,7 @@ def uploadTaskRunLogs(taskRunId: int, logs: List[Log]) -> bool:
"logs": [log.encode() for log in logs]
}

response = networkManager.post(
"model-queue/add-console-log",
params,
timeout = (5, 600) # connection timeout 5 seconds, log upload timeout 600 seconds
)

response = networkManager.post("model-queue/add-console-log", params)
return not response.hasFailed()
except RequestFailedError as ex:
logging.getLogger("coretexpylib").error(f">> Failed to upload console logs to Coretex. Reason: {ex}")
Expand Down
6 changes: 5 additions & 1 deletion coretex/cli/commands/login.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@
import click

from ..modules import user, ui
from ...configuration import UserConfiguration, InvalidConfiguration, ConfigurationNotFound
from ...configuration import UserConfiguration, InvalidConfiguration, ConfigurationNotFound, utils


@click.command()
Expand All @@ -38,5 +38,9 @@ def login() -> None:

ui.stdEcho("Please enter your credentials:")
userConfig = user.configUser()

initialData = utils.fetchInitialData()
userConfig.frontendUrl = initialData.get("frontend_url", "app.coretex.ai/")

userConfig.save()
ui.successEcho(f"User {userConfig.username} successfully logged in.")
2 changes: 1 addition & 1 deletion coretex/cli/commands/model.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@ def create(name: str, path: str, project: Optional[str], accuracy: float) -> Non
model.upload(path)

ui.successEcho(f"Model \"{model.name}\" created successfully")
ui.stdEcho(f"A new model has been created. You can open it by clicking on this URL {ui.outputUrl(model.entityUrl())}.")
ui.stdEcho(f"A new model has been created. You can open it by clicking on this URL {ui.outputUrl(userConfig.frontendUrl, model.entityUrl())}.")


@click.group()
Expand Down
39 changes: 36 additions & 3 deletions coretex/cli/commands/node.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,15 +16,16 @@
# along with this program. If not, see <https://www.gnu.org/licenses/>.

from typing import Optional
from pathlib import Path

import click

from ..modules import ui
from ..modules import node as node_module
from ..modules.node import NodeStatus
from ..modules.node import NodeStatus, getNodeStatus
from ..modules.user import initializeUserSession
from ..modules.utils import onBeforeCommandExecute
from ..modules.update import activateAutoUpdate, getNodeStatus
from ..modules.utils import onBeforeCommandExecute
from ...utils import docker
from ...configuration import NodeConfiguration, InvalidConfiguration, ConfigurationNotFound

Expand Down Expand Up @@ -179,10 +180,40 @@ def config(advanced: bool) -> None:
activateAutoUpdate()


@click.command()
def status() -> None:
    # Prints the current Node status to the terminal, colorized by state.
    # (Plain comments are used instead of a docstring so that the
    # click-generated --help text for this command stays unchanged.)
    nodeStatus = getNodeStatus()

    # Display color for each known status name
    statusColors = {
        "inactive": "red",
        "active": "green",
        "busy": "cyan",
        "reconnecting": "yellow"
    }

    # .get() instead of [] so that a status which has no entry in the table
    # is printed without a foreground color instead of crashing the
    # command with a KeyError (click.style accepts fg = None).
    statusEcho = click.style(nodeStatus.name, fg = statusColors.get(nodeStatus.name))
    click.echo(f"Node is {statusEcho}.")


@click.command()
@click.option("--tail", "-n", type = int, help = "Shows N last logs.")
@click.option("--follow", "-f", is_flag = True, help = "Displays logs realtime.")
@click.option("--timestamps", "-t", is_flag = True, help = "Displays timestamps for logs.")
def logs(tail: Optional[int], follow: bool, timestamps: bool) -> None:
    # Shows the logs of the Node running on this machine.
    #
    # tail       - optional number of most recent log entries to show
    # follow     - flag forwarded to node_module.showLogs
    # timestamps - flag forwarded to node_module.showLogs
    #
    # If no Node is running an error is echoed and nothing else happens.
    if node_module.isRunning():
        node_module.showLogs(tail, follow, timestamps)
    else:
        ui.errorEcho("There is no currently running Node on the machine.")


# "node" command group. The hooks below are registered through
# onBeforeCommandExecute; the ones carrying excludeSubcommands = ["status"]
# are not applied to the "status" subcommand.
#
# The unresolved merge-conflict markers (<<<<<<< HEAD / ======= / >>>>>>> develop)
# that were committed here have been resolved in favour of the "develop" side,
# which is the only side consistent with excluding "status" from the Docker
# availability check. The older un-parameterised Docker check was dropped
# because keeping both would run the check twice and defeat the exclusion.
#
# NOTE(review): checkEnvironment comes from the "develop" side of the merge and
# no import for it is visible in this part of the file - confirm that it is
# imported (presumably from ..modules.utils - verify) before merging.
@click.group()
@onBeforeCommandExecute(docker.isDockerAvailable, excludeSubcommands = ["status"])
@onBeforeCommandExecute(initializeUserSession)
@onBeforeCommandExecute(node_module.checkResourceLimitations, excludeSubcommands = ["status"])
@onBeforeCommandExecute(checkEnvironment)
def node() -> None:
    pass

Expand All @@ -191,3 +222,5 @@ def node() -> None:
node.add_command(stop, "stop")
node.add_command(update, "update")
node.add_command(config, "config")
node.add_command(status, "status")
node.add_command(logs, "logs")
8 changes: 6 additions & 2 deletions coretex/cli/commands/project.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,8 +32,8 @@
@click.option("--type", "-t", "projectType", type = int, help = "Project type")
@click.option("--description", "-d", type = str, help = "Project description")
def create(name: Optional[str], projectType: Optional[int], description: Optional[str]) -> None:
project = project_utils.createProject(name, projectType, description)
userConfig = UserConfiguration.load()
project = project_utils.createProject(userConfig.frontendUrl, name, projectType, description)

selectNewProject = ui.clickPrompt("Do you want to select the new project as default? (Y/n)", type = bool, default = True)
if selectNewProject:
Expand Down Expand Up @@ -86,7 +86,11 @@ def select(name: str) -> None:
userConfig.selectProject(project.id)
except ValueError:
ui.errorEcho(f"Project \"{name}\" not found.")
project = project_utils.promptProjectCreate("Do you want to create a project with that name?", name)
project = project_utils.promptProjectCreate(
"Do you want to create a project with that name?",
name,
userConfig.frontendUrl
)
if project is None:
return

Expand Down
26 changes: 25 additions & 1 deletion coretex/cli/commands/task.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,9 @@
from typing import Optional

import click
import webbrowser

from ..modules import ui
from ..modules.project_utils import getProject
from ..modules.user import initializeUserSession
from ..modules.utils import onBeforeCommandExecute
Expand All @@ -36,7 +38,6 @@ class RunException(Exception):


@click.command()
@onBeforeCommandExecute(initializeUserSession)
@click.argument("path", type = click.Path(exists = True, dir_okay = False))
@click.option("--name", type = str, default = None)
@click.option("--description", type = str, default = None)
Expand All @@ -58,6 +59,14 @@ def run(path: str, name: Optional[str], description: Optional[str], snapshot: bo
if selectedProject is None:
return

ui.stdEcho(
"Project info: "
f"\n\tName: {selectedProject.name}"
f"\n\tProject type: {selectedProject.projectType.name}"
f"\n\tDescription: {selectedProject.description}"
f"\n\tCreated on: {selectedProject.createdOn}"
)

taskRun: TaskRun = TaskRun.runLocal(
selectedProject.id,
snapshot,
Expand All @@ -67,6 +76,12 @@ def run(path: str, name: Optional[str], description: Optional[str], snapshot: bo
entryPoint = path
)

ui.stdEcho(
"Task Run successfully started. "
f"You can open it by clicking on this URL {ui.outputUrl(userConfig.frontendUrl, taskRun.entityUrl())}."
)
webbrowser.open(f"{userConfig.frontendUrl}/{taskRun.entityUrl()}")

taskRun.updateStatus(TaskRunStatus.preparingToStart)

with TaskRunWorker(userConfig.refreshToken, taskRun.id):
Expand Down Expand Up @@ -95,3 +110,12 @@ def run(path: str, name: Optional[str], description: Optional[str], snapshot: bo
taskRun.updateStatus(TaskRunStatus.completedWithSuccess)

folder_manager.clearTempFiles()


# "task" command group. initializeUserSession is registered on the group
# through onBeforeCommandExecute rather than on an individual subcommand.
@click.group()
@onBeforeCommandExecute(initializeUserSession)
def task() -> None:
    # Intentionally empty - the group only acts as a container for subcommands.
    pass


task.add_command(run, "run")
52 changes: 48 additions & 4 deletions coretex/cli/modules/node.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@
import os
import logging
import requests
import platform

import click

Expand Down Expand Up @@ -72,7 +73,8 @@ def start(dockerImage: str, nodeConfig: NodeConfiguration) -> None:
"CTX_API_URL": os.environ["CTX_API_URL"],
"CTX_STORAGE_PATH": "/root/.coretex",
"CTX_NODE_ACCESS_TOKEN": nodeConfig.accessToken,
"CTX_NODE_MODE": str(nodeConfig.mode)
"CTX_NODE_MODE": str(nodeConfig.mode),
"CTX_HEARTBEAT_INTERVAL": str(nodeConfig.heartbeatInterval)
}

if nodeConfig.modelId is not None:
Expand Down Expand Up @@ -198,6 +200,10 @@ def shouldUpdate(image: str) -> bool:
return True


def showLogs(tail: Optional[int], follow: bool, timestamps: bool) -> None:
    """
        Forwards a log request for the Node container to docker.getLogs

        Parameters
        ----------
        tail : Optional[int]
            forwarded unchanged to docker.getLogs
        follow : bool
            forwarded unchanged to docker.getLogs
        timestamps : bool
            forwarded unchanged to docker.getLogs
    """

    containerName = config_defaults.DOCKER_CONTAINER_NAME
    docker.getLogs(containerName, tail, follow, timestamps)


def registerNode(
name: str,
nodeMode: NodeMode,
Expand Down Expand Up @@ -371,6 +377,7 @@ def checkResourceLimitations() -> None:
def configureNode(advanced: bool) -> NodeConfiguration:
ui.highlightEcho("[Node Configuration]")
nodeConfig = NodeConfiguration({}) # create new empty node config
currentOS = platform.system().lower()

cpuLimit, ramLimit = docker.getResourceLimits()
swapLimit = docker.getDockerSwapLimit()
Expand All @@ -383,11 +390,42 @@ def configureNode(advanced: bool) -> NodeConfiguration:
else:
nodeConfig.image = "coretexai/coretex-node"

if isGPUAvailable():
# GPU Access is supported for:
# - Linux (Docker Engine)
# - Windows (Docker Desktop)

if isGPUAvailable() and not (docker.isDockerDesktop() and currentOS != "windows"):
nodeConfig.allowGpu = ui.clickPrompt("Do you want to allow the Node to access your GPU? (Y/n)", type = bool, default = True)
else:
nodeConfig.allowGpu = False

if nodeConfig.allowGpu and platform.system().lower() == "linux" and not docker.isDaemonFileUpdated():
shouldUpdateDockerConfig = ui.clickPrompt(
"NVIDIA has a bug where a docker container running Coretex Node can lose access to GPU "
"(https://github.com/NVIDIA/nvidia-container-toolkit/issues/48). "
"\nDo you want Coretex CLI to apply a workaround for this bug "
"(NOTE: This requires docker daemon restart)? (Y/n)",
type = bool,
default = True
)

if shouldUpdateDockerConfig:
docker.updateDaemonFile()
shouldRestartDocker = ui.clickPrompt("Do you want to restart Docker to apply the changes? (Y/n)", type = bool, default = True)

if shouldRestartDocker:
docker.restartDocker()
else:
ui.warningEcho(
"Warning: The changes will not take effect until Docker is restarted. "
"(https://github.com/NVIDIA/nvidia-container-toolkit/issues/48)"
)
else:
ui.warningEcho(
"Warning: Not updating the daemon.json file may lead to GPU access issues in Docker "
"containers. (https://github.com/NVIDIA/nvidia-container-toolkit/issues/48)"
)

if imageType == ImageType.official:
tag = "gpu" if nodeConfig.allowGpu else "cpu"
nodeConfig.image += f":latest-{tag}"
Expand Down Expand Up @@ -439,11 +477,17 @@ def configureNode(advanced: bool) -> NodeConfiguration:
)

nodeConfig.endpointInvocationPrice = promptInvocationPrice()

nodeConfig.heartbeatInterval = ui.clickPrompt(
"Enter interval (seconds) at which the Node will send heartbeat to Coretex Server",
config_defaults.HEARTBEAT_INTERVAL // 1000,
type = int
) * 1000 # Node expects the value in ms
else:
ui.stdEcho("To configure node manually run coretex node config with --verbose flag.")
ui.stdEcho("To configure node manually run coretex node config with --advanced flag.")

publicKey: Optional[bytes] = None
if isinstance(nodeConfig.secret, str) and nodeConfig.secret != config_defaults.DEFAULT_NODE_SECRET:
if nodeConfig.secret is not None and nodeConfig.secret != config_defaults.DEFAULT_NODE_SECRET:
ui.progressEcho("Generating RSA key-pair (2048 bits long) using provided node secret...")
rsaKey = rsa.generateKey(2048, nodeConfig.secret.encode("utf-8"))
publicKey = rsa.getPublicKeyBytes(rsaKey.public_key())
Expand Down
Loading

0 comments on commit 9013f1f

Please sign in to comment.