From 3cd0561b9d5e60b5bd855fdc78164b4c6fb132df Mon Sep 17 00:00:00 2001
From: Alay Shah <alay11shah@gmail.com>
Date: Wed, 20 Mar 2024 17:29:47 -0700
Subject: [PATCH] Scheduler Logging Nits
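
Small logging cleanups across the scheduler:

* comm_utils/job_utils.py: annotate the return type of request_gpu_ids
  (a tuple of the matched GPU ids and their count, both Optional) and
  pull Optional and Tuple into the typing import.
* master/server_login.py: pass an explicit log_level=logging.INFO when
  initializing the MLOps runtime logs for the server agent.
* slave/client_runner.py: initialize the runtime logs at the top of
  callback_start_train, before the payload is parsed, instead of after
  the run and edge ids are set.
* core/mlops/mlops_runtime_log.py: pass an explicit
  log_level=logging.INFO here as well.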
---
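Reviewer note (kept below the --- so it stays out of the commit message):
the new annotation documents request_gpu_ids as returning a two-element
tuple whose parts may both be None. A minimal standalone sketch of that
contract for reference; everything past the first few lines of the body is
assumed for illustration, not copied from the real @staticmethod in
job_utils.py:

    from typing import Optional, Tuple

    def request_gpu_ids(request_gpu_num, available_gpu_ids) -> Tuple[Optional[str], Optional[int]]:
        # Treat a missing request as zero GPUs, as the existing body does.
        request_gpu_num = 0 if request_gpu_num is None else request_gpu_num
        matched_gpu_num = min(len(available_gpu_ids), request_gpu_num)
        if matched_gpu_num <= 0 or matched_gpu_num != request_gpu_num:
            # Assumed early return when the request cannot be matched.
            return None, None
        # Assumed formatting: comma-joined GPU ids plus the matched count.
        return ",".join(str(i) for i in available_gpu_ids[:matched_gpu_num]), matched_gpu_num

    # Example: returns ("0,1", 2); an unsatisfiable request yields (None, None).
    gpu_ids, gpu_num = request_gpu_ids(2, [0, 1, 2, 3])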
python/fedml/computing/scheduler/comm_utils/job_utils.py | 4 ++--
python/fedml/computing/scheduler/master/server_login.py | 2 +-
python/fedml/computing/scheduler/slave/client_runner.py | 3 +--
python/fedml/core/mlops/mlops_runtime_log.py | 2 +-
4 files changed, 5 insertions(+), 6 deletions(-)
diff --git a/python/fedml/computing/scheduler/comm_utils/job_utils.py b/python/fedml/computing/scheduler/comm_utils/job_utils.py
index a87a467d5c..d12151450e 100644
--- a/python/fedml/computing/scheduler/comm_utils/job_utils.py
+++ b/python/fedml/computing/scheduler/comm_utils/job_utils.py
@@ -18,7 +18,7 @@
 from fedml.computing.scheduler.slave.client_data_interface import FedMLClientDataInterface
 from fedml.core.common.singleton import Singleton
 from fedml.computing.scheduler.comm_utils.container_utils import ContainerUtils
-from typing import List
+from typing import List, Optional, Tuple
 import threading
 import json
@@ -150,7 +150,7 @@ def balance_available_gpu_ids(available_gpu_ids):
         return available_gpu_ids.copy()

     @staticmethod
-    def request_gpu_ids(request_gpu_num, available_gpu_ids):
+    def request_gpu_ids(request_gpu_num, available_gpu_ids) -> Tuple[Optional[str], Optional[int]]:
         available_gpu_count = len(available_gpu_ids)
         request_gpu_num = 0 if request_gpu_num is None else request_gpu_num
         matched_gpu_num = min(available_gpu_count, request_gpu_num)
diff --git a/python/fedml/computing/scheduler/master/server_login.py b/python/fedml/computing/scheduler/master/server_login.py
index dee2c83236..e53d5d6e13 100755
--- a/python/fedml/computing/scheduler/master/server_login.py
+++ b/python/fedml/computing/scheduler/master/server_login.py
@@ -384,7 +384,7 @@ def init_logs(args, edge_id):
     args.edge_id = edge_id
     setattr(args, "using_mlops", True)
     setattr(args, "server_agent_id", edge_id)
-    MLOpsRuntimeLog.get_instance(args).init_logs()
+    MLOpsRuntimeLog.get_instance(args).init_logs(log_level=logging.INFO)


 def login(args):
diff --git a/python/fedml/computing/scheduler/slave/client_runner.py b/python/fedml/computing/scheduler/slave/client_runner.py
index 52228e84ff..339c408c80 100755
--- a/python/fedml/computing/scheduler/slave/client_runner.py
+++ b/python/fedml/computing/scheduler/slave/client_runner.py
@@ -907,7 +907,7 @@ def ota_upgrade(self, payload, request_json):

     def callback_start_train(self, topic, payload):
         # Get training params
-
+        MLOpsRuntimeLog.get_instance(self.args).init_logs(log_level=logging.INFO)
         request_json = json.loads(payload)
         is_retain = request_json.get("is_retain", False)
         if is_retain:
@@ -918,7 +918,6 @@ def callback_start_train(self, topic, payload):
         train_edge_id = str(topic).split("/")[-2]
         self.args.run_id = run_id
         self.args.edge_id = train_edge_id
-        MLOpsRuntimeLog.get_instance(self.args).init_logs(log_level=logging.INFO)
         MLOpsRuntimeLogDaemon.get_instance(self.args).start_log_processor(
             run_id, train_edge_id, log_source=SchedulerConstants.get_log_source(request_json))
         logging.info("start the log processor")
diff --git a/python/fedml/core/mlops/mlops_runtime_log.py b/python/fedml/core/mlops/mlops_runtime_log.py
index 409feb2bb9..5884fb7d0c 100644
--- a/python/fedml/core/mlops/mlops_runtime_log.py
+++ b/python/fedml/core/mlops/mlops_runtime_log.py
@@ -241,7 +241,7 @@ def build_log_file_path_with_run_params(
     args = parser.parse_args()
     setattr(args, "using_mlops", True)
     setattr(args, "config_version", "local")
-    MLOpsRuntimeLog.get_instance(args).init_logs()
+    MLOpsRuntimeLog.get_instance(args).init_logs(log_level=logging.INFO)
     count = 0
     while True: