Skip to content

Commit 644837e

Browse files
committed
re-activated validator recent model demand loading mechanism
1 parent 62e1aae commit 644837e

File tree

3 files changed

+24
-16
lines changed

3 files changed

+24
-16
lines changed

tensorlink/ml/graphing.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -64,7 +64,7 @@ def __init__(self, user_memory: int = 0):
6464

6565
def create_distributed_config(
6666
self,
67-
model: nn.Module,
67+
model: Union[nn.Module, str],
6868
training: bool,
6969
trusted: bool,
7070
handle_layers: bool = True,

tensorlink/ml/validator.py

Lines changed: 22 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -254,17 +254,15 @@ def _get_popular_models(self) -> list:
254254
def _manage_auto_loaded_models(self):
255255
"""Manage auto-loaded models based on popularity from JSON cache, falling back to DEFAULT_MODELS"""
256256
popular_models = self._get_popular_models()
257-
# if not popular_models:
258-
# models_to_load = DEFAULT_MODELS[: self.MAX_AUTO_MODELS]
259-
# else:
260-
# models_to_load = popular_models[: self.MAX_AUTO_MODELS]
261-
# self.send_request(
262-
# "debug_print",
263-
# (f"Loading popular models: {models_to_load}", "blue", logging.INFO),
264-
# )
265-
266-
# If no popular models tracked yet, use DEFAULT_MODELS as fallback
267-
models_to_load = DEFAULT_MODELS[: self.MAX_AUTO_MODELS]
257+
258+
if not popular_models:
259+
models_to_load = DEFAULT_MODELS[: self.MAX_AUTO_MODELS]
260+
else:
261+
models_to_load = popular_models[: self.MAX_AUTO_MODELS]
262+
self.send_request(
263+
"debug_print",
264+
(f"Loading popular models: {models_to_load}", "blue", logging.INFO),
265+
)
268266

269267
# Load models up to the limit
270268
for model_name in models_to_load:
@@ -305,10 +303,10 @@ def _manage_auto_loaded_models(self):
305303
)
306304
self._remove_hosted_job(model_name)
307305

308-
def inspect_model(self, model_name: str, job_data: dict = None):
306+
def inspect_model(self, model_name: str, job_data: dict):
309307
"""Inspect a model to determine network requirements and store distribution in JSON cache"""
310308
parser = ModelParser()
311-
model_name = job_data.get("model_name", model_name)
309+
model_name: str = job_data.get("model_name", model_name)
312310

313311
# Load HF model, create and save distribution
314312
distribution = parser.create_distributed_config(
@@ -372,7 +370,7 @@ def check_node(self):
372370
job_data = self.send_request("get_jobs", None)
373371

374372
if isinstance(job_data, dict):
375-
model_name = job_data.get("model_name")
373+
model_name: str = job_data.get("model_name", "")
376374

377375
if job_data.get("api"):
378376
payment = job_data.get("payment", 0)
@@ -625,6 +623,16 @@ def _finalize_hosted_job(self, model_name: str):
625623
# Distribute the model across workers
626624
self.modules[module_id].distribute_model(distribution)
627625

626+
# Ensure workers are registered
627+
for dist_module_id, dist_module_info in distribution.items():
628+
if dist_module_id in self.modules and isinstance(
629+
self.modules[dist_module_id], dict
630+
):
631+
# Update workers list to ensure it's current
632+
self.modules[dist_module_id]["workers"] = dist_module_info.get(
633+
"workers", []
634+
)
635+
628636
# Load tokenizer
629637
self.tokenizers[model_name] = AutoTokenizer.from_pretrained(model_name)
630638

tensorlink/p2p/smart_node.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1457,7 +1457,7 @@ def send_to_node_from_file(self, n: Connection, file, tag):
14571457
def handle_message(self, node: Connection, data) -> None:
14581458
"""Callback method that handles incoming data from connections"""
14591459
self.debug_print(
1460-
f"handle_message from {node.host}:{node.port} -> {data.__sizeof__()/1e6}MB",
1460+
f"handle_message from {node.host}:{node.port} -> {data.__sizeof__() / 1e6}MB",
14611461
tag="Smartnode",
14621462
)
14631463

0 commit comments

Comments (0)