Merge pull request #50 from macrocosm-os/dev

Release 1.0.1
macrocosm-os · Jun 28, 2024 · b05af4f
2 parents 2fee2ba + 0afb23f
commit b05af4f
Show file tree

Hide file tree

Showing 14 changed files with 236 additions and 105 deletions.
diff --git a/competitions/utils.py b/competitions/utils.py
@@ -1,12 +1,30 @@
-from typing import Optional
+from typing import List, Optional
 
 import constants
-from competitions.data import Competition, CompetitionId
+from competitions.data import Competition, CompetitionId, ModelConstraints
 
 
-def get_competition(id: CompetitionId) -> Optional[Competition]:
-    """Returns the competition with the given id, or None if it does not exist."""
-    for x in constants.COMPETITION_SCHEDULE:
-        if x.id == id:
-            return x
+def get_model_constraints(id: CompetitionId) -> Optional[ModelConstraints]:
+    """Returns the model constraints for the given id, or None if it does not exist."""
+    return constants.MODEL_CONSTRAINTS_BY_COMPETITION_ID.get(id, None)
+
+
+def get_competition_for_block(id: CompetitionId, block: int) -> Optional[Competition]:
+    """Returns the competition for the given id at the given block, or None if it does not exist."""
+    competition_schedule = get_competition_schedule_for_block(block)
+    for comp in competition_schedule:
+        if comp.id == id:
+            return comp
     return None
+
+
+def get_competition_schedule_for_block(block: int) -> List[Competition]:
+    """Returns the competition schedule at block."""
+    competition_schedule = None
+    for b, schedule in constants.COMPETITION_SCHEDULE_BY_BLOCK:
+        if block >= b:
+            competition_schedule = schedule
+    assert (
+        competition_schedule is not None
+    ), f"No competition schedule found for block {block}"
+    return competition_schedule
diff --git a/constants/__init__.py b/constants/__init__.py
@@ -1,7 +1,7 @@
 import datetime as dt
 import math
 from pathlib import Path
-from typing import List
+from typing import Dict, List, Tuple
 
 import torch
 from transformers import (
@@ -20,7 +20,7 @@
 # Project Constants.
 # ---------------------------------
 
-__version__ = "1.0.0"
+__version__ = "1.0.1"
 version_split = __version__.split(".")
 __spec_version__ = (
     (1000 * int(version_split[0]))
@@ -54,36 +54,49 @@
 WEIGHT_SYNC_MINER_MIN_PERCENT = 0.10
 # The root directory of this project.
 ROOT_DIR = Path(__file__).parent.parent
-# The maximum bytes for the hugging face repo
+# The maximum bytes for the hugging face repo.
 MAX_HUGGING_FACE_BYTES: int = 15 * 1024 * 1024 * 1024
-# TODO: Adjust below to be done by block instead as in 9 with helpers.
-# Schedule of model architectures
-COMPETITION_SCHEDULE: List[Competition] = [
-    Competition(
-        id=CompetitionId.SN9_MODEL,
-        constraints=ModelConstraints(
-            max_model_parameter_size=6_900_000_000,
-            sequence_length=4096,
-            allowed_architectures=[
-                MistralForCausalLM,
-                LlamaForCausalLM,
-                BartForCausalLM,
-                FalconForCausalLM,
-                GPTNeoXForCausalLM,
-                PhiForCausalLM,
-                GemmaForCausalLM,
-            ],
-            tokenizer="Xenova/gpt-4",
-            kwargs={
-                "torch_dtype": torch.bfloat16,
-                "attn_implementation": "flash_attention_2",
-            },
-        ),
-        reward_percentage=1.0,
+# Model constraints are defined per competition id so that they stay constant across blocks.
+MODEL_CONSTRAINTS_BY_COMPETITION_ID: Dict[CompetitionId, ModelConstraints] = {
+    CompetitionId.SN9_MODEL: ModelConstraints(
+        max_model_parameter_size=6_900_000_000,
+        sequence_length=4096,
+        allowed_architectures=[
+            MistralForCausalLM,
+            LlamaForCausalLM,
+            BartForCausalLM,
+            FalconForCausalLM,
+            GPTNeoXForCausalLM,
+            PhiForCausalLM,
+            GemmaForCausalLM,
+        ],
+        tokenizer="Xenova/gpt-4",
+        kwargs={
+            "torch_dtype": torch.bfloat16,
+            "attn_implementation": "flash_attention_2",
+        },
+    ),
+}
+
+# Schedule of competitions by block.
+COMPETITION_SCHEDULE_BY_BLOCK: List[Tuple[int, List[Competition]]] = [
+    (
+        0,
+        [
+            Competition(
+                CompetitionId.SN9_MODEL,
+                MODEL_CONSTRAINTS_BY_COMPETITION_ID[CompetitionId.SN9_MODEL],
+                1.0,
+            )
+        ],
     )
 ]
 
-assert math.isclose(sum(x.reward_percentage for x in COMPETITION_SCHEDULE), 1.0)
+for block_and_competitions in COMPETITION_SCHEDULE_BY_BLOCK:
+    assert math.isclose(
+        sum(competition.reward_percentage for competition in block_and_competitions[1]),
+        1.0,
+    )
 
 # ---------------------------------
 # Miner/Validator Model parameters.

diff --git a/finetune/mining.py b/finetune/mining.py
@@ -76,15 +76,15 @@ async def push(
     if remote_model_store is None:
         remote_model_store = HuggingFaceModelStore()
 
-    competition = competition_utils.get_competition(competition_id)
-    if not competition:
+    model_constraints = competition_utils.get_model_constraints(competition_id)
+    if not model_constraints:
         raise ValueError("Invalid competition_id")
 
     # First upload the model to HuggingFace.
     namespace, name = utils.validate_hf_repo_id(repo)
     model_id = ModelId(namespace=namespace, name=name, competition_id=competition_id)
     model_id = await remote_model_store.upload_model(
-        Model(id=model_id, pt_model=model), competition
+        Model(id=model_id, pt_model=model), model_constraints
     )
 
     bt.logging.success("Uploaded model to hugging face.")
@@ -213,13 +213,15 @@ async def load_remote_model(
     if not model_metadata:
         raise ValueError(f"No model metadata found for miner {uid}")
 
-    competition = competition_utils.get_competition(model_metadata.id.competition_id)
-    if not competition:
+    model_constraints = competition_utils.get_model_constraints(
+        model_metadata.id.competition_id
+    )
+    if not model_constraints:
         raise ValueError("Invalid competition_id")
 
     bt.logging.success(f"Fetched model metadata: {model_metadata}")
     model: Model = await remote_model_store.download_model(
-        model_metadata.id, download_dir, competition
+        model_metadata.id, download_dir, model_constraints
     )
     return model.pt_model
 

diff --git a/finetune/model.py b/finetune/model.py
@@ -1,12 +1,12 @@
 from transformers import AutoTokenizer, PreTrainedTokenizer
 
-from competitions.data import Competition
+from competitions.data import ModelConstraints
 
 
 def load_tokenizer(
-    competition: Competition, cache_dir: str = None
+    model_constraints: ModelConstraints, cache_dir: str = None
 ) -> PreTrainedTokenizer:
-    """Returns the fixed tokenizer for the given competition."""
+    """Returns the fixed tokenizer for the given model constraints."""
     return AutoTokenizer.from_pretrained(
-        competition.constraints.tokenizer, cache_dir=cache_dir
+        model_constraints.tokenizer, cache_dir=cache_dir
     )
diff --git a/model/model_updater.py b/model/model_updater.py
@@ -30,18 +30,20 @@ def __init__(
 
     @staticmethod
     def verify_model_satisfies_parameters(model: Model) -> bool:
-        competition = competition_utils.get_competition(model.id.competition_id)
-        if not competition:
+        model_constraints = competition_utils.get_model_constraints(
+            model.id.competition_id
+        )
+        if not model_constraints:
             bt.logging.trace(f"No competition found for {model.id.competition_id}")
             return False
 
         # Check that the parameter count of the model is within allowed bounds.
         parameter_size = sum(p.numel() for p in model.pt_model.parameters())
-        if parameter_size > competition.constraints.max_model_parameter_size:
+        if parameter_size > model_constraints.max_model_parameter_size:
             return False
 
         # Make sure it's an allowed architecture.
-        if type(model.pt_model) not in competition.constraints.allowed_architectures:
+        if type(model.pt_model) not in model_constraints.allowed_architectures:
             return False
 
         # Check parameters are sane
@@ -74,10 +76,13 @@ async def sync_model(self, hotkey: str, force: bool = False) -> bool:
                 f"No valid metadata found on the chain for hotkey {hotkey}"
             )
 
-        competition = competition_utils.get_competition(metadata.id.competition_id)
+        # Check that the metadata indicates a competition available at time of upload.
+        competition = competition_utils.get_competition_for_block(
+            metadata.id.competition_id, metadata.block
+        )
         if not competition:
-            bt.logging.trace(f"No competition found for {metadata.id.competition_id}")
-            raise ValueError(f"No competition found for {metadata.id.competition_id}")
+            bt.logging.trace(f"No competition found for {metadata.id.competition_id} at block {metadata.block}")
+            raise ValueError(f"No competition found for {metadata.id.competition_id} at block {metadata.block}")
 
         # Check what model id the model tracker currently has for this hotkey.
         tracker_model_metadata = self.model_tracker.get_model_metadata_for_miner_hotkey(
@@ -91,7 +96,9 @@ async def sync_model(self, hotkey: str, force: bool = False) -> bool:
         path = self.local_store.get_path(hotkey)
 
         # Otherwise we need to download the new model based on the metadata.
-        model = await self.remote_store.download_model(metadata.id, path, competition)
+        model = await self.remote_store.download_model(
+            metadata.id, path, competition.constraints
+        )
 
         # Update the tracker even if the model fails the following checks to avoid redownloading without new metadata.
         self.model_tracker.on_miner_model_updated(hotkey, metadata)

diff --git a/model/storage/hugging_face/hugging_face_model_store.py b/model/storage/hugging_face/hugging_face_model_store.py
@@ -6,7 +6,7 @@
 from huggingface_hub import HfApi
 from transformers import AutoModelForCausalLM
 
-from constants import MAX_HUGGING_FACE_BYTES, Competition
+from constants import MAX_HUGGING_FACE_BYTES, ModelConstraints
 from model.data import Model, ModelId
 from model.storage.disk import utils
 from model.storage.remote_model_store import RemoteModelStore
@@ -27,7 +27,9 @@ def get_access_token_if_exists(cls) -> Optional[str]:
         """Returns the access token if it exists."""
         return os.getenv("HF_ACCESS_TOKEN")
 
-    async def upload_model(self, model: Model, competition: Competition) -> ModelId:
+    async def upload_model(
+        self, model: Model, model_constraints: ModelConstraints
+    ) -> ModelId:
         """Uploads a trained model to Hugging Face."""
         token = HuggingFaceModelStore.assert_access_token_exists()
 
@@ -44,7 +46,7 @@ async def upload_model(self, model: Model, competition: Competition) -> ModelId:
         # local tmp directory after which it can be deleted.
         with tempfile.TemporaryDirectory() as temp_dir:
             model_with_hash = await self.download_model(
-                model_id_with_commit, temp_dir, competition
+                model_id_with_commit, temp_dir, model_constraints
             )
             # Return a ModelId with both the correct commit and hash.
             return model_with_hash.id
@@ -53,7 +55,7 @@ async def download_model(
         self,
         model_id: ModelId,
         local_path: str,
-        competition: Competition,
+        model_constraints: ModelConstraints,
     ) -> Model:
         """Retrieves a trained model from Hugging Face."""
         if not model_id.commit:
@@ -84,7 +86,7 @@ async def download_model(
             cache_dir=local_path,
             use_safetensors=True,
             token=token,
-            **competition.constraints.kwargs,
+            **model_constraints.kwargs,
         )
 
         # Get the directory the model was stored to.

diff --git a/neurons/miner.py b/neurons/miner.py
@@ -124,17 +124,17 @@ async def main(config: bt.config):
         else:
             use_wandb = True
 
-    competition = competition_utils.get_competition(config.competition_id)
-    if not competition:
+    model_constraints = competition_utils.get_model_constraints(config.competition_id)
+    if not model_constraints:
         raise RuntimeError(f"No competition found for {config.competition_id}")
-    kwargs = competition.constraints.kwargs.copy()
+    kwargs = model_constraints.kwargs.copy()
     kwargs["torch_dtype"] = (
         torch.bfloat16 if config.dtype == "bfloat16" else torch.float16
     )
     kwargs["attn_implementation"] = config.attn_implementation
 
     # Init model.
-    tokenizer = ft.model.load_tokenizer(competition, cache_dir=config.model_dir)
+    tokenizer = ft.model.load_tokenizer(model_constraints, cache_dir=config.model_dir)
     model = await load_starting_model(config, metagraph, chain_metadata_store, kwargs)
     model = model.train()
     model = model.to(config.device)
@@ -198,9 +198,7 @@ async def main(config: bt.config):
                 page_size=config.cortex_steps,
             )
             bt.logging.debug("Finished loading data")
-            batches = loader.tokenize(
-                tokenizer, competition.constraints.sequence_length
-            )
+            batches = loader.tokenize(tokenizer, model_constraints.sequence_length)
 
             # Enumerate over the data loader
             n_batches = 0
@@ -262,12 +260,12 @@ async def main(config: bt.config):
 
                 # First, reload the best model from the training run.
                 model_to_upload = ft.mining.load_local_model(
-                    model_dir, competition.constraints.kwargs
+                    model_dir, model_constraints.kwargs
                 )
                 await ft.mining.push(
                     model_to_upload,
                     config.hf_repo_id,
-                    competition.id,
+                    config.competition_id,
                     wallet,
                     update_repo_visibility=config.update_repo_visibility,
                     metadata_store=chain_metadata_store,
@@ -292,7 +290,7 @@ async def main(config: bt.config):
     config = neuron_config.miner_config()
 
     if config.list_competitions:
-        print(constants.COMPETITION_SCHEDULE)
+        print(constants.COMPETITION_SCHEDULE_BY_BLOCK)
     else:
         print(config)
         asyncio.run(main(config))