Skip to content

Commit

Permalink
Merge pull request #50 from macrocosm-os/dev
Browse files Browse the repository at this point in the history
Release 1.0.1
  • Loading branch information
Sid-Data-Universe authored Jun 28, 2024
2 parents 2fee2ba + 0afb23f commit b05af4f
Show file tree
Hide file tree
Showing 14 changed files with 236 additions and 105 deletions.
32 changes: 25 additions & 7 deletions competitions/utils.py
Original file line number Diff line number Diff line change
@@ -1,12 +1,30 @@
from typing import Optional
from typing import List, Optional

import constants
from competitions.data import Competition, CompetitionId
from competitions.data import Competition, CompetitionId, ModelConstraints


def get_competition(id: CompetitionId) -> Optional[Competition]:
    """Looks up a competition in the schedule by its id.

    Scans constants.COMPETITION_SCHEDULE in order and returns the first entry
    whose id equals the given id, or None when no entry matches.
    """
    matching_entries = (
        entry for entry in constants.COMPETITION_SCHEDULE if entry.id == id
    )
    return next(matching_entries, None)
def get_model_constraints(id: CompetitionId) -> Optional[ModelConstraints]:
    """Looks up the model constraints registered for a competition id.

    Returns None when the id has no entry in
    constants.MODEL_CONSTRAINTS_BY_COMPETITION_ID.
    """
    constraints_by_id = constants.MODEL_CONSTRAINTS_BY_COMPETITION_ID
    return constraints_by_id.get(id)


def get_competition_for_block(id: CompetitionId, block: int) -> Optional[Competition]:
    """Finds the competition with the given id in the schedule active at a block.

    The schedule is resolved through get_competition_schedule_for_block(block);
    the first competition in it whose id matches is returned, or None when the
    schedule contains no such competition.
    """
    active_schedule = get_competition_schedule_for_block(block)
    return next((entry for entry in active_schedule if entry.id == id), None)


def get_competition_schedule_for_block(block: int) -> List[Competition]:
    """Returns the competition schedule in effect at the given block.

    Each entry of constants.COMPETITION_SCHEDULE_BY_BLOCK is a
    (start_block, schedule) pair. The schedule returned is the one with the
    greatest start_block that is <= block, so the result does not depend on
    the order in which the entries are listed. If several entries share that
    start block, the one listed last wins.

    Args:
        block: The block to look up the schedule for.

    Returns:
        The list of competitions active at the block.

    Raises:
        AssertionError: If no entry starts at or before the block. This is
            raised explicitly (rather than via an assert statement) so the
            check still runs under `python -O` and the function never returns
            None.
    """
    active_start = None
    active_schedule = None
    for start_block, schedule in constants.COMPETITION_SCHEDULE_BY_BLOCK:
        if start_block > block:
            continue
        # ">=" keeps the later-listed entry on ties, matching a plain in-order scan.
        if active_start is None or start_block >= active_start:
            active_start = start_block
            active_schedule = schedule

    if active_schedule is None:
        raise AssertionError(f"No competition schedule found for block {block}")
    return active_schedule
69 changes: 41 additions & 28 deletions constants/__init__.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
import datetime as dt
import math
from pathlib import Path
from typing import List
from typing import Dict, List, Tuple

import torch
from transformers import (
Expand All @@ -20,7 +20,7 @@
# Project Constants.
# ---------------------------------

__version__ = "1.0.0"
__version__ = "1.0.1"
version_split = __version__.split(".")
__spec_version__ = (
(1000 * int(version_split[0]))
Expand Down Expand Up @@ -54,36 +54,49 @@
WEIGHT_SYNC_MINER_MIN_PERCENT = 0.10
# The root directory of this project.
ROOT_DIR = Path(__file__).parent.parent
# The maximum bytes for the hugging face repo
# The maximum bytes for the hugging face repo.
MAX_HUGGING_FACE_BYTES: int = 15 * 1024 * 1024 * 1024
# TODO: Adjust below to be done by block instead as in 9 with helpers.
# Schedule of model architectures
COMPETITION_SCHEDULE: List[Competition] = [
Competition(
id=CompetitionId.SN9_MODEL,
constraints=ModelConstraints(
max_model_parameter_size=6_900_000_000,
sequence_length=4096,
allowed_architectures=[
MistralForCausalLM,
LlamaForCausalLM,
BartForCausalLM,
FalconForCausalLM,
GPTNeoXForCausalLM,
PhiForCausalLM,
GemmaForCausalLM,
],
tokenizer="Xenova/gpt-4",
kwargs={
"torch_dtype": torch.bfloat16,
"attn_implementation": "flash_attention_2",
},
),
reward_percentage=1.0,
# Model constraints keyed by competition id. They are defined once here, separately
# from the block schedule, to ensure they are constant across blocks.
MODEL_CONSTRAINTS_BY_COMPETITION_ID: Dict[CompetitionId, ModelConstraints] = {
CompetitionId.SN9_MODEL: ModelConstraints(
# Upper bound on the total parameter count; models above it fail verification.
max_model_parameter_size=6_900_000_000,
# Sequence length passed to the data loader when tokenizing batches.
sequence_length=4096,
# The exact type of the loaded model must be one of these classes.
allowed_architectures=[
MistralForCausalLM,
LlamaForCausalLM,
BartForCausalLM,
FalconForCausalLM,
GPTNeoXForCausalLM,
PhiForCausalLM,
GemmaForCausalLM,
],
# Name handed to AutoTokenizer.from_pretrained by load_tokenizer.
tokenizer="Xenova/gpt-4",
# Extra keyword arguments unpacked into the model-loading call in
# HuggingFaceModelStore.download_model.
kwargs={
"torch_dtype": torch.bfloat16,
"attn_implementation": "flash_attention_2",
},
),
}

# Schedule of competitions by block.
# Each entry is a (start block, competitions) tuple. Keep entries sorted by ascending
# start block: get_competition_schedule_for_block() uses the entry with the latest
# start block that is <= the queried block.
COMPETITION_SCHEDULE_BY_BLOCK: List[Tuple[int, List[Competition]]] = [
(
# Start block of this schedule entry.
0,
[
Competition(
# Positional arguments; presumably (id, constraints, reward_percentage) --
# TODO confirm against the Competition definition in competitions.data.
CompetitionId.SN9_MODEL,
MODEL_CONSTRAINTS_BY_COMPETITION_ID[CompetitionId.SN9_MODEL],
1.0,
)
],
)
]

assert math.isclose(sum(x.reward_percentage for x in COMPETITION_SCHEDULE), 1.0)
# Sanity check at import time: within every schedule entry, the reward percentages
# of its competitions must add up to 1.0 (compared with float tolerance).
for block_and_competitions in COMPETITION_SCHEDULE_BY_BLOCK:
    assert math.isclose(
        1.0,
        sum(entry.reward_percentage for entry in block_and_competitions[1]),
    )

# ---------------------------------
# Miner/Validator Model parameters.
Expand Down
14 changes: 8 additions & 6 deletions finetune/mining.py
Original file line number Diff line number Diff line change
Expand Up @@ -76,15 +76,15 @@ async def push(
if remote_model_store is None:
remote_model_store = HuggingFaceModelStore()

competition = competition_utils.get_competition(competition_id)
if not competition:
model_constraints = competition_utils.get_model_constraints(competition_id)
if not model_constraints:
raise ValueError("Invalid competition_id")

# First upload the model to HuggingFace.
namespace, name = utils.validate_hf_repo_id(repo)
model_id = ModelId(namespace=namespace, name=name, competition_id=competition_id)
model_id = await remote_model_store.upload_model(
Model(id=model_id, pt_model=model), competition
Model(id=model_id, pt_model=model), model_constraints
)

bt.logging.success("Uploaded model to hugging face.")
Expand Down Expand Up @@ -213,13 +213,15 @@ async def load_remote_model(
if not model_metadata:
raise ValueError(f"No model metadata found for miner {uid}")

competition = competition_utils.get_competition(model_metadata.id.competition_id)
if not competition:
model_constraints = competition_utils.get_model_constraints(
model_metadata.id.competition_id
)
if not model_constraints:
raise ValueError("Invalid competition_id")

bt.logging.success(f"Fetched model metadata: {model_metadata}")
model: Model = await remote_model_store.download_model(
model_metadata.id, download_dir, competition
model_metadata.id, download_dir, model_constraints
)
return model.pt_model

Expand Down
8 changes: 4 additions & 4 deletions finetune/model.py
Original file line number Diff line number Diff line change
@@ -1,12 +1,12 @@
from transformers import AutoTokenizer, PreTrainedTokenizer

from competitions.data import Competition
from competitions.data import ModelConstraints


def load_tokenizer(
competition: Competition, cache_dir: str = None
model_constraints: ModelConstraints, cache_dir: str = None
) -> PreTrainedTokenizer:
"""Returns the fixed tokenizer for the given competition."""
"""Returns the fixed tokenizer for the given model constraints."""
return AutoTokenizer.from_pretrained(
competition.constraints.tokenizer, cache_dir=cache_dir
model_constraints.tokenizer, cache_dir=cache_dir
)
23 changes: 15 additions & 8 deletions model/model_updater.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,18 +30,20 @@ def __init__(

@staticmethod
def verify_model_satisfies_parameters(model: Model) -> bool:
competition = competition_utils.get_competition(model.id.competition_id)
if not competition:
model_constraints = competition_utils.get_model_constraints(
model.id.competition_id
)
if not model_constraints:
bt.logging.trace(f"No competition found for {model.id.competition_id}")
return False

# Check that the parameter count of the model is within allowed bounds.
parameter_size = sum(p.numel() for p in model.pt_model.parameters())
if parameter_size > competition.constraints.max_model_parameter_size:
if parameter_size > model_constraints.max_model_parameter_size:
return False

# Make sure it's an allowed architecture.
if type(model.pt_model) not in competition.constraints.allowed_architectures:
if type(model.pt_model) not in model_constraints.allowed_architectures:
return False

# Check parameters are sane
Expand Down Expand Up @@ -74,10 +76,13 @@ async def sync_model(self, hotkey: str, force: bool = False) -> bool:
f"No valid metadata found on the chain for hotkey {hotkey}"
)

competition = competition_utils.get_competition(metadata.id.competition_id)
# Check that the metadata indicates a competition available at time of upload.
competition = competition_utils.get_competition_for_block(
metadata.id.competition_id, metadata.block
)
if not competition:
bt.logging.trace(f"No competition found for {metadata.id.competition_id}")
raise ValueError(f"No competition found for {metadata.id.competition_id}")
bt.logging.trace(f"No competition found for {metadata.id.competition_id} at block {metadata.block}")
raise ValueError(f"No competition found for {metadata.id.competition_id} at block {metadata.block}")

# Check what model id the model tracker currently has for this hotkey.
tracker_model_metadata = self.model_tracker.get_model_metadata_for_miner_hotkey(
Expand All @@ -91,7 +96,9 @@ async def sync_model(self, hotkey: str, force: bool = False) -> bool:
path = self.local_store.get_path(hotkey)

# Otherwise we need to download the new model based on the metadata.
model = await self.remote_store.download_model(metadata.id, path, competition)
model = await self.remote_store.download_model(
metadata.id, path, competition.constraints
)

# Update the tracker even if the model fails the following checks to avoid redownloading without new metadata.
self.model_tracker.on_miner_model_updated(hotkey, metadata)
Expand Down
12 changes: 7 additions & 5 deletions model/storage/hugging_face/hugging_face_model_store.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
from huggingface_hub import HfApi
from transformers import AutoModelForCausalLM

from constants import MAX_HUGGING_FACE_BYTES, Competition
from constants import MAX_HUGGING_FACE_BYTES, ModelConstraints
from model.data import Model, ModelId
from model.storage.disk import utils
from model.storage.remote_model_store import RemoteModelStore
Expand All @@ -27,7 +27,9 @@ def get_access_token_if_exists(cls) -> Optional[str]:
"""Returns the access token if it exists."""
return os.getenv("HF_ACCESS_TOKEN")

async def upload_model(self, model: Model, competition: Competition) -> ModelId:
async def upload_model(
self, model: Model, model_constraints: ModelConstraints
) -> ModelId:
"""Uploads a trained model to Hugging Face."""
token = HuggingFaceModelStore.assert_access_token_exists()

Expand All @@ -44,7 +46,7 @@ async def upload_model(self, model: Model, competition: Competition) -> ModelId:
# local tmp directory after which it can be deleted.
with tempfile.TemporaryDirectory() as temp_dir:
model_with_hash = await self.download_model(
model_id_with_commit, temp_dir, competition
model_id_with_commit, temp_dir, model_constraints
)
# Return a ModelId with both the correct commit and hash.
return model_with_hash.id
Expand All @@ -53,7 +55,7 @@ async def download_model(
self,
model_id: ModelId,
local_path: str,
competition: Competition,
model_constraints: ModelConstraints,
) -> Model:
"""Retrieves a trained model from Hugging Face."""
if not model_id.commit:
Expand Down Expand Up @@ -84,7 +86,7 @@ async def download_model(
cache_dir=local_path,
use_safetensors=True,
token=token,
**competition.constraints.kwargs,
**model_constraints.kwargs,
)

# Get the directory the model was stored to.
Expand Down
18 changes: 8 additions & 10 deletions neurons/miner.py
Original file line number Diff line number Diff line change
Expand Up @@ -124,17 +124,17 @@ async def main(config: bt.config):
else:
use_wandb = True

competition = competition_utils.get_competition(config.competition_id)
if not competition:
model_constraints = competition_utils.get_model_constraints(config.competition_id)
if not model_constraints:
raise RuntimeError(f"No competition found for {config.competition_id}")
kwargs = competition.constraints.kwargs.copy()
kwargs = model_constraints.kwargs.copy()
kwargs["torch_dtype"] = (
torch.bfloat16 if config.dtype == "bfloat16" else torch.float16
)
kwargs["attn_implementation"] = config.attn_implementation

# Init model.
tokenizer = ft.model.load_tokenizer(competition, cache_dir=config.model_dir)
tokenizer = ft.model.load_tokenizer(model_constraints, cache_dir=config.model_dir)
model = await load_starting_model(config, metagraph, chain_metadata_store, kwargs)
model = model.train()
model = model.to(config.device)
Expand Down Expand Up @@ -198,9 +198,7 @@ async def main(config: bt.config):
page_size=config.cortex_steps,
)
bt.logging.debug("Finished loading data")
batches = loader.tokenize(
tokenizer, competition.constraints.sequence_length
)
batches = loader.tokenize(tokenizer, model_constraints.sequence_length)

# Enumerate over the data loader
n_batches = 0
Expand Down Expand Up @@ -262,12 +260,12 @@ async def main(config: bt.config):

# First, reload the best model from the training run.
model_to_upload = ft.mining.load_local_model(
model_dir, competition.constraints.kwargs
model_dir, model_constraints.kwargs
)
await ft.mining.push(
model_to_upload,
config.hf_repo_id,
competition.id,
config.competition_id,
wallet,
update_repo_visibility=config.update_repo_visibility,
metadata_store=chain_metadata_store,
Expand All @@ -292,7 +290,7 @@ async def main(config: bt.config):
config = neuron_config.miner_config()

if config.list_competitions:
print(constants.COMPETITION_SCHEDULE)
print(constants.COMPETITION_SCHEDULE_BY_BLOCK)
else:
print(config)
asyncio.run(main(config))
Loading

0 comments on commit b05af4f

Please sign in to comment.