
Commit

adding docstring
rajithkrishnegowda committed Jan 24, 2025
1 parent c5890de commit 30851e0
Showing 1 changed file with 111 additions and 2 deletions.
113 changes: 111 additions & 2 deletions openfl-tutorials/experimental/workflow/LLM/phi-4.ipynb
@@ -125,6 +125,16 @@
"outputs": [],
"source": [
"def file_checksum(file_path, algorithm=\"sha256\"):\n",
" \"\"\"\n",
" Calculate the checksum of a file using the specified hashing algorithm.\n",
"\n",
" Parameters:\n",
" file_path (str): The path to the file for which the checksum is to be calculated.\n",
" algorithm (str): The hashing algorithm to use (default is 'sha256').\n",
"\n",
" Returns:\n",
" str: The calculated checksum of the file.\n",
" \"\"\"\n",
" hash_func = hashlib.new(algorithm)\n",
" with open(file_path, \"rb\") as f:\n",
" for chunk in iter(lambda: f.read(4096), b\"\"):\n",
@@ -259,6 +269,15 @@
"outputs": [],
"source": [
"def generate_prompt(data_point):\n",
" \"\"\"\n",
" Generate a prompt based on the given data point.\n",
"\n",
" Parameters:\n",
" data_point (dict): A dictionary containing the instruction, input, and output.\n",
"\n",
" Returns:\n",
" str: The generated prompt as a string.\n",
" \"\"\"\n",
" if data_point[\"input\"]:\n",
" return f\"\"\"Below is an instruction that describes a task, paired with an input that provides further context. Write a response that appropriately completes the request. \n",
"\n",
@@ ... @@
"\n",
"\n",
"def tokenize(prompt, add_eos_token=True):\n",
" # there's probably a way to do this with the tokenizer settings\n",
" # but again, gotta move fast\n",
" \"\"\"\n",
" Tokenize the given prompt.\n",
"\n",
" Parameters:\n",
" prompt (str): The prompt to be tokenized.\n",
" add_eos_token (bool): Whether to add an end-of-sequence token (default is True).\n",
"\n",
" Returns:\n",
" dict: A dictionary containing the tokenized input IDs and attention mask.\n",
" \"\"\"\n",
" result = tokenizer(\n",
" prompt,\n",
" truncation=True,\n",
@@ ... @@
"\n",
"\n",
"def generate_and_tokenize_prompt(data_point):\n",
" \"\"\"\n",
" Generate and tokenize a prompt based on the given data point.\n",
"\n",
" Parameters:\n",
" data_point (dict): A dictionary containing the instruction, input, and output.\n",
"\n",
" Returns:\n",
" dict: A dictionary containing the tokenized input IDs, attention mask, and labels.\n",
" \"\"\"\n",
" full_prompt = generate_prompt(data_point)\n",
" tokenized_full_prompt = tokenize(full_prompt)\n",
" user_prompt = generate_prompt({**data_point, \"output\": \"\"})\n",
@@ -344,6 +380,17 @@
"outputs": [],
"source": [
"def FedAvg(peft_params, model, weights=None):\n",
" \"\"\"\n",
" Perform Federated Averaging (FedAvg) on the model parameters.\n",
"\n",
" Parameters:\n",
" peft_params (list): A list of state dictionaries containing the model parameters from different clients.\n",
" model (torch.nn.Module): The model to which the averaged parameters will be applied.\n",
" weights (list, optional): A list of weights for averaging the parameters. If None, equal weights are used.\n",
"\n",
" Returns:\n",
" torch.nn.Module: The model with the averaged parameters applied.\n",
" \"\"\"\n",
" state_dicts = peft_params\n",
" state_dict = get_peft_model_state_dict(model)\n",
" for key in peft_params[0]:\n",
@@ -388,6 +435,18 @@
"source": [
"class FederatedFlow(FLSpec):\n",
" def __init__(self, model=None, optimizer=None, rounds=3, **kwargs):\n",
" \"\"\"\n",
" Initialize the class with the given model, optimizer, and number of rounds.\n",
"\n",
" Parameters:\n",
" model (torch.nn.Module, optional): The model to be used. If None, a ValueError is raised.\n",
" optimizer (torch.optim.Optimizer, optional): The optimizer to be used.\n",
" rounds (int, optional): The number of rounds for training or processing (default is 3).\n",
" **kwargs: Additional keyword arguments to be passed to the superclass initializer.\n",
"\n",
" Raises:\n",
" ValueError: If no model is provided.\n",
" \"\"\"\n",
" super().__init__(**kwargs)\n",
" if model is not None:\n",
" self.model = model\n",
@@ ... @@
"\n",
" @aggregator\n",
" def start(self):\n",
" \"\"\"\n",
" Initialize the model and set up the collaborators for federated learning.\n",
"\n",
" This method performs the initial setup for the model, including setting the\n",
" collaborators, initializing private variables, and starting the first round\n",
" of the federated learning process.\n",
" \"\"\"\n",
" print(f\"Performing initialization for model\")\n",
" self.collaborators = self.runtime.collaborators\n",
" self.private = 10\n",
@@ ... @@
" \n",
" @collaborator\n",
" def aggregated_model_validation(self):\n",
" \"\"\"\n",
" Perform aggregated model validation for a collaborator.\n",
"\n",
" This method loads the model, applies the PEFT configuration, and evaluates\n",
" the model using the provided training and evaluation datasets. The validation\n",
" score is then stored and the next step in the process is triggered.\n",
" \"\"\"\n",
" print(f\"Performing aggregated model validation for collaborator {self.input}\")\n",
" self.model = AutoModelForCausalLM.from_pretrained(\n",
" checkpoint_path, return_dict=True, **model_kwargs\n",
@@ -450,6 +523,13 @@
"\n",
" @collaborator\n",
" def train(self):\n",
" \"\"\"\n",
" Train the model for a collaborator.\n",
"\n",
" This method trains the model using the provided training and evaluation datasets.\n",
" The training loss is stored, the model is saved, and the next step in the process\n",
" is triggered.\n",
" \"\"\"\n",
" trainer = SFTTrainer(\n",
" model=self.model,\n",
" args=train_conf,\n",
@@ ... @@
"\n",
" @collaborator\n",
" def local_model_validation(self):\n",
" \"\"\"\n",
" Perform local model validation for a collaborator.\n",
"\n",
" This method evaluates the model using the provided training and evaluation datasets.\n",
" The validation score is stored, the PEFT parameters are updated, and the next step\n",
" in the process is triggered.\n",
" \"\"\"\n",
" trainer = SFTTrainer(\n",
" model=self.model,\n",
" args=train_conf,\n",
@@ ... @@
"\n",
" @aggregator\n",
" def join(self, inputs):\n",
" \"\"\"\n",
" Aggregate the results from all collaborators and update the model.\n",
"\n",
" This method calculates the average loss, aggregated model accuracy, and local model\n",
" accuracy from all collaborators. The model parameters are updated using Federated\n",
" Averaging (FedAvg), and the next round of the process is triggered if applicable.\n",
" \"\"\"\n",
" self.average_loss = sum(input.loss for input in inputs) / len(inputs)\n",
" self.aggregated_model_accuracy = sum(\n",
" input.agg_validation_score for input in inputs\n",
@@ -525,6 +619,12 @@
"\n",
" @aggregator\n",
" def end(self):\n",
" \"\"\"\n",
" End the federated learning process.\n",
"\n",
" This method marks the end of the federated learning process and performs any\n",
" necessary cleanup or finalization steps.\n",
" \"\"\"\n",
" print(f\"This is the end of the flow\")\n"
]
},
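For orientation, the class above uses OpenFL's Workflow API: @aggregator and @collaborator place each step, self.next(..., foreach="collaborators") fans work out, and join collects the collaborators' attributes. A stripped-down skeleton of that control flow is sketched below; the import paths and method names are assumptions based on OpenFL's experimental workflow tutorials, not lines from this commit:

from openfl.experimental.workflow.interface import FLSpec
from openfl.experimental.workflow.placement import aggregator, collaborator

class MinimalFlow(FLSpec):
    def __init__(self, rounds=3, **kwargs):
        super().__init__(**kwargs)
        self.rounds = rounds
        self.current_round = 0

    @aggregator
    def start(self):
        # Fan out: every collaborator runs the next step on its own data.
        self.collaborators = self.runtime.collaborators
        self.next(self.validate, foreach="collaborators")

    @collaborator
    def validate(self):
        self.score = 0.0  # placeholder for a real evaluation metric
        self.next(self.join)

    @aggregator
    def join(self, inputs):
        # Collect per-collaborator attributes and decide whether to run another round.
        self.avg_score = sum(i.score for i in inputs) / len(inputs)
        self.current_round += 1
        if self.current_round < self.rounds:
            self.next(self.validate, foreach="collaborators")
        else:
            self.next(self.end)

    @aggregator
    def end(self):
        print("flow finished")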
@@ -630,6 +730,15 @@
"\n",
"# Define the function to start the federated learning process with user-specified rounds and display the output\n",
"def start_federated_learning(rounds):\n",
" \"\"\"\n",
" Start the federated learning process for the specified number of rounds.\n",
"\n",
" Parameters:\n",
" rounds (int): The number of rounds for the federated learning process.\n",
"\n",
" Returns:\n",
" tuple: A tuple containing the aggregated model accuracy, average loss, and local model accuracy.\n",
" \"\"\"\n",
" flflow = FederatedFlow(model, rounds=rounds)\n",
" flflow.runtime = local_runtime\n",
" flflow.run()\n",
