
Commit

adding docstring
rajithkrishnegowda committed Jan 24, 2025
1 parent c5890de commit 30851e0
Showing 1 changed file with 111 additions and 2 deletions.
113 changes: 111 additions & 2 deletions openfl-tutorials/experimental/workflow/LLM/phi-4.ipynb
@@ -125,6 +125,16 @@
"outputs": [],
"source": [
"def file_checksum(file_path, algorithm=\"sha256\"):\n",
" \"\"\"\n",
" Calculate the checksum of a file using the specified hashing algorithm.\n",
"\n",
" Parameters:\n",
" file_path (str): The path to the file for which the checksum is to be calculated.\n",
" algorithm (str): The hashing algorithm to use (default is 'sha256').\n",
"\n",
" Returns:\n",
" str: The calculated checksum of the file.\n",
" \"\"\"\n",
" hash_func = hashlib.new(algorithm)\n",
" with open(file_path, \"rb\") as f:\n",
" for chunk in iter(lambda: f.read(4096), b\"\"):\n",
@@ -259,6 +269,15 @@
"outputs": [],
"source": [
"def generate_prompt(data_point):\n",
" \"\"\"\n",
" Generate a prompt based on the given data point.\n",
"\n",
" Parameters:\n",
" data_point (dict): A dictionary containing the instruction, input, and output.\n",
"\n",
" Returns:\n",
" str: The generated prompt as a string.\n",
" \"\"\"\n",
" if data_point[\"input\"]:\n",
" return f\"\"\"Below is an instruction that describes a task, paired with an input that provides further context. Write a response that appropriately completes the request. \n",
"\n",
@@ ... @@
"\n",
"\n",
"def tokenize(prompt, add_eos_token=True):\n",
" # there's probably a way to do this with the tokenizer settings\n",
" # but again, gotta move fast\n",
" \"\"\"\n",
" Tokenize the given prompt.\n",
"\n",
" Parameters:\n",
" prompt (str): The prompt to be tokenized.\n",
" add_eos_token (bool): Whether to add an end-of-sequence token (default is True).\n",
"\n",
" Returns:\n",
" dict: A dictionary containing the tokenized input IDs and attention mask.\n",
" \"\"\"\n",
" result = tokenizer(\n",
" prompt,\n",
" truncation=True,\n",
@@ ... @@
"\n",
"\n",
"def generate_and_tokenize_prompt(data_point):\n",
" \"\"\"\n",
" Generate and tokenize a prompt based on the given data point.\n",
"\n",
" Parameters:\n",
" data_point (dict): A dictionary containing the instruction, input, and output.\n",
"\n",
" Returns:\n",
" dict: A dictionary containing the tokenized input IDs, attention mask, and labels.\n",
" \"\"\"\n",
" full_prompt = generate_prompt(data_point)\n",
" tokenized_full_prompt = tokenize(full_prompt)\n",
" user_prompt = generate_prompt({**data_point, \"output\": \"\"})\n",
@@ -344,6 +380,17 @@
"outputs": [],
"source": [
"def FedAvg(peft_params, model, weights=None):\n",
" \"\"\"\n",
" Perform Federated Averaging (FedAvg) on the model parameters.\n",
"\n",
" Parameters:\n",
" peft_params (list): A list of state dictionaries containing the model parameters from different clients.\n",
" model (torch.nn.Module): The model to which the averaged parameters will be applied.\n",
" weights (list, optional): A list of weights for averaging the parameters. If None, equal weights are used.\n",
"\n",
" Returns:\n",
" torch.nn.Module: The model with the averaged parameters applied.\n",
" \"\"\"\n",
" state_dicts = peft_params\n",
" state_dict = get_peft_model_state_dict(model)\n",
" for key in peft_params[0]:\n",
@@ -388,6 +435,18 @@
"source": [
"class FederatedFlow(FLSpec):\n",
" def __init__(self, model=None, optimizer=None, rounds=3, **kwargs):\n",
" \"\"\"\n",
" Initialize the class with the given model, optimizer, and number of rounds.\n",
"\n",
" Parameters:\n",
" model (torch.nn.Module, optional): The model to be used. If None, a ValueError is raised.\n",
" optimizer (torch.optim.Optimizer, optional): The optimizer to be used.\n",
" rounds (int, optional): The number of rounds for training or processing (default is 3).\n",
" **kwargs: Additional keyword arguments to be passed to the superclass initializer.\n",
"\n",
" Raises:\n",
" ValueError: If no model is provided.\n",
" \"\"\"\n",
" super().__init__(**kwargs)\n",
" if model is not None:\n",
" self.model = model\n",
@@ ... @@
"\n",
" @aggregator\n",
" def start(self):\n",
" \"\"\"\n",
" Initialize the model and set up the collaborators for federated learning.\n",
"\n",
" This method performs the initial setup for the model, including setting the\n",
" collaborators, initializing private variables, and starting the first round\n",
" of the federated learning process.\n",
" \"\"\"\n",
" print(f\"Performing initialization for model\")\n",
" self.collaborators = self.runtime.collaborators\n",
" self.private = 10\n",
@@ ... @@
" \n",
" @collaborator\n",
" def aggregated_model_validation(self):\n",
" \"\"\"\n",
" Perform aggregated model validation for a collaborator.\n",
"\n",
" This method loads the model, applies the PEFT configuration, and evaluates\n",
" the model using the provided training and evaluation datasets. The validation\n",
" score is then stored and the next step in the process is triggered.\n",
" \"\"\"\n",
" print(f\"Performing aggregated model validation for collaborator {self.input}\")\n",
" self.model = AutoModelForCausalLM.from_pretrained(\n",
" checkpoint_path, return_dict=True, **model_kwargs\n",
@@ -450,6 +523,13 @@
"\n",
" @collaborator\n",
" def train(self):\n",
" \"\"\"\n",
" Train the model for a collaborator.\n",
"\n",
" This method trains the model using the provided training and evaluation datasets.\n",
" The training loss is stored, the model is saved, and the next step in the process\n",
" is triggered.\n",
" \"\"\"\n",
" trainer = SFTTrainer(\n",
" model=self.model,\n",
" args=train_conf,\n",
@@ ... @@
"\n",
" @collaborator\n",
" def local_model_validation(self):\n",
" \"\"\"\n",
" Perform local model validation for a collaborator.\n",
"\n",
" This method evaluates the model using the provided training and evaluation datasets.\n",
" The validation score is stored, the PEFT parameters are updated, and the next step\n",
" in the process is triggered.\n",
" \"\"\"\n",
" trainer = SFTTrainer(\n",
" model=self.model,\n",
" args=train_conf,\n",
@@ ... @@
"\n",
" @aggregator\n",
" def join(self, inputs):\n",
" \"\"\"\n",
" Aggregate the results from all collaborators and update the model.\n",
"\n",
" This method calculates the average loss, aggregated model accuracy, and local model\n",
" accuracy from all collaborators. The model parameters are updated using Federated\n",
" Averaging (FedAvg), and the next round of the process is triggered if applicable.\n",
" \"\"\"\n",
" self.average_loss = sum(input.loss for input in inputs) / len(inputs)\n",
" self.aggregated_model_accuracy = sum(\n",
" input.agg_validation_score for input in inputs\n",
@@ -525,6 +619,12 @@
"\n",
" @aggregator\n",
" def end(self):\n",
" \"\"\"\n",
" End the federated learning process.\n",
"\n",
" This method marks the end of the federated learning process and performs any\n",
" necessary cleanup or finalization steps.\n",
" \"\"\"\n",
" print(f\"This is the end of the flow\")\n"
]
},
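For orientation, the class above uses OpenFL's Workflow API: @aggregator and @collaborator place each step, self.next(..., foreach="collaborators") fans work out, and join collects the collaborators' attributes. A stripped-down skeleton of that control flow is sketched below; the import paths and method names are assumptions based on OpenFL's experimental workflow tutorials, not lines from this commit:

from openfl.experimental.workflow.interface import FLSpec
from openfl.experimental.workflow.placement import aggregator, collaborator

class MinimalFlow(FLSpec):
    def __init__(self, rounds=3, **kwargs):
        super().__init__(**kwargs)
        self.rounds = rounds
        self.current_round = 0

    @aggregator
    def start(self):
        # Fan out: every collaborator runs the next step on its own data.
        self.collaborators = self.runtime.collaborators
        self.next(self.validate, foreach="collaborators")

    @collaborator
    def validate(self):
        self.score = 0.0  # placeholder for a real evaluation metric
        self.next(self.join)

    @aggregator
    def join(self, inputs):
        # Collect per-collaborator attributes and decide whether to run another round.
        self.avg_score = sum(i.score for i in inputs) / len(inputs)
        self.current_round += 1
        if self.current_round < self.rounds:
            self.next(self.validate, foreach="collaborators")
        else:
            self.next(self.end)

    @aggregator
    def end(self):
        print("flow finished")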
@@ -630,6 +730,15 @@
"\n",
"# Define the function to start the federated learning process with user-specified rounds and display the output\n",
"def start_federated_learning(rounds):\n",
" \"\"\"\n",
" Start the federated learning process for the specified number of rounds.\n",
"\n",
" Parameters:\n",
" rounds (int): The number of rounds for the federated learning process.\n",
"\n",
" Returns:\n",
" tuple: A tuple containing the aggregated model accuracy, average loss, and local model accuracy.\n",
" \"\"\"\n",
" flflow = FederatedFlow(model, rounds=rounds)\n",
" flflow.runtime = local_runtime\n",
" flflow.run()\n",
