Commit a7e55f7

Rename a few methods to C# PascalCase (#6)
1 parent 07ea337 commit a7e55f7

6 files changed: +118 -115 lines changed

README.md

Lines changed: 15 additions & 15 deletions
@@ -35,21 +35,21 @@ namespace nietras.LargeLanguageModel
 {
     public static class Llm
     {
-        public static unsafe void attention_backward(float* dinp, float* dpreatt, float* datt, float* dout, float* inp, float* att, int B, int T, int C, int NH) { }
-        public static unsafe void attention_forward(float* output, float* preatt, float* att, float* inp, int B, int T, int C, int NH) { }
-        public static unsafe void crossentropy_forward(float* losses, float* probs, int* targets, int B, int T, int V) { }
-        public static unsafe void crossentropy_softmax_backward(float* dlogits, float* dlosses, float* probs, int* targets, int B, int T, int V) { }
-        public static unsafe void encoder_backward(float* dwte, float* dwpe, float* dout, int* inp, int B, int T, int C) { }
-        public static unsafe void encoder_forward(float* output, int* inp, float* wte, float* wpe, int B, int T, int C) { }
-        public static unsafe void gelu_backward(float* dinp, float* inp, float* dout, int N) { }
-        public static unsafe void gelu_forward(float* output, float* inp, int N) { }
-        public static unsafe void layernorm_backward(float* dinp, float* dweight, float* dbias, float* dout, float* inp, float* weight, float* mean, float* rstd, int B, int T, int C) { }
-        public static unsafe void layernorm_forward(float* output, float* mean, float* rstd, float* inp, float* weight, float* bias, int B, int T, int C) { }
-        public static unsafe void matmul_backward(float* dinp, float* dweight, float* dbias, float* dout, float* inp, float* weight, int B, int T, int C, int OC) { }
-        public static unsafe void matmul_forward(float* output, float* inp, float* weight, float* bias, int B, int T, int C, int OC) { }
-        public static unsafe void residual_backward(float* dinp1, float* dinp2, float* dout, int N) { }
-        public static unsafe void residual_forward(float* output, float* inp1, float* inp2, int N) { }
-        public static unsafe void softmax_forward(float* probs, float* logits, int B, int T, int V) { }
+        public static unsafe void AttentionBackward(float* dinp, float* dpreatt, float* datt, float* dout, float* inp, float* att, int B, int T, int C, int NH) { }
+        public static unsafe void AttentionForward(float* output, float* preatt, float* att, float* inp, int B, int T, int C, int NH) { }
+        public static unsafe void CrossEntropyForward(float* losses, float* probs, int* targets, int B, int T, int V) { }
+        public static unsafe void CrossEntropySoftmaxBackward(float* dlogits, float* dlosses, float* probs, int* targets, int B, int T, int V) { }
+        public static unsafe void EncoderBackward(float* dwte, float* dwpe, float* dout, int* inp, int B, int T, int C) { }
+        public static unsafe void EncoderForward(float* output, int* inp, float* wte, float* wpe, int B, int T, int C) { }
+        public static unsafe void GeLUBackward(float* dinp, float* inp, float* dout, int N) { }
+        public static unsafe void GeLUForward(float* output, float* inp, int N) { }
+        public static unsafe void LayerNormBackward(float* dinp, float* dweight, float* dbias, float* dout, float* inp, float* weight, float* mean, float* rstd, int B, int T, int C) { }
+        public static unsafe void LayerNormForward(float* output, float* mean, float* rstd, float* inp, float* weight, float* bias, int B, int T, int C) { }
+        public static unsafe void MatMulBackward(float* dinp, float* dweight, float* dbias, float* dout, float* inp, float* weight, int B, int T, int C, int OC) { }
+        public static unsafe void MatMulForward(float* output, float* inp, float* weight, float* bias, int B, int T, int C, int OC) { }
+        public static unsafe void ResidualBackward(float* dinp1, float* dinp2, float* dout, int N) { }
+        public static unsafe void ResidualForward(float* output, float* inp1, float* inp2, int N) { }
+        public static unsafe void SoftmaxForward(float* probs, float* logits, int B, int T, int V) { }
     }
 }
 ```
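
For orientation, here is a minimal, hypothetical call site against the renamed PascalCase surface shown in the README diff above. The class name, buffer sizes, and values are illustrative only and are not part of this commit.

using nietras.LargeLanguageModel;

internal static class RenameExample
{
    public static unsafe void Run()
    {
        const int N = 4;
        // Small scratch buffers, purely for illustration.
        float* inp = stackalloc float[N] { -1f, 0f, 1f, 2f };
        float* output = stackalloc float[N];

        // Before this commit the same call was gelu_forward(output, inp, N).
        Llm.GeLUForward(output, inp, N);
    }
}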

src/Llm/Extensions.cs

Lines changed: 30 additions & 1 deletion
@@ -1,9 +1,38 @@
-using System.Collections.Generic;
+using System;
+using System.Collections.Generic;
+using System.IO;
+using System.Runtime.CompilerServices;
+using System.Runtime.InteropServices;

 namespace nietras.LargeLanguageModel;

 internal static class Extensions
 {
+    public static unsafe void ReadExactlyUnmanaged<T>(this FileStream file, Span<T> values)
+        where T : unmanaged
+    {
+        fixed (T* ptr = values)
+        {
+            ReadExactlyUnmanaged(file, ptr, values.Length);
+        }
+    }
+
+    public static unsafe void ReadExactlyUnmanaged<T>(this FileStream file, T* values, long count)
+        where T : unmanaged
+    {
+        Span<T> buffer = stackalloc T[(256 * 1024) / Unsafe.SizeOf<T>()];
+        var totalReadCount = 0;
+        while (totalReadCount < count)
+        {
+            var countToRead = (int)Math.Min(buffer.Length, count - totalReadCount);
+            var bufferToRead = buffer.Slice(0, countToRead);
+            var span = MemoryMarshal.Cast<T, byte>(bufferToRead);
+            file.ReadExactly(span);
+            bufferToRead.CopyTo(new Span<T>(values + totalReadCount, countToRead));
+            totalReadCount += countToRead;
+        }
+    }
+
     public static IEnumerable<(int i0, int i1)> Enumerate(int count0, int count1)
     {
         for (var i0 = 0; i0 < count0; i0++)
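
The new ReadExactlyUnmanaged helper fills a typed buffer directly from a FileStream in chunks of at most 256 KiB of stack space. A minimal usage sketch follows; the file name and element count are made up for illustration, and since Extensions is internal the call is assumed to come from inside the same assembly. Note that Stream.ReadExactly throws EndOfStreamException if the stream ends before the requested bytes are read, so a short file surfaces as an exception rather than a partial fill.

using System;
using System.IO;
using nietras.LargeLanguageModel;

internal static class ReadExample
{
    public static void Run()
    {
        // Hypothetical file and element count, for illustration only.
        using var file = File.OpenRead("params.bin");
        Span<float> values = new float[1024];
        // Reads exactly 1024 floats (4096 bytes) or throws EndOfStreamException.
        file.ReadExactlyUnmanaged(values);
    }
}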

src/Llm/Gpt2.Test.cs

Lines changed: 7 additions & 7 deletions
@@ -13,7 +13,7 @@ public static unsafe void Test()
         var dataDirectory = Path.Combine(location!, "../../../");
         // build the GPT-2 model from a checkpoint
         GPT2 model;
-        gpt2_build_from_checkpoint(&model, dataDirectory + "gpt2_124M.bin");
+        BuildFromCheckpoint(&model, dataDirectory + "gpt2_124M.bin");

         int C = model.config.channels;
         int V = model.config.vocab_size;
@@ -35,7 +35,7 @@ public static unsafe void Test()
         Log($"seq_len: {T}");

         ParameterTensors expected_grads;
-        float* expected_grads_memory = malloc_and_point_parameters(&expected_grads, model.param_sizes);
+        float* expected_grads_memory = AllocateAndPointParameters(&expected_grads, model.param_sizes);

         // inputs and expected outputs, only used for error checking
         int* x = malloc<int>(B * T);
@@ -61,9 +61,9 @@ public static unsafe void Test()
         {
             stopwatch.Restart();

-            gpt2_forward(&model, x, y, B, T);
-            gpt2_zero_grad(&model);
-            gpt2_backward(&model);
+            Forward(&model, x, y, B, T);
+            ZeroGrad(&model);
+            Backward(&model);

             double time_elapsed_s = stopwatch.Elapsed.TotalSeconds;

@@ -126,7 +126,7 @@ public static unsafe void Test()
                 }
             }

-            gpt2_update(&model, 1e-4f, 0.9f, 0.999f, 1e-8f, 0.01f, step + 1);
+            Update(&model, 1e-4f, 0.9f, 0.999f, 1e-8f, 0.01f, step + 1);

             // print the timing information at the end
             Log($"step {step}: loss {model.mean_loss} (took {time_elapsed_s * 1000} ms)");
@@ -168,7 +168,7 @@ public static unsafe void Test()
         free(expected_logits);
         free(expected_loss);
         free(expected_grads_memory);
-        gpt2_free(&model);
+        Free(&model);
     }

     // poor man's tensor checker

src/Llm/Gpt2.Train.cs

Lines changed: 8 additions & 8 deletions
@@ -14,7 +14,7 @@ public static unsafe void Train()
         var dataDirectory = Path.Combine(location!, "../../../");
         // build the GPT-2 model from a checkpoint
         GPT2 model;
-        gpt2_build_from_checkpoint(&model, dataDirectory + "gpt2_124M.bin");
+        BuildFromCheckpoint(&model, dataDirectory + "gpt2_124M.bin");

         // build the DataLoaders from tokens files. for now use tiny_shakespeare if available, else tiny_stories
         var tiny_stories_train = dataDirectory + "TinyStories_train.bin";
@@ -51,7 +51,7 @@ public static unsafe void Train()
             for (int i = 0; i < val_num_batches; i++)
             {
                 val_loader.dataloader_next_batch();
-                gpt2_forward(&model, val_loader.inputs, val_loader.targets, B, T);
+                Forward(&model, val_loader.inputs, val_loader.targets, B, T);
                 val_loss += model.mean_loss;
             }
             val_loss /= val_num_batches;
@@ -68,7 +68,7 @@ public static unsafe void Train()
                 // for each t, we re-compute all activations between 0 and t
                 // leaving this alone because you want separate code for inference anyway
                 // the inference here is just for sanity checking purposes
-                gpt2_forward(&model, gen_tokens, null, 1, t);
+                Forward(&model, gen_tokens, null, 1, t);
                 float* probs = model.acts.probs + (t - 1) * model.config.vocab_size;
                 float coin = random_f32(&rng_state);
                 int next_token = sample_mult(probs, model.config.vocab_size, coin);
@@ -85,15 +85,15 @@ public static unsafe void Train()
             // do a training step
             stopwatch.Restart();
             train_loader.dataloader_next_batch();
-            gpt2_forward(&model, train_loader.inputs, train_loader.targets, B, T);
-            gpt2_zero_grad(&model);
-            gpt2_backward(&model);
-            gpt2_update(&model, 1e-4f, 0.9f, 0.999f, 1e-8f, 0.0f, step + 1);
+            Forward(&model, train_loader.inputs, train_loader.targets, B, T);
+            ZeroGrad(&model);
+            Backward(&model);
+            Update(&model, 1e-4f, 0.9f, 0.999f, 1e-8f, 0.0f, step + 1);
             double time_elapsed_ms = stopwatch.Elapsed.TotalMilliseconds;
             Log($"step {step}: train loss {model.mean_loss} (took {time_elapsed_ms} ms)");
         }

         // free
-        gpt2_free(&model);
+        Free(&model);
     }
 }
