Dev #18

Merged: 4 commits, Mar 11, 2024

Changes from all commits
3 changes: 3 additions & 0 deletions .gitignore
@@ -3,3 +3,6 @@ __pycache__/
dist/
output*
.DS_Store
env/
build/
.eggs/
Binary file added data/mnist/raw/t10k-images-idx3-ubyte
Binary file added data/mnist/raw/t10k-images-idx3-ubyte.gz
Binary file added data/mnist/raw/t10k-labels-idx1-ubyte
Binary file added data/mnist/raw/t10k-labels-idx1-ubyte.gz
Binary file added data/mnist/raw/train-images-idx3-ubyte
Binary file added data/mnist/raw/train-images-idx3-ubyte.gz
Binary file added data/mnist/raw/train-labels-idx1-ubyte
Binary file added data/mnist/raw/train-labels-idx1-ubyte.gz
88 changes: 0 additions & 88 deletions example/language_model/lm.py

This file was deleted.

482 changes: 482 additions & 0 deletions example/language_model/makemore_part2_mlp.ipynb

Large diffs are not rendered by default.

105 changes: 105 additions & 0 deletions example/language_model/rnn.py
@@ -0,0 +1,105 @@
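# Character-level MLP language model: an embedding table, one tanh hidden
# layer, and a linear output layer, trained on names.txt.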
import torch
import torch.nn.functional as F
import matplotlib.pyplot as plt
from math import sqrt

def generate_mapping(data):
    chars = sorted(list(set(''.join(data))))
    stoi = {char: index + 1 for index, char in enumerate(chars)}
    # marks beginning or end of a word
    stoi['.'] = 0
    return stoi

def generate_learning_rates(size):
    lre = torch.linspace(-6, 0, size)
    return 10 ** lre  # we want the learning rates to be spaced exponentially
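
# For example, generate_learning_rates(3) returns roughly
# tensor([1e-06, 1e-03, 1e+00]): the exponents are evenly spaced, so the
# rates span six orders of magnitude. Useful for a learning-rate sweep.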

def load_data(context_size):
    data, label = [], []
    with open('./names.txt', 'r') as f:
        words = f.read().splitlines()
    stoi = generate_mapping(words)
    # itos = {v: k for k, v in stoi.items()}

    for w in words:
        context = [0] * context_size
        for ch in w + '.':
            ix = stoi[ch]
            data.append(context)
            label.append(ix)
            context = context[1:] + [ix]  # crop and append

    data = torch.tensor(data)
    label = torch.tensor(label)
    return data, label
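
# With context_size=3, the word "emma" yields the (context -> target) pairs
# [0,0,0] -> 'e', [0,0,'e'] -> 'm', [0,'e','m'] -> 'm', ['e','m','m'] -> 'a',
# ['m','m','a'] -> '.', where each character is stored as its stoi index.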

def main():
    # How many tokens to keep as context when making the prediction for the next one
    CONTEXT_SIZE = 3
    # Size of the vector that represents a single token
    EMBEDDING_SIZE = 10
    VOCAB_SIZE = 27  # There are 27 possible chars in our dataset

    data, label = load_data(CONTEXT_SIZE)
    # Embedding table: each of the VOCAB_SIZE tokens is represented by a
    # vector of length EMBEDDING_SIZE
    C = torch.randn((VOCAB_SIZE, EMBEDDING_SIZE))

    NUMBER_OF_NEURONS = 200

    # Hidden layer, scaled with Kaiming init for tanh: gain (5/3) over sqrt(fan_in)
    # https://pytorch.org/docs/stable/nn.init.html
    w1 = torch.randn((CONTEXT_SIZE * EMBEDDING_SIZE, NUMBER_OF_NEURONS)) * ((5 / 3) / sqrt(CONTEXT_SIZE * EMBEDDING_SIZE))
    b1 = torch.randn(NUMBER_OF_NEURONS) * 0.01

    # Output layer
    w2 = torch.randn((NUMBER_OF_NEURONS, VOCAB_SIZE)) * ((5 / 3) / sqrt(NUMBER_OF_NEURONS))
    b2 = torch.randn(VOCAB_SIZE) * 0.01

    parameters = [C, w1, b1, w2, b2]
    print("Number of parameters:", sum(p.nelement() for p in parameters))

    for p in parameters:
        p.requires_grad = True

    steps = []
    losses = []

    EPOCHS = 200000  # training steps (minibatch updates), not full passes over the data
    MINIBATCH_SIZE = 32
    avgs = []
    for i in range(EPOCHS):
        # Minibatching
        minibatch_indexes = torch.randint(0, data.shape[0], (MINIBATCH_SIZE,))
        embedding = C[data[minibatch_indexes]]

        # Forward pass
        h = torch.tanh(embedding.view(-1, EMBEDDING_SIZE * CONTEXT_SIZE) @ w1 + b1)
        logits = h @ w2 + b2
        loss = F.cross_entropy(logits, label[minibatch_indexes])

        # Backward pass
        for p in parameters:
            p.grad = None
        loss.backward()

        # track stats
        if i % 1000 == 0:  # print every once in a while
            print(f'{i:7d}/{EPOCHS:7d}: {loss.item():.4f}')
        if i > EPOCHS / 2:
            avgs.append(loss.item())

        steps.append(i)
        losses.append(loss.item())

        # Step decay: drop the learning rate halfway through training
        lr = 0.1 if i < EPOCHS / 2 else 0.01
        for p in parameters:
            p.data -= lr * p.grad

    print("Average loss", sum(avgs) / len(avgs))
    plt.plot(steps, losses, label="minibatch loss")
    plt.legend()
    plt.show()

if __name__ == "__main__":
    main()
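
A quick sanity check on the Kaiming scaling used above (a standalone sketch;
the shapes mirror the script's defaults, and the numbers are illustrative):
scaling unit-variance weights by gain / sqrt(fan_in) keeps the pre-activation
standard deviation near the gain of 5/3, which tanh then squashes back toward 1.

    import torch
    from math import sqrt

    fan_in = 30                    # CONTEXT_SIZE * EMBEDDING_SIZE
    gain = 5 / 3                   # recommended gain for tanh
    x = torch.randn(1000, fan_in)  # unit-variance inputs
    w = torch.randn(fan_in, 200) * (gain / sqrt(fan_in))
    print((x @ w).std())           # ~1.67, i.e. roughly the gain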
2 changes: 1 addition & 1 deletion gigatorch/activation_fn.py
@@ -1,2 +1,2 @@
def relu(x):
def relu(x: int) -> int:
    return max(0, x)
119 changes: 54 additions & 65 deletions gigatorch/cnn.py
@@ -10,47 +10,56 @@
from abc import ABC, abstractmethod
from os import listdir
from os.path import join
import numpy as np


class Compute(ABC):
    @abstractmethod
    def compute(self, data) -> List[List[Tensor]]:
    def compute(self, input: Tensor) -> Tensor:
        pass


"""
The MaxPool2D layer extracts the maximum value over the window defined by pool_size
for each dimension along the features axis. The window is shifted by strides in each dimension.

MaxPool2D accepts a 4-dimensional tensor as input. The dimensions represent:
Batch size: The number of samples in a batch. We can do parallel processing if it's more than 1 batch.
Channels: The number of input channels. For example, an RGB image would have 3 channels.
Height: The height of the input.
Width: The width of the input.
"""
class MaxPool2D(Compute):
    def __init__(self, kernel_size, stride=None):
        self.kernel_size = kernel_size
        self.stride = stride if stride is not None else kernel_size

    def compute(self, data_list) -> List[List[Tensor]]:
    def compute(self, input: Tensor) -> Tensor:
        assert len(input.shape) == 4, f"can't 2d pool {input.shape}"
        (batch_size, channels, height, width) = input.shape
        assert (height - self.kernel_size) % self.stride == 0, f"Height does not fit the kernel size {self.kernel_size} and stride {self.stride}"
        assert (width - self.kernel_size) % self.stride == 0, f"Width does not fit the kernel size {self.kernel_size} and stride {self.stride}"

        print("Computing maxpool")
        print("Size of data", len(data_list[0]))
        print("Number of input", len(data_list))
        output = []
        for data in data_list:
            if len(data) < self.kernel_size or len(data[0]) < self.kernel_size:
                raise Exception("Received data is smaller than the kernel_size")

            new_data = []
            for row_index in range(0, len(data) - self.kernel_size + 1, self.stride):
                row = []
                for column_index in range(
                    0, len(data[row_index]) - self.kernel_size + 1, self.stride
                ):
                    current_max = 0
                    for i in range(self.kernel_size):
                        for j in range(self.kernel_size):
                            current_max = max(
                                current_max, data[row_index + i][column_index + j]
                            )
                    row.append(current_max)
                new_data.append(row)
            output.append(new_data)
        print("Size of data", len(output[0]))
        print("Number of output", len(output))
        print("Input shape: ", input.shape)

        pooled_height = (height - self.kernel_size) // self.stride + 1
        pooled_width = (width - self.kernel_size) // self.stride + 1
        output = np.zeros((batch_size, channels, pooled_height, pooled_width))

        for b in range(batch_size):
            for c in range(channels):
                for i in range(pooled_height):
                    for j in range(pooled_width):
                        h_start = i * self.stride
                        h_end = h_start + self.kernel_size
                        w_start = j * self.stride
                        w_end = w_start + self.kernel_size
                        output[b, c, i, j] = np.max(input.data[b, c, h_start:h_end, w_start:w_end])

        print("\n")
        return output
        return Tensor(output)
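
# A minimal usage sketch (hypothetical data; assumes gigatorch's Tensor wraps
# a NumPy array and exposes .shape and .data, as the code above does):
#   pool = MaxPool2D(kernel_size=2)          # stride defaults to kernel_size
#   x = Tensor(np.random.rand(1, 3, 4, 4))   # (batch, channels, height, width)
#   y = pool.compute(x)                      # y.shape == (1, 3, 2, 2)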


class Conv2D(Compute):
@@ -88,45 +88,97 @@ def __init__(self, in_channels, out_channels, kernel_size, activation_fn, stride
        self.activation_fn = activation_fn
        self.stride = stride

    def compute(self, data_list):
        print("computing conv2d")
        print("Size of data", data_list.shape)
        output = Tensor([])
        print("Number of kernels", self.kernels.shape)
        # Iterate for out_channels number of times
        for i in range(self.kernels.shape[0]):
            print("output", i)
            for layer_index in range(data_list.shape[0]):
                print("layer index", layer_index)
                data = data_list[layer_index]
                kernel = self.kernels[layer_index]
                print("data", data.shape)
                print("kernel", kernel.shape)

                if data.shape[0] < self.kernel_size or data.shape[1] < self.kernel_size:
                    raise Exception("Received data is smaller than the kernel_size")

                new_data = []
                for row_index in range(
                    0, len(data) - self.kernel_size + 1, self.stride
                ):
                    row = []
                    for column_index in range(len(data[0]) - self.kernel_size + 1):
                        sum = Tensor(0)
                        for i in range(self.kernel_size):
                            for j in range(self.kernel_size):
                                sum += (
                                    data[row_index + i][column_index + j] * kernel[i][j]
                                )
                        row.append(self.activation_fn(sum))
                    new_data.append(row)
                output.append(new_data)

        print("Size of data", len(output[0]))
        print("Number of output", len(output))
        print("\n")
        return output
    def compute(self, input):
        (batch_size, _, height, width) = input.shape
        output_height = (height - self.kernel_size) // self.stride + 1
        output_width = (width - self.kernel_size) // self.stride + 1
        output = Tensor(np.zeros((batch_size, self.out_channels, output_height, output_width)))

        for b in range(batch_size):
            for k in range(self.out_channels):
                for i in range(output_height):
                    for j in range(output_width):
                        h_start = i * self.stride
                        h_end = h_start + self.kernel_size
                        w_start = j * self.stride
                        w_end = w_start + self.kernel_size
                        output[b, k, i, j] = self.activation_fn(
                            np.sum(input[b, :, h_start:h_end, w_start:w_end] * self.kernels[k])
                        )

        return output
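
# Shape check (hypothetical numbers): a 28x28 input with kernel_size=3 and
# stride=1 gives output_height = (28 - 3) // 1 + 1 = 26, so an input of shape
# (2, in_channels, 28, 28) produces an output of shape (2, out_channels, 26, 26).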

class CNN:
    def __init__(self, train_data_dir, test_data_dir, categories):
1 change: 0 additions & 1 deletion gigatorch/nn.py
@@ -1,5 +1,4 @@
import random
from typing import List
from gigatorch.tensor import Tensor


1 change: 0 additions & 1 deletion gigatorch/tensor.py
@@ -108,7 +108,6 @@ def _backprop():
            self.grad += (out.data > 0) * out.grad

        out._backprop = _backprop

        return out

    def to(self, new_type):
21 changes: 21 additions & 0 deletions requirements.txt
@@ -0,0 +1,21 @@
black==24.2.0
click==8.1.7
filelock==3.13.1
fsspec==2024.2.0
iniconfig==2.0.0
Jinja2==3.1.3
MarkupSafe==2.1.5
mpmath==1.3.0
mypy-extensions==1.0.0
networkx==3.2.1
numpy==1.26.4
packaging==23.2
pathspec==0.12.1
pillow==10.2.0
platformdirs==4.2.0
pluggy==1.4.0
pytest==8.0.2
setuptools-black==0.1.5
sympy==1.12
torch==2.2.1
typing_extensions==4.10.0
Binary file added temp/0/1.png
Binary file added temp/0/108.png
Binary file added temp/0/114.png
Binary file added temp/0/118.png
Binary file added temp/0/21.png
Binary file added temp/0/34.png
Binary file added temp/0/37.png
Binary file added temp/0/51.png
Binary file added temp/0/56.png
Binary file added temp/0/63.png
Binary file added temp/0/68.png
Binary file added temp/0/69.png
Binary file added temp/0/75.png
Binary file added temp/0/81.png
Binary file added temp/0/88.png
Binary file added temp/0/95.png