pretrainm5.py
import torch
from hails.hails import HAILS_Univ
from hails.seq_layers import DLinear, NLinear
from torch.optim import AdamW
from torch.utils.data import DataLoader
from ts_utils.datasets import HierarchicalTimeSeriesDataset
from ts_utils.m5_dataset import get_dataset, get_datasets
from ts_utils.utils import prob_poisson, prob_poisson_dispersion, set_seed

SEED = 42
PRED_LEN = 28
SEQ_LEN = 112  # past 4 months
NUM_WORKERS = 1
USE_DISPERSION = False
MODEL_TYPE = "DLinear"
DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
PRETRAIN_LR = 1e-3
BATCH_SIZE = 64
PRE_TRAIN_EPOCHS = 10

set_seed(SEED)

# Load the M5 train/test splits and their hierarchy matrices.
train_dataset, test_dataset, _, _ = get_datasets()
train_dataset, train_hmatrix = get_dataset(train_dataset)
test_dataset, test_hmatrix = get_dataset(test_dataset)
print(f"{train_dataset.shape=}, {train_hmatrix.shape=}")
print(f"{test_dataset.shape=}, {test_hmatrix.shape=}")
train_hmatrix = train_hmatrix.to(DEVICE)
# test_hmatrix = test_hmatrix.to(DEVICE)

# Flag which series are Poisson-distributed (optionally using a dispersion test).
dist_mask = (
    prob_poisson(train_dataset).to(DEVICE)
    if not USE_DISPERSION
    else prob_poisson_dispersion(train_dataset).to(DEVICE)
)
print(
    f"Percentage of Poisson-distributed nodes: {dist_mask.sum().item() / dist_mask.size(0) * 100:.2f}%"
)
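
# A hedged illustration, not the ts_utils implementation: a Poisson variable's
# variance equals its mean, so one simple screen flags series whose index of
# dispersion (var/mean) stays near 1. `naive_poisson_mask` is a hypothetical
# helper, shown only to make the masking idea concrete.
def naive_poisson_mask(data: torch.Tensor, tol: float = 0.5) -> torch.Tensor:
    data = data.float()
    mean = data.mean(dim=0).clamp_min(1e-8)  # guard against all-zero series
    dispersion = data.var(dim=0) / mean  # ~1 for Poisson-like counts
    return (dispersion - 1).abs() < tol  # boolean mask, one entry per node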

# Wrap the raw arrays into windowed hierarchical datasets.
train_dataset_obj = HierarchicalTimeSeriesDataset(
    train_dataset, PRED_LEN, SEQ_LEN, None, train_hmatrix
)
test_dataset_obj = HierarchicalTimeSeriesDataset(
    test_dataset, PRED_LEN, SEQ_LEN, None, test_hmatrix
)
print(f"{len(train_dataset_obj.time_series_dataset)=}")

train_loader = DataLoader(
    train_dataset_obj.time_series_dataset,
    shuffle=True,
    batch_size=BATCH_SIZE,
    num_workers=NUM_WORKERS,
    pin_memory=True,
)
test_loader = DataLoader(
    test_dataset_obj.time_series_dataset,
    shuffle=False,
    batch_size=BATCH_SIZE,
    num_workers=NUM_WORKERS,
    pin_memory=True,
)
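
# Quick sanity check (an assumption about the loader's output: (input, target)
# pairs covering SEQ_LEN past steps and PRED_LEN future steps per node):
xb, yb = next(iter(train_loader))
print(f"{xb.shape=}, {yb.shape=}")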

hails = HAILS_Univ(
    num_nodes=train_dataset.shape[-1],
    seq_len=SEQ_LEN,
    pred_len=PRED_LEN,
    pred_model=DLinear if MODEL_TYPE == "DLinear" else NLinear,
    corem_c=5,
).to(DEVICE)
print(hails)

# Pre-train
optimizer = AdamW(hails.parameters(), lr=PRETRAIN_LR)


def pre_train_step():
    """One pass over the training set; returns the mean training loss."""
    hails.train()
    losses = []
    for x, y in train_loader:
        x = x.to(DEVICE)
        y = y.to(DEVICE)
        optimizer.zero_grad()
        # Base forward pass yields per-node distribution parameters.
        mu, logstd = hails._forward_base(x)
        loss = hails.get_ll_loss(mu, logstd, y, dist_mask).mean()
        loss.backward()
        optimizer.step()
        losses.append(loss.item())
    return sum(losses) / len(losses)
print("Pre-training...")
for ep in range(PRE_TRAIN_EPOCHS):
loss = pre_train_step()
print(f"Epoch {ep+1}/{PRE_TRAIN_EPOCHS}, Loss: {loss:.4f}")
print("Pre-training done!")
# Save pre-trained model
torch.save(hails.state_dict(), "pretrained_m5.pth")
print("Pre-trained model saved!")