-
Notifications
You must be signed in to change notification settings - Fork 0
/
data.py
108 lines (80 loc) · 2.8 KB
/
data.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
import numpy as np
import numpy.random as npr
import matplotlib.pyplot as plt
from scipy.signal import sawtooth
import torch
from torch.utils.data import Dataset
class WindowData(Dataset):
    """Dataset of consecutive, non-overlapping, fixed-length windows.

    The input sequence is cut into ``len(data) // window_length`` slices of
    exactly ``window_length`` elements each. Any trailing elements that do
    not fill a complete window are dropped.

    Parameters
    ----------
    data: sequence
        Anything supporting ``len()`` and slicing (list, array, tensor, ...).
    window_length: int
        Number of elements per window; must be positive.

    Raises
    ------
    ValueError
        If ``window_length`` is not positive.
    """

    def __init__(self, data, window_length):
        if window_length <= 0:
            raise ValueError(
                f"window_length must be positive, got {window_length}"
            )
        # Floor division gives the number of complete windows that fit.
        # (Modulo would give the size of the leftover tail instead.)
        n_windows = len(data) // window_length
        self.data = [
            data[start : start + window_length]
            for start in range(0, n_windows * window_length, window_length)
        ]

    def __len__(self):
        """Return the number of complete windows."""
        return len(self.data)

    def __getitem__(self, item):
        """Return the window stored at position ``item``."""
        return self.data[item]
def make_lds_data(A, Q, C, R, T):
    """Simulate observations from a linear dynamical system with Gaussian noise.

    The latent state starts as a standard-normal vector and evolves as
    ``x[t + 1] = A @ x[t] + Q @ eps`` with ``eps`` standard normal. The
    observations are ``y = x @ C.T + eta @ R.T`` with ``eta`` standard
    normal, so ``Q`` and ``R`` multiply the unit-variance noise directly.

    Parameters
    ----------
    A: torch.Tensor
        State transition matrix (expected to be N x N).
    Q: torch.Tensor
        Matrix applied to the standard-normal state noise (expected N x N).
    C: torch.Tensor
        Emission matrix of shape (P, N); its shape fixes P and N.
    R: torch.Tensor
        Matrix applied to the standard-normal observation noise
        (expected P x P).
    T: int
        Number of time steps.

    Returns
    -------
    torch.Tensor
        Observations with one row per time step and P columns.
    """
    obs_dim, latent_dim = C.shape

    # Initial latent state, followed by T - 1 noisy linear transitions.
    state = torch.randn(latent_dim)
    states = [state]
    for _ in range(T - 1):
        state = A @ state + Q @ torch.randn(latent_dim)
        states.append(state)
    latents = torch.stack(states)

    # Project every latent state to observation space and add emission noise.
    emission_noise = torch.randn(T, obs_dim) @ R.T
    return latents @ C.T + emission_noise
def make_dot_data(
    image_width, T, num_steps, x0=0.0, v=0.5, render_sigma=0.2, noise_sigma=0.1
):
    """Render noisy 1-D images of a dot bouncing back and forth on [-1, 1].

    The dot position follows a triangle wave evaluated at
    ``v * (t + (1 + x0) / 2)`` for ``num_steps`` evenly spaced times ``t`` in
    ``[0, T]``. Each frame is an unnormalised Gaussian bump of width
    ``render_sigma`` centred on the dot, sampled on ``image_width`` pixels
    spanning ``[-1, 1]``. Standard-normal noise scaled by ``noise_sigma`` is
    added to every pixel.

    Parameters
    ----------
    image_width: int
        Number of pixels per frame.
    T: float
        Final time of the trajectory.
    num_steps: int
        Number of frames.
    x0: float
        Offset entering the phase of the triangle wave.
    v: float
        Speed factor multiplying the (shifted) time.
    render_sigma: float
        Width of the rendered Gaussian bump.
    noise_sigma: float
        Standard deviation of the additive pixel noise.

    Returns
    -------
    np.ndarray
        Array of shape (num_steps, image_width).
    """
    pixel_centers = np.linspace(-1, 1, image_width, endpoint=True)
    phase_shift = (1 + x0) / 2

    frames = []
    for t in np.linspace(0, T, num_steps):
        # sawtooth with width=0.5 is a symmetric triangle wave in [-1, 1].
        position = sawtooth(np.pi * (v * (t + phase_shift)), width=0.5)
        bump = np.exp(-0.5 * ((position - pixel_centers) / render_sigma) ** 2)
        frames.append(bump)

    clean = np.vstack(frames)
    noise = npr.randn(*clean.shape)
    return clean + noise_sigma * noise
def make_pinwheel_data(radial_std, tangential_std, num_classes, num_per_class, rate):
    """Sample 2-D points arranged in a pinwheel of ``num_classes`` arms.

    source: https://github.com/mattjj/svae/blob/master/experiments/gmm_svae_synth.py

    Each point starts as Gaussian noise around (1, 0) with standard
    deviations ``radial_std`` and ``tangential_std`` along the two axes. It is
    then rotated by its arm's base angle plus ``rate * exp(radius)``, which
    twists every arm. The rows are shuffled and scaled by 10 before being
    returned; class labels are not returned.

    Parameters
    ----------
    radial_std: float
        Standard deviation along the arm.
    tangential_std: float
        Standard deviation across the arm.
    num_classes: int
        Number of arms.
    num_per_class: int
        Number of points per arm.
    rate: float
        Strength of the radius-dependent twist.

    Returns
    -------
    np.ndarray
        Array of shape (num_classes * num_per_class, 2).
    """
    total = num_classes * num_per_class
    arm_angles = np.linspace(0, 2 * np.pi, num_classes, endpoint=False)

    # Axis-aligned Gaussian blobs, shifted one unit out along the first axis.
    axis_scales = np.array([radial_std, tangential_std])
    points = npr.randn(total, 2) * axis_scales
    points[:, 0] += 1.0

    arm_index = np.repeat(np.arange(num_classes), num_per_class)
    theta = arm_angles[arm_index] + rate * np.exp(points[:, 0])

    # One 2x2 matrix [[cos, -sin], [sin, cos]] per point.
    cos_t = np.cos(theta)
    sin_t = np.sin(theta)
    rot = np.stack([cos_t, -sin_t, sin_t, cos_t], axis=1).reshape(-1, 2, 2)

    twisted = np.einsum("ti,tij->tj", points, rot)
    return 10 * npr.permutation(twisted)
def make_two_cluster_data(num_per_class):
    """Make two gaussian clusters.

    The clusters have identity covariance and are centred at (-4, 4) and
    (4, 4).

    Parameters
    ----------
    num_per_class: int
        number of samples per class

    Returns
    -------
    np.ndarray
        Array of shape (2 * num_per_class, 2); the samples of the first
        cluster come first, followed by those of the second.
    """
    centers = np.array([[-4, 4], [4, 4]])
    covariances = [np.eye(2), np.eye(2)]
    clusters = [
        npr.multivariate_normal(center, cov, num_per_class)
        for center, cov in zip(centers, covariances)
    ]
    return np.concatenate(clusters)
if __name__ == "__main__":
    # Visual sanity check: scatter-plot the two-cluster dataset.
    # To inspect the pinwheel dataset instead, generate the points with
    #     make_pinwheel_data(0.3, 0.05, 5, 100, 0.25)
    points = make_two_cluster_data(100)
    first_coord, second_coord = points[:, 0], points[:, 1]
    plt.scatter(first_coord, second_coord)
    plt.show()