-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathutils.py
98 lines (84 loc) · 3.72 KB
/
utils.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
import numpy as np
import pandas as pd
import math
import sklearn
import sklearn.preprocessing
import os
import matplotlib.pyplot as plt
def plot_raw(df, included_volume=True):
    ''' Plot the raw price columns of df, optionally followed by its volume.

    The 'Open', 'Close', 'High' and 'Low' columns are drawn as four lines
    with a shared title, axis labels, legend and grid, then shown. When
    included_volume is truthy the 'Volume' column is drawn and shown
    afterwards with its own title and labels.
    '''
    # (column, line colour, legend label) for each price series, in draw order.
    price_series = (
        ('Open', 'red', 'open'),
        ('Close', 'green', 'close'),
        ('High', 'blue', 'high'),
        ('Low', 'black', 'low'),
    )
    for column, colour, legend_name in price_series:
        plt.plot(df[column], color=colour, label=legend_name)
    plt.title('Stock price')
    plt.xlabel('Time [days]')
    plt.ylabel('Price')
    plt.legend(loc='best')
    plt.grid(True)
    plt.show()

    # Nothing more to draw when the caller opted out of the volume chart.
    if not included_volume:
        return

    plt.plot(df['Volume'], color='black', label='volume')
    plt.title('Stock volume')
    plt.xlabel('Time [days]')
    plt.ylabel('Volume')
    plt.legend(loc='best')
    plt.grid(True)
    plt.show()
def normalize_data(df, cols):
    ''' Min-max scale each column named in cols and return those columns.

    Every listed column is fitted and transformed on its own with a
    MinMaxScaler. The scaled values are written back into df, so the
    DataFrame passed by the caller is modified; the return value is
    df[cols].
    '''
    scaler = sklearn.preprocessing.MinMaxScaler()
    for name in cols:
        # Reshape the column into a single-column 2-D array before scaling.
        column_values = df[name].to_numpy().reshape(-1, 1)
        # fit_transform refits the scaler on this column alone.
        df[name] = scaler.fit_transform(column_values)
    return df[cols]
def load_data(stock, seq_len=20, val_set_size_percentage=10,
              test_set_size_percentage=10):
    ''' Cut stock into sliding windows and split them into train/val/test.

    stock is converted to a 2-D array of rows. len(stock) - seq_len
    overlapping windows of seq_len consecutive rows are built; within each
    window the first seq_len - 1 rows form the input sequence and the last
    row is the target. Windows are split in order (no shuffling): the
    leading part is the training set, followed by validation, then test.

    Args:
        stock: pandas DataFrame with one row per time step.
        seq_len: number of consecutive rows per window (inputs + target).
        val_set_size_percentage: share of windows, in percent, used for
            validation (rounded to a whole number of windows).
        test_set_size_percentage: share of windows, in percent, used for
            testing (rounded to a whole number of windows).

    Returns:
        The list [x_train, y_train, x_val, y_val, x_test, y_test]. Each x_*
        has shape (n_windows, seq_len - 1, n_columns) and each y_* has shape
        (n_windows, n_columns).

    Raises:
        ValueError: if seq_len is smaller than 1 or stock does not have more
            than seq_len rows, so that no window can be built.
    '''
    # DataFrame.as_matrix() was removed in pandas 1.0; to_numpy() replaces it.
    data_raw = stock.to_numpy()

    n_windows = len(data_raw) - seq_len
    if seq_len < 1 or n_windows <= 0:
        # Without this the 3-D slicing below fails with an opaque IndexError.
        raise ValueError(
            'stock must have more than seq_len rows and seq_len must be >= 1 '
            '(got %d rows, seq_len=%d)' % (len(data_raw), seq_len))

    data = np.array([data_raw[start:start + seq_len]
                     for start in range(n_windows)])

    val_set_size = int(np.round(val_set_size_percentage / 100 * data.shape[0]))
    test_set_size = int(np.round(test_set_size_percentage / 100 * data.shape[0]))
    train_set_size = data.shape[0] - (val_set_size + test_set_size)
    val_end = train_set_size + val_set_size

    # Inputs are every row of a window but the last; the last row is the target.
    x_train = data[:train_set_size, :-1, :]
    y_train = data[:train_set_size, -1, :]

    x_val = data[train_set_size:val_end, :-1, :]
    y_val = data[train_set_size:val_end, -1, :]

    x_test = data[val_end:, :-1, :]
    y_test = data[val_end:, -1, :]

    return [x_train, y_train, x_val, y_val, x_test, y_test]
def plot_result(y_train, y_val, y_test, y_train_pred, y_val_pred, y_test_pred, ft=0, included_test=True):
    ''' Plot predictions against ground truth for one feature column.

    The first chart lays the train, validation and test targets end to end
    along the x axis and overlays the matching predictions, using column ft
    of every array. When included_test is truthy, a second chart shows only
    the test targets and test predictions.
    '''
    # Segment lengths for the targets and, separately, for the predictions.
    n_train, n_val, n_test = y_train.shape[0], y_val.shape[0], y_test.shape[0]
    n_train_pred = y_train_pred.shape[0]
    n_val_pred = y_val_pred.shape[0]
    n_test_pred = y_test_pred.shape[0]

    # x offsets at which the test segments start in the combined chart.
    target_test_start = n_train + n_val
    pred_test_start = n_train_pred + n_val_pred

    # Ground truth, one segment after another.
    plt.plot(np.arange(n_train), y_train[:, ft], color='blue', label='train target')
    plt.plot(np.arange(n_train, target_test_start), y_val[:, ft],
             color='gray', label='val target')
    plt.plot(np.arange(target_test_start, target_test_start + n_test),
             y_test[:, ft], color='black', label='test target')

    # Predictions, positioned using their own lengths.
    plt.plot(np.arange(n_train_pred), y_train_pred[:, ft],
             color='red', label='train prediction')
    plt.plot(np.arange(n_train_pred, pred_test_start),
             y_val_pred[:, ft], color='orange', label='val prediction')
    plt.plot(np.arange(pred_test_start, pred_test_start + n_test_pred),
             y_test_pred[:, ft], color ='green', label='test prediction')

    plt.title('past and future stock price')
    plt.xlabel('Time [Days]')
    plt.ylabel('Normalized price')
    plt.grid(True)
    plt.legend(loc='best')
    plt.show()

    # The test-only chart is optional.
    if not included_test:
        return

    # NOTE: in this chart the x axis starts at the training length; the
    # validation length is not added to the offset.
    plt.plot(np.arange(n_train, n_train + n_test),
             y_test[:, ft], color='black', label='test target')
    plt.plot(np.arange(n_train, n_train + n_test_pred),
             y_test_pred[:, ft], color='green', label='test prediction')
    plt.title('future stock prices')
    plt.xlabel('Time [Days]')
    plt.ylabel('Normalized price')
    plt.legend(loc='best')
    plt.grid(True)
    plt.show()