-
Notifications
You must be signed in to change notification settings - Fork 190
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
[Example] Add TADF材料分子的光电性质预测 #974
base: develop
Are you sure you want to change the base?
Changes from 19 commits
c0b76d9
36cc5b5
28ac09e
4e65408
a77606e
ab27b8e
7f9c80c
b111bd4
754e841
cba37c4
22ad460
e4dda68
62a04a5
d60a01b
5241419
2eccbb5
3bef6a1
34de075
96e5cd5
1b9a43c
cb3b15a
a06f763
70872c8
e412303
88168e0
09df79d
c182b30
5a4a362
2a207b5
e310f5b
b19493e
0987157
b13b137
3288bac
5735c19
af10ed1
c5cad4e
db5dde9
cfcb947
89edf47
c09f3aa
1319be8
3077522
9d477ed
3b41d12
f00e62c
e62d62f
6ee4a00
8c3b916
b1b0426
4cdd6e9
77a4d70
540a3cc
cf14639
17fcaec
cfd9292
c351456
51365d7
d385cba
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Large diffs are not rendered by default.
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,186 @@ | ||
# -*- coding: utf-8 -*- | ||
""" | ||
Created on Thu May 30 21:17:07 2024 | ||
|
||
@author: admin | ||
""" | ||
|
||
import numpy as np | ||
import paddle | ||
import rdkit.Chem as Chem | ||
from paddle import nn | ||
from paddle.io import Dataset | ||
from rdkit.Chem import AllChem | ||
from sklearn.decomposition import PCA | ||
from sklearn.model_selection import train_test_split | ||
|
||
# Run on CPU. NOTE(review): the dataset is tiny, but if a GPU is available
# this line forces CPU anyway — drop it to let paddle pick the default device.
paddle.device.set_device("cpu")

# Training hyper-parameters.
EPOCHS = 200
LR = 0.0001
BATCH = 8

# Load the regression targets (one float per line).  Relative paths so the
# example runs from the repository root on any machine (reviewer request).
data = []
with open("data/Est.dat") as f:
    for line in f:
        data.append(float(line.strip()))

# Load the SMILES strings, one molecule per line, aligned with `data`.
smis = []
with open("data/smis.txt") as f:
    for line in f:
        smis.append(line.strip())

# Convert each SMILES into a 2048-bit Morgan fingerprint feature vector.
# Iterate SMILES and targets together so that when RDKit fails on a molecule
# its label is dropped too — the original kept the label, silently
# misaligning features and targets for every molecule after a failure.
vectors = []
labels = []
del_mol = []  # SMILES that RDKit could not parse / fingerprint
for smi, target in zip(smis, data):
    mol = Chem.MolFromSmiles(smi)  # returns None for invalid SMILES
    try:
        fp = AllChem.GetMorganFingerprintAsBitVect(mol, radius=2, nBits=2048)
        vectors.append(np.array(list(map(float, fp.ToBitString()))))
        labels.append(target)
    except Exception:
        del_mol.append(smi)

# Reduce fingerprint dimensionality while keeping 99% of the variance.
pca = PCA(n_components=0.99)
pca.fit(vectors)
Xlist = pca.transform(vectors)
xtrain, xtest, ytrain, ytest = train_test_split(
    Xlist, labels, test_size=0.1, random_state=40
)
|
||
|
||
class Mydataset(Dataset):
    """Minimal paddle Dataset pairing feature rows with scalar targets."""

    def __init__(self, x, y):
        super().__init__()
        self.x = x
        self.y = y
        # Materialize once instead of the original element-by-element copy
        # loop; keep the src/trg attribute names for backward compatibility.
        self.src = list(x)
        self.trg = list(y)

    def __getitem__(self, index):
        # One sample: (feature vector, target value).
        return self.src[index], self.trg[index]

    def __len__(self):
        return len(self.src)
|
||
|
||
class Net(nn.Layer):
    """Three-layer fully connected regression network with ReLU + dropout.

    Maps a feature vector of ``in_features`` dimensions to a single scalar.
    The default of 587 matches the PCA output dimension of the original data;
    pass ``in_features`` explicitly for a different input dimensionality.
    """

    def __init__(self, in_features=587, hidden=256, dropout=0.5):
        super().__init__()
        self.fc1 = paddle.nn.Linear(in_features=in_features, out_features=in_features)
        self.fc2 = paddle.nn.Linear(in_features=in_features, out_features=hidden)
        self.fc3 = paddle.nn.Linear(in_features=hidden, out_features=1)
        self.dropout = paddle.nn.Dropout(p=dropout)
        self.relu = paddle.nn.ReLU()

    def forward(self, _input):
        """Return a (batch,) tensor of predictions for a (batch, in_features) input."""
        x = self.fc1(_input)
        x = self.relu(x)
        x = self.dropout(x)
        x = self.fc2(x)
        x = self.relu(x)
        x = self.dropout(x)
        output = self.fc3(x)
        # Drop the trailing singleton dim so the output matches the target shape.
        return output.squeeze(axis=-1)

    def initialize(self):
        """Re-initialize every Linear sublayer (Xavier-normal weights, zero biases).

        Called between cross-validation folds so each fold starts untrained.
        The original reset only the weights, letting trained biases leak into
        the next fold.
        """
        for m in self.sublayers():
            if isinstance(m, nn.Linear):
                paddle.nn.initializer.XavierNormal()(m.weight)
                paddle.nn.initializer.Constant(0.0)(m.bias)
|
||
|
||
def k_fold(k, i, X, Y):
    """Split (X, Y) into train/validation parts for fold ``i`` of ``k``.

    The i-th contiguous slice of ``len(X) // k`` rows becomes the validation
    set; the rest is the training set.  The last fold also absorbs the
    remainder rows (``len(X) % k``).

    Args:
        k: total number of folds.
        i: index of the validation fold, 0 <= i < k.
        X: feature rows (numpy array or any sliceable sequence).
        Y: targets aligned with X.

    Returns:
        Tuple ``(x_train, y_train, x_val, y_val)``.
    """
    # len() works for numpy arrays and plain lists alike; the original
    # tuple(X.shape)[0] required X to be an array even though Y is a list.
    fold_size = len(X) // k
    val_start = i * fold_size
    if i != k - 1:
        val_end = val_start + fold_size
        x_val, y_val = X[val_start:val_end], Y[val_start:val_end]
        x_train = np.concatenate((X[:val_start], X[val_end:]), axis=0)
        y_train = np.concatenate((Y[:val_start], Y[val_end:]), axis=0)
    else:
        # Last fold: validation runs to the end, training is the prefix.
        x_val, y_val = X[val_start:], Y[val_start:]
        x_train = X[:val_start]
        y_train = Y[:val_start]
    return x_train, y_train, x_val, y_val
|
||
|
||
def train(model, X_train, Y_train, X_val, Y_val, batchsize, lr, epochs):
    """Train ``model`` with Adam on (X_train, Y_train), validating each epoch.

    Args:
        model: the network to optimize (modified in place).
        X_train / Y_train: training features and targets.
        X_val / Y_val: validation features and targets.
        batchsize: mini-batch size for the training DataLoader.
        lr: Adam learning rate.
        epochs: number of training epochs.

    Returns:
        ``(train_Loss, val_Loss)`` — per-epoch mean training MSE and
        validation RMSE histories.
    """
    train_loader = paddle.io.DataLoader(
        Mydataset(X_train, Y_train), batch_size=batchsize, shuffle=True, num_workers=0
    )
    loss_func = paddle.nn.MSELoss()
    optimizer = paddle.optimizer.Adam(
        parameters=model.parameters(),
        learning_rate=lr,
        beta1=0.9,
        beta2=0.99,
        weight_decay=1e-5,
    )
    train_Loss = []
    val_Loss = []
    for epoch in range(epochs):
        model.train()
        train_loss = 0.0
        for input_, tar in train_loader:
            output = model(input_)
            loss = loss_func(output, tar)
            # Optimize RMSE (gradients of sqrt(MSE)); record MSE in the history.
            rmse = paddle.sqrt(loss)
            rmse.backward()
            optimizer.step()
            optimizer.clear_grad()
            # Weight by the actual batch size so a partial last batch does not
            # bias the epoch average (the original multiplied by `batchsize`).
            train_loss += loss.item() * len(tar)
        train_Loss.append(train_loss / len(X_train))

        # Disable dropout for validation; model.train() at the top of the
        # next epoch switches back.  The original validated in train mode,
        # so dropout corrupted every validation score.
        model.eval()
        with paddle.no_grad():
            val_pre = model(paddle.to_tensor(X_val))
            val_loss = paddle.sqrt(loss_func(val_pre, paddle.to_tensor(Y_val)))
        val_Loss.append(val_loss.numpy())

    return train_Loss, val_Loss
|
||
|
||
def k_train(model, k, X, Y, batch_size, lr, epochs):
    """Run k-fold cross-validation and collect each fold's final losses.

    Returns ``(train_Loss, val_Loss)``: the last-epoch training loss and
    validation loss of every fold, in fold order.
    """
    fold_train_losses = []
    fold_val_losses = []
    for fold in range(k):
        # Reset the weights so every fold starts from an untrained model.
        model.initialize()
        xtr, ytr, xvl, yvl = k_fold(k, fold, X, Y)
        t_hist, v_hist = train(model, xtr, ytr, xvl, yvl, batch_size, lr, epochs)
        fold_train_losses.append(t_hist[-1])
        fold_val_losses.append(v_hist[-1])
    return fold_train_losses, fold_val_losses
|
||
|
||
# --- pick the best CV split, retrain, and evaluate on the held-out test set ---
model = Net().astype(dtype="float64")
# Cross-validate to find the fold split with the lowest validation loss.
train_losses, val_losses = k_train(model, 9, xtrain, ytrain, BATCH, LR, EPOCHS)
best_fold = val_losses.index(min(val_losses))
# Retrain from scratch on the best split.
model.initialize()
x_train, y_train, x_val, y_val = k_fold(9, best_fold, xtrain, ytrain)
train_loss, val_loss = train(model, x_train, y_train, x_val, y_val, BATCH, LR, EPOCHS)
model.eval()
# Relative output paths so the example runs on any machine (reviewer request).
paddle.save(model.state_dict(), "est.pdparams")
ytest_pre = model(paddle.to_tensor(xtest)).detach().numpy()
# Write ground-truth and predicted test values, one per line, for comparison.
with open("est.txt", "w") as f_true:
    for value in ytest:
        f_true.write(str(value) + "\n")
with open("estpre.txt", "w") as f_pred:
    for value in ytest_pre:
        f_pred.write(str(value) + "\n")
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
这个文件和train.py的区别是?