Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[Example] Add TADF材料分子的光电性质预测 #974

Open
wants to merge 59 commits into
base: develop
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from 19 commits
Commits
Show all changes
59 commits
Select commit Hold shift + click to select a range
c0b76d9
merge code of upstream
Zhaoyiou123 Aug 16, 2024
36cc5b5
Merge branch 'PaddlePaddle:develop' into dev_model
YfB1125 Aug 17, 2024
28ac09e
Delete 2024-08 directory
YfB1125 Aug 17, 2024
4e65408
Update test.py
YfB1125 Aug 17, 2024
a77606e
merge code of upstream
Zhaoyiou123 Aug 17, 2024
ab27b8e
pre-commit
Zhaoyiou123 Aug 17, 2024
7f9c80c
pre-commit
Zhaoyiou123 Aug 17, 2024
b111bd4
changes
Zhaoyiou123 Aug 17, 2024
754e841
Update f_ppsci_train.py
YfB1125 Aug 20, 2024
cba37c4
Delete 2024-08 directory
YfB1125 Aug 20, 2024
22ad460
Merge branch 'develop' into dev_model
YfB1125 Aug 20, 2024
e4dda68
Update f_paddle_train.py
YfB1125 Aug 20, 2024
62a04a5
Merge branch 'develop' into dev_model
YfB1125 Aug 23, 2024
d60a01b
'pre-commit'
Zhaoyiou123 Aug 26, 2024
5241419
'pre-commit'
Zhaoyiou123 Aug 26, 2024
2eccbb5
'pre-commit'
Zhaoyiou123 Aug 26, 2024
3bef6a1
Delete Est directory
YfB1125 Aug 26, 2024
34de075
Delete f directory
YfB1125 Aug 26, 2024
96e5cd5
Delete angle directory
YfB1125 Aug 26, 2024
1b9a43c
Add files via upload
YfB1125 Aug 26, 2024
cb3b15a
Update est_paddle_train.py
YfB1125 Oct 7, 2024
a06f763
'yaml'
Zhaoyiou123 Oct 15, 2024
70872c8
Merge branch 'develop' into dev_model
YfB1125 Oct 16, 2024
e412303
'yaml'
Zhaoyiou123 Oct 16, 2024
88168e0
'yaml'
Zhaoyiou123 Oct 16, 2024
09df79d
Update PaddleScience
Zhaoyiou123 Oct 25, 2024
c182b30
'm'
Zhaoyiou123 Dec 7, 2024
5a4a362
'move'
Zhaoyiou123 Dec 7, 2024
2a207b5
'move'
Zhaoyiou123 Dec 7, 2024
e310f5b
'try'
Zhaoyiou123 Dec 20, 2024
b19493e
Merge branch 'PaddlePaddle:develop' into dev_model
YfB1125 Dec 28, 2024
0987157
markdown
YfB1125 Dec 28, 2024
b13b137
Update and rename TADF材料分子筛选-说明.md to TADF.md
YfB1125 Dec 28, 2024
3288bac
'docs'
Zhaoyiou123 Dec 28, 2024
5735c19
'.'
Zhaoyiou123 Dec 28, 2024
af10ed1
Update mkdocs.yml
YfB1125 Dec 28, 2024
c5cad4e
'mkdocs.yml'
Zhaoyiou123 Dec 28, 2024
db5dde9
Update TADF.md
YfB1125 Dec 28, 2024
cfcb947
Update TADF.md
YfB1125 Dec 28, 2024
89edf47
'c'
Zhaoyiou123 Dec 28, 2024
c09f3aa
'm'
Zhaoyiou123 Dec 28, 2024
1319be8
'm'
Zhaoyiou123 Dec 29, 2024
3077522
'main'
Zhaoyiou123 Jan 15, 2025
9d477ed
'main'
Zhaoyiou123 Jan 17, 2025
3b41d12
'change'
Zhaoyiou123 Jan 17, 2025
f00e62c
'delete'
Zhaoyiou123 Feb 12, 2025
e62d62f
'markdown'
Zhaoyiou123 Feb 12, 2025
6ee4a00
Merge branch 'develop' into dev_model
YfB1125 Feb 12, 2025
8c3b916
'delete'
Zhaoyiou123 Feb 13, 2025
b1b0426
Merge branch 'dev_model' of https://github.com/YfB1125/PaddleScience …
Zhaoyiou123 Feb 13, 2025
4cdd6e9
'change'
Zhaoyiou123 Feb 13, 2025
77a4d70
Merge branch 'develop' into dev_model
YfB1125 Feb 14, 2025
540a3cc
'change'
Zhaoyiou123 Feb 25, 2025
cf14639
'pull'
Zhaoyiou123 Feb 25, 2025
17fcaec
Merge branch 'develop' into dev_model
YfB1125 Feb 27, 2025
cfd9292
Merge branch 'develop' into dev_model
YfB1125 Feb 28, 2025
c351456
'markdown'
Zhaoyiou123 Feb 28, 2025
51365d7
'pull'
Zhaoyiou123 Feb 28, 2025
d385cba
'requirements'
Zhaoyiou123 Feb 28, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5,136 changes: 5,136 additions & 0 deletions TADF/Est/Est.dat

Large diffs are not rendered by default.

186 changes: 186 additions & 0 deletions TADF/Est/est_paddle_train.py
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

这个文件和train.py的区别是?

Original file line number Diff line number Diff line change
@@ -0,0 +1,186 @@
# -*- coding: utf-8 -*-
"""
Created on Thu May 30 21:17:07 2024

@author: admin
"""

import numpy as np
import paddle
import rdkit.Chem as Chem
from paddle import nn
from paddle.io import Dataset
from rdkit.Chem import AllChem
from sklearn.decomposition import PCA
from sklearn.model_selection import train_test_split

paddle.device.set_device("cpu")
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

为什么要指定运行设备为cpu呢

# Training hyperparameters; passed to k_train()/train() at the bottom of the script.
EPOCHS = 200  # number of training epochs per call to train()
LR = 0.0001  # learning rate handed to the Adam optimizer
BATCH = 8  # mini-batch size of the training DataLoader

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

# Regression targets: each line of Est.dat is parsed as one float by the loop body.
# NOTE(review): the path is an absolute, machine-specific Windows location and the
# file handle returned by open() is never explicitly closed — consider a relative
# path and a `with open(...)` block.
data = []
for line in open("D://resources//machine learning//paddle//2024-08//Est.dat"):
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

使用相对路径,./data/xxx即可

num = float(line.strip())
data.append(num)
# Load one SMILES string per line; `with` guarantees the handle is closed.
with open("D://resources//machine learning//paddle//2024-08//smis.txt") as smiles_file:
    smis = [line.strip() for line in smiles_file]

# Molecules and targets are paired by line number, so both files must agree.
if len(smis) != len(data):
    raise ValueError(
        f"smis.txt has {len(smis)} lines but Est.dat has {len(data)} targets"
    )

vectors = []  # fingerprint vectors of the molecules that could be featurized
del_mol = []  # SMILES strings whose fingerprinting raised an exception
kept_targets = []  # targets belonging to `vectors`, in the same order

for smi, target in zip(smis, data):
    mol = Chem.MolFromSmiles(smi)
    try:
        # 2048-bit Morgan fingerprint (radius 2) expanded to a float vector.
        fp = AllChem.GetMorganFingerprintAsBitVect(mol, radius=2, nBits=2048)
        _input = np.array(list(map(float, fp.ToBitString())))
    except Exception:
        # Record the failing molecule and skip BOTH its vector and its target.
        del_mol.append(smi)
        continue
    vectors.append(_input)
    kept_targets.append(target)

# Keep features and labels aligned: previously a skipped molecule left its
# target in `data`, shifting every later label relative to its fingerprint.
data = kept_targets

# Reduce dimensionality, keeping enough components for 99% explained variance.
# NOTE(review): Net() is built with 587 input features, which must equal the
# number of components kept here — confirm after any change to the data.
# NOTE(review): PCA is fitted on all samples before the train/test split.
pca = PCA(n_components=0.99)
pca.fit(vectors)
Xlist = pca.transform(vectors)
xtrain, xtest, ytrain, ytest = train_test_split(
    Xlist, data, test_size=0.1, random_state=40
)


class Mydataset(Dataset):
    """Dataset that pairs each feature row with its regression target."""

    def __init__(self, x, y):
        super().__init__()
        self.x = x
        self.y = y
        # Copy the samples into plain lists; len(x) decides how many pairs exist.
        sample_count = len(self.x)
        self.src = [self.x[idx] for idx in range(sample_count)]
        self.trg = [self.y[idx] for idx in range(sample_count)]

    def __getitem__(self, index):
        """Return the (features, target) pair stored at ``index``."""
        return self.src[index], self.trg[index]

    def __len__(self):
        """Return the number of stored samples."""
        return len(self.src)


class Net(nn.Layer):
    """Fully connected regressor: Linear -> ReLU -> Dropout (x2) -> Linear(1).

    Args:
        in_features: Width of the input vectors. Defaults to 587, the value
            that used to be hard-coded; pass the actual feature width when
            it differs (it depends on how many PCA components are kept).
        hidden_features: Width of the second hidden layer (default 256).
        dropout: Dropout probability applied after each hidden activation
            (default 0.5).
    """

    def __init__(self, in_features=587, hidden_features=256, dropout=0.5):
        super().__init__()
        self.fc1 = paddle.nn.Linear(in_features=in_features, out_features=in_features)
        self.fc2 = paddle.nn.Linear(
            in_features=in_features, out_features=hidden_features
        )
        self.fc3 = paddle.nn.Linear(in_features=hidden_features, out_features=1)
        self.dropout = paddle.nn.Dropout(p=dropout)
        self.relu = paddle.nn.ReLU()

    def forward(self, _input):
        """Return one prediction per input row (trailing size-1 axis squeezed)."""
        x = self.fc1(_input)
        x = self.relu(x)
        x = self.dropout(x)
        x = self.fc2(x)
        x = self.relu(x)
        x = self.dropout(x)
        output = self.fc3(x)
        return output.squeeze(axis=-1)

    def initialize(self):
        """Reset every Linear sublayer: Xavier-normal weights, zero biases.

        Biases are reset as well so that a model reused across
        cross-validation folds does not carry learned biases from the
        previous fold into the next one.
        """
        for m in self.sublayers():
            if isinstance(m, nn.Linear):
                paddle.nn.initializer.XavierNormal()(m.weight)
                if m.bias is not None:
                    paddle.nn.initializer.Constant(0.0)(m.bias)


def k_fold(k, i, X, Y):
    """Split ``X``/``Y`` into the training and validation parts of fold ``i``.

    The data are cut into ``k`` contiguous folds of ``X.shape[0] // k``
    samples; the last fold (``i == k - 1``) also takes any remainder.

    Returns:
        ``(x_train, y_train, x_val, y_val)``.
    """
    fold_size = X.shape[0] // k
    start = i * fold_size

    # Last fold: validation runs to the end, training is everything before it.
    if i == k - 1:
        return X[:start], Y[:start], X[start:], Y[start:]

    stop = start + fold_size
    # Training data is whatever lies on either side of the validation window.
    x_train = np.concatenate((X[:start], X[stop:]), axis=0)
    y_train = np.concatenate((Y[:start], Y[stop:]), axis=0)
    return x_train, y_train, X[start:stop], Y[start:stop]


def train(model, X_train, Y_train, X_val, Y_val, batchsize, lr, epochs):
    """Train ``model`` and record per-epoch training and validation losses.

    The optimizer minimises the RMSE of each mini-batch. After every epoch
    the whole validation set is scored in one forward pass.

    Args:
        model: Network to train; updated in place.
        X_train, Y_train: Training features and targets.
        X_val, Y_val: Validation features and targets.
        batchsize: Mini-batch size of the training DataLoader.
        lr: Adam learning rate.
        epochs: Number of passes over the training data.

    Returns:
        ``(train_Loss, val_Loss)``: one entry per epoch. ``train_Loss``
        holds the sample-weighted mean MSE of the epoch as a float;
        ``val_Loss`` holds the validation RMSE as a numpy value.
    """
    train_loader = paddle.io.DataLoader(
        Mydataset(X_train, Y_train), batch_size=batchsize, shuffle=True, num_workers=0
    )
    loss_func = paddle.nn.MSELoss()
    optimizer = paddle.optimizer.Adam(
        parameters=model.parameters(),
        learning_rate=lr,
        beta1=0.9,
        beta2=0.99,
        weight_decay=1e-5,
    )
    train_Loss = []
    val_Loss = []
    for epoch in range(epochs):
        model.train()
        train_loss = 0.0
        print(epoch)
        for input_, tar in train_loader:
            output = model(input_)
            loss = loss_func(output, tar)
            # Back-propagate RMSE rather than MSE.
            rmse = paddle.sqrt(loss)
            rmse.backward()
            optimizer.step()
            optimizer.clear_grad()
            # Weight by the real batch size so a smaller final batch is not
            # over-counted (the old code multiplied every batch by batchsize).
            train_loss += loss.item() * tar.shape[0]
        train_loss /= len(X_train)
        train_Loss.append(train_loss)

        # Score validation data in eval mode so dropout is disabled; the next
        # epoch switches back to training mode via model.train() above.
        model.eval()
        with paddle.no_grad():
            val_pre = model(paddle.to_tensor(X_val))
            val_loss = loss_func(val_pre, paddle.to_tensor(Y_val))
            val_loss = paddle.sqrt(val_loss)
            val_loss = val_loss.detach().numpy()
            val_Loss.append(val_loss)

    return train_Loss, val_Loss


def k_train(model, k, X, Y, batch_size, lr, epochs):
    """Run k-fold cross-validation and collect each fold's final-epoch losses.

    Returns:
        ``(train_Loss, val_Loss)``: two lists with one entry per fold, taken
        from the last epoch of that fold's call to ``train``.
    """
    final_train, final_val = [], []
    for fold in range(k):
        # The same model object is reused, so re-initialise it before each fold.
        model.initialize()
        fold_data = k_fold(k, fold, X, Y)
        train_hist, val_hist = train(model, *fold_data, batch_size, lr, epochs)
        final_train.append(train_hist[-1])
        final_val.append(val_hist[-1])
    return final_train, final_val


# Build the network in double precision.
model = Net().astype(dtype="float64")

# Cross-validate over 9 folds and pick the split with the lowest final
# validation loss.
train_losses, val_losses = k_train(model, 9, xtrain, ytrain, BATCH, LR, EPOCHS)
train_i = val_losses.index(min(val_losses))

# Re-initialise the model and train it on the best split.
model.initialize()
x_train, y_train, x_val, y_val = k_fold(9, train_i, xtrain, ytrain)
train_loss, val_loss = train(model, x_train, y_train, x_val, y_val, BATCH, LR, EPOCHS)

# Switch to inference mode, save the weights, then predict the held-out test set.
model.eval()
paddle.save(model.state_dict(), "F://pypython_py//paddle//model//est.pdparams")
ytest_pre = model(paddle.to_tensor(xtest)).detach().numpy()

# Write ground truth and predictions, one value per line.
with open("F://pypython_py//paddle//train//est.txt", "w") as truth_file:
    truth_file.writelines(str(value) + "\n" for value in ytest)
with open("F://pypython_py//paddle//train//estpre.txt", "w") as pred_file:
    pred_file.writelines(str(value) + "\n" for value in ytest_pre)
Binary file added TADF/Est/output/checkpoints/latest.pdopt
Binary file not shown.
Binary file added TADF/Est/output/checkpoints/latest.pdparams
Binary file not shown.
Binary file added TADF/Est/output/checkpoints/latest.pdstates
Binary file not shown.
Loading