diff --git a/README.md b/README.md index 5a12c5d9..d51ca559 100644 --- a/README.md +++ b/README.md @@ -180,7 +180,6 @@ original implementations (with proper licencing). * **[MoGAT](kgcnn/literature/MoGAT)**: [Multi-order graph attention network for water solubility prediction and interpretation](https://www.nature.com/articles/s41598-022-25701-5) by Lee et al. (2023) * **[MXMNet](kgcnn/literature/MXMNet)**: [Molecular Mechanics-Driven Graph Neural Network with Multiplex Graph for Molecular Structures](https://arxiv.org/abs/2011.07457) by Zhang et al. (2020) * **[NMPN](kgcnn/literature/NMPN)**: [Neural Message Passing for Quantum Chemistry](http://arxiv.org/abs/1704.01212) by Gilmer et al. (2017) -* **[Unet](kgcnn/literature/Unet)**: [Graph U-Nets](http://proceedings.mlr.press/v97/gao19a/gao19a.pdf) by H. Gao and S. Ji (2019) * **[PAiNN](kgcnn/literature/PAiNN)**: [Equivariant message passing for the prediction of tensorial properties and molecular spectra](https://arxiv.org/pdf/2102.03150.pdf) by Schütt et al. (2020) * **[RGCN](kgcnn/literature/RGCN)**: [Modeling Relational Data with Graph Convolutional Networks](https://arxiv.org/abs/1703.06103) by Schlichtkrull et al. (2017) * **[rGIN](kgcnn/literature/rGIN)** [Random Features Strengthen Graph Neural Networks](https://arxiv.org/abs/2002.03155) by Sato et al. (2020) diff --git a/kgcnn/data/base.py b/kgcnn/data/base.py index 2cd302bb..41bdc2a1 100644 --- a/kgcnn/data/base.py +++ b/kgcnn/data/base.py @@ -330,7 +330,8 @@ def rename_property_on_graphs(self, old_property_name: str, new_property_name: s get = obtain_property def tf_disjoint_data_generator(self, inputs, outputs, **kwargs): - module_logger.info("Dataloader is experimental and not fully tested nor stable.") + assert isinstance(inputs, list), "Dictionary input is not yet implemented" + module_logger.info("Dataloader is experimental and not fully tested or stable.") return experimental_tf_disjoint_list_generator(self, inputs=inputs, outputs=outputs, **kwargs) diff --git a/kgcnn/io/loader.py b/kgcnn/io/loader.py index 3c9c9c60..ac645427 100644 --- a/kgcnn/io/loader.py +++ b/kgcnn/io/loader.py @@ -1,4 +1,5 @@ import keras as ks +from typing import Union import numpy as np import tensorflow as tf @@ -90,3 +91,97 @@ def generator(): ) return data_loader + + +def tf_disjoint_list_generator( + graphs, + inputs: list, + outputs: list, + assignment_to_id: list = None, + assignment_of_indices: list = None, + flag_batch_id: list = None, + flag_count: list = None, + flag_subgraph_id: list = None, + batch_size=32, + shuffle=True +): + + def generator(): + dataset_size = len(graphs) + data_index = np.arange(dataset_size) + num_inputs = len(inputs) + all_flags = [flag_batch_id, flag_count, flag_subgraph_id] + is_attributes = [True if all([x[i] is not None for x in all_flags]) else False for i in range(num_inputs)] + where_batch = [] + where_subgraph= [] + where_count = [] + num_attributes = sum(is_attributes) + + if shuffle: + np.random.shuffle(data_index) + + for batch_index in range(0, dataset_size, batch_size): + idx = data_index[batch_index:batch_index + batch_size] + graphs_batch = [graphs[i] for i in idx] + + out = [None for _ in range(num_attributes)] + out_counts = [None for _ in range(num_attributes)] + + for i in range(num_inputs): + if not is_attributes[i]: + continue + + array_list = [x[inputs[i]["name"]] for x in graphs_batch] + if assignment_to_id[i] is None: + out[i] = np.array(array_list, dtype=inputs[i]["dtype"]) + else: + out[i] = np.concatenate(array_list, axis=0) + counts = np.array([len(x) for x in array_list], dtype="int64") + out_counts[i] = counts + ids = assignment_to_id[i] + if out[where_count[ids]] is not None: + out[where_count[ids]] = counts + if out[where_batch[ids]] is not None: + out[where_batch[ids]] = np.repeat(np.arange(len(array_list), dtype="int64"), repeats=counts) + if out[where_subgraph[ids]] is not None: + out[where_subgraph[ids]] = np.concatenate([np.arange(x, dtype="int64") for x in counts], axis=0) + + # Indices + for i in range(num_inputs): + if assignment_of_indices[i] is not None: + edge_indices_flatten = out[i] + count_nodes = out_counts[assignment_of_indices[i]] + count_edges = out_counts[i] + node_splits = np.pad(np.cumsum(count_nodes), [[1, 0]]) + offset_edge_indices = np.expand_dims(np.repeat(node_splits[:-1], count_edges), axis=-1) + disjoint_indices = edge_indices_flatten + offset_edge_indices + disjoint_indices = np.transpose(disjoint_indices) + out[i] = disjoint_indices + + if isinstance(outputs, list): + out_y = [] + for k in range(len(outputs)): + array_list = [x[outputs[k]["name"]] for x in graphs_batch] + out_y.append(np.array(array_list, dtype=outputs[k]["dtype"])) + else: + out_y = np.array( + [x[outputs["name"]] for x in graphs_batch], dtype=outputs["dtype"]) + + yield tuple(out), out_y + + input_spec = tuple([tf.TensorSpec(shape=tuple([None] + list(x["shape"])), dtype=x["dtype"]) for x in inputs]) + + if isinstance(outputs, list): + output_spec = tuple([tf.TensorSpec(shape=tuple([None] + list(x["shape"])), dtype=x["dtype"]) for x in outputs]) + else: + output_spec = tf.TensorSpec(shape=tuple([None] + list(outputs["shape"])), dtype=outputs["dtype"]) + + data_loader = tf.data.Dataset.from_generator( + generator, + output_signature=( + input_spec, + output_spec + ) + ) + + return data_loader diff --git a/kgcnn/literature/NMPN/__init__.py b/kgcnn/literature/NMPN/__init__.py index 547d2889..69e719cb 100644 --- a/kgcnn/literature/NMPN/__init__.py +++ b/kgcnn/literature/NMPN/__init__.py @@ -1,10 +1,10 @@ from ._make import make_model, model_default -# from ._make import make_crystal_model, model_crystal_default +from ._make import make_crystal_model, model_crystal_default __all__ = [ "make_model", "model_default", - # "make_crystal_model", - # "model_crystal_default" + "make_crystal_model", + "model_crystal_default" ] diff --git a/training/hyper/hyper_mp_jdft2d.py b/training/hyper/hyper_mp_jdft2d.py index d387f7e2..f6568afd 100644 --- a/training/hyper/hyper_mp_jdft2d.py +++ b/training/hyper/hyper_mp_jdft2d.py @@ -232,7 +232,7 @@ "input_tensor_type": "ragged", 'input_embedding': None, "input_node_embedding": {"input_dim": 95, "output_dim": 64}, - "input_edge_embedding": {"input_dim": 100, "output_dim": 64}, + # "input_edge_embedding": {"input_dim": 100, "output_dim": 64}, "make_distance": True, "expand_distance": True, 'gauss_args': {"bins": 25, "distance": 5, "offset": 0.0, "sigma": 0.4}, 'meg_block_args': {'node_embed': [64, 32, 32], 'edge_embed': [64, 32, 32], diff --git a/training/hyper/hyper_qm9_energies.py b/training/hyper/hyper_qm9_energies.py index ca6d9f85..28a1b1fb 100644 --- a/training/hyper/hyper_qm9_energies.py +++ b/training/hyper/hyper_qm9_energies.py @@ -94,7 +94,7 @@ "input_tensor_type": "ragged", "input_embedding": None, "input_node_embedding": {"input_dim": 10, "output_dim": 16}, - "input_edge_embedding": {"input_dim": 100, "output_dim": 64}, + # "input_edge_embedding": {"input_dim": 100, "output_dim": 64}, "gauss_args": {"bins": 20, "distance": 4, "offset": 0.0, "sigma": 0.4}, "meg_block_args": {"node_embed": [64, 32, 32], "edge_embed": [64, 32, 32], "env_embed": [64, 32, 32], "activation": "kgcnn>softplus2"}, diff --git a/training/results/MatProjectJdft2dDataset/CGCNN_make_crystal_model/CGCNN_MatProjectJdft2dDataset_score.yaml b/training/results/MatProjectJdft2dDataset/CGCNN_make_crystal_model/CGCNN_MatProjectJdft2dDataset_score.yaml new file mode 100644 index 00000000..c0d592ed --- /dev/null +++ b/training/results/MatProjectJdft2dDataset/CGCNN_make_crystal_model/CGCNN_MatProjectJdft2dDataset_score.yaml @@ -0,0 +1,156 @@ +OS: posix_linux +backend: tensorflow +cuda_available: 'True' +data_unit: meV/atom +date_time: '2023-12-15 14:11:18' +device_id: '[LogicalDevice(name=''/device:CPU:0'', device_type=''CPU''), LogicalDevice(name=''/device:GPU:0'', + device_type=''GPU'')]' +device_memory: '[]' +device_name: '[{}, {''compute_capability'': (8, 0), ''device_name'': ''NVIDIA A100 + 80GB PCIe''}]' +epochs: +- 1000 +- 1000 +- 1000 +- 1000 +- 1000 +execute_folds: null +kgcnn_version: 4.0.0 +learning_rate: +- 1.1979999726463575e-05 +- 1.1979999726463575e-05 +- 1.1979999726463575e-05 +- 1.1979999726463575e-05 +- 1.1979999726463575e-05 +loss: +- 0.0164102204144001 +- 0.049058035016059875 +- 0.019465263932943344 +- 0.04565507546067238 +- 0.018842527642846107 +max_learning_rate: +- 0.0010000000474974513 +- 0.0010000000474974513 +- 0.0010000000474974513 +- 0.0010000000474974513 +- 0.0010000000474974513 +max_loss: +- 0.4873706102371216 +- 0.4823596477508545 +- 0.4699196219444275 +- 0.44965362548828125 +- 0.46280232071876526 +max_scaled_mean_absolute_error: +- 70.67137908935547 +- 67.2155532836914 +- 64.04151916503906 +- 57.88063049316406 +- 55.339359283447266 +max_scaled_root_mean_squared_error: +- 152.97747802734375 +- 145.45938110351562 +- 142.39430236816406 +- 133.3281707763672 +- 124.1724624633789 +max_val_loss: +- 0.26619401574134827 +- 0.3685653507709503 +- 0.40042510628700256 +- 0.6030166745185852 +- 0.6730947494506836 +max_val_scaled_mean_absolute_error: +- 38.59397888183594 +- 51.37816619873047 +- 54.60137939453125 +- 77.64346313476562 +- 80.49958038330078 +max_val_scaled_root_mean_squared_error: +- 83.75748443603516 +- 116.78331756591797 +- 139.4757537841797 +- 208.695556640625 +- 203.95716857910156 +min_learning_rate: +- 1.1979999726463575e-05 +- 1.1979999726463575e-05 +- 1.1979999726463575e-05 +- 1.1979999726463575e-05 +- 1.1979999726463575e-05 +min_loss: +- 0.0164102204144001 +- 0.049058035016059875 +- 0.019465263932943344 +- 0.04565083235502243 +- 0.018809372559189796 +min_scaled_mean_absolute_error: +- 2.3664445877075195 +- 6.843836784362793 +- 2.6597533226013184 +- 5.826504707336426 +- 2.251401424407959 +min_scaled_root_mean_squared_error: +- 12.384324073791504 +- 72.17166900634766 +- 12.613103866577148 +- 70.442138671875 +- 11.607565879821777 +min_val_loss: +- 0.1912095546722412 +- 0.27041757106781006 +- 0.34239882230758667 +- 0.4209592342376709 +- 0.5613946318626404 +min_val_scaled_mean_absolute_error: +- 27.72239875793457 +- 37.69633102416992 +- 46.68900680541992 +- 54.20203399658203 +- 67.14065551757812 +min_val_scaled_root_mean_squared_error: +- 50.73049545288086 +- 101.22430419921875 +- 123.49358367919922 +- 149.4101104736328 +- 180.5041961669922 +model_class: make_crystal_model +model_name: CGCNN +model_version: '2023-11-28' +multi_target_indices: null +number_histories: 5 +scaled_mean_absolute_error: +- 2.3664445877075195 +- 6.843836784362793 +- 2.6597533226013184 +- 5.827065944671631 +- 2.255373477935791 +scaled_root_mean_squared_error: +- 12.439318656921387 +- 72.18438720703125 +- 12.624910354614258 +- 70.45165252685547 +- 11.619587898254395 +seed: 42 +time_list: +- '0:01:59.293506' +- '0:01:55.045981' +- '0:01:54.909115' +- '0:01:54.808906' +- '0:01:57.223260' +val_loss: +- 0.25275886058807373 +- 0.29345396161079407 +- 0.3919019401073456 +- 0.5991652011871338 +- 0.6718166470527649 +val_scaled_mean_absolute_error: +- 36.646087646484375 +- 40.90761184692383 +- 53.439170837402344 +- 77.14754486083984 +- 80.34671783447266 +val_scaled_root_mean_squared_error: +- 79.33216857910156 +- 105.15802001953125 +- 138.79078674316406 +- 180.42710876464844 +- 199.3755645751953 diff --git a/training/results/MatProjectJdft2dDataset/CGCNN_make_crystal_model/CGCNN_hyper.json b/training/results/MatProjectJdft2dDataset/CGCNN_make_crystal_model/CGCNN_hyper.json new file mode 100644 index 00000000..4b179808 --- /dev/null +++ b/training/results/MatProjectJdft2dDataset/CGCNN_make_crystal_model/CGCNN_hyper.json @@ -0,0 +1 @@ +{"model": {"class_name": "make_crystal_model", "module_name": "kgcnn.literature.CGCNN", "config": {"name": "CGCNN", "inputs": [{"shape": [null], "name": "node_number", "dtype": "int64", "ragged": true}, {"shape": [null, 3], "name": "node_frac_coordinates", "dtype": "float64", "ragged": true}, {"shape": [null, 2], "name": "range_indices", "dtype": "int64", "ragged": true}, {"shape": [null, 3], "name": "range_image", "dtype": "float32", "ragged": true}, {"shape": [3, 3], "name": "graph_lattice", "dtype": "float64", "ragged": false}], "input_tensor_type": "ragged", "input_node_embedding": {"input_dim": 95, "output_dim": 64}, "representation": "unit", "expand_distance": true, "make_distances": true, "gauss_args": {"bins": 60, "distance": 6, "offset": 0.0, "sigma": 0.4}, "conv_layer_args": {"units": 128, "activation_s": "kgcnn>shifted_softplus", "activation_out": "kgcnn>shifted_softplus", "batch_normalization": true}, "node_pooling_args": {"pooling_method": "scatter_mean"}, "depth": 4, "output_mlp": {"use_bias": [true, true, false], "units": [128, 64, 1], "activation": ["kgcnn>shifted_softplus", "kgcnn>shifted_softplus", "linear"]}}}, "training": {"cross_validation": {"class_name": "KFold", "config": {"n_splits": 5, "random_state": 42, "shuffle": true}}, "fit": {"batch_size": 128, "epochs": 1000, "validation_freq": 10, "verbose": 2, "callbacks": [{"class_name": "kgcnn>LinearLearningRateScheduler", "config": {"learning_rate_start": 0.001, "learning_rate_stop": 1e-05, "epo_min": 500, "epo": 1000, "verbose": 0}}]}, "compile": {"optimizer": {"class_name": "Adam", "config": {"learning_rate": 0.001}}, "loss": "mean_absolute_error"}, "scaler": {"class_name": "StandardLabelScaler", "module_name": "kgcnn.data.transform.scaler.standard", "config": {"with_std": true, "with_mean": true, "copy": true}}}, "data": {"data_unit": "meV/atom"}, "info": {"postfix": "", "postfix_file": "", "kgcnn_version": "4.0.0"}, "dataset": {"class_name": "MatProjectJdft2dDataset", "module_name": "kgcnn.data.datasets.MatProjectJdft2dDataset", "config": {}, "methods": [{"map_list": {"method": "set_range_periodic", "max_distance": 6.0}}]}} \ No newline at end of file diff --git a/training/results/MatProjectJdft2dDataset/DimeNetPP_make_crystal_model/DimeNetPP_MatProjectJdft2dDataset_score.yaml b/training/results/MatProjectJdft2dDataset/DimeNetPP_make_crystal_model/DimeNetPP_MatProjectJdft2dDataset_score.yaml new file mode 100644 index 00000000..c7c36344 --- /dev/null +++ b/training/results/MatProjectJdft2dDataset/DimeNetPP_make_crystal_model/DimeNetPP_MatProjectJdft2dDataset_score.yaml @@ -0,0 +1,138 @@ +OS: posix_linux +backend: tensorflow +cuda_available: 'True' +data_unit: meV/atom +date_time: '2023-12-15 16:03:44' +device_id: '[LogicalDevice(name=''/device:CPU:0'', device_type=''CPU''), LogicalDevice(name=''/device:GPU:0'', + device_type=''GPU'')]' +device_memory: '[]' +device_name: '[{}, {''compute_capability'': (8, 0), ''device_name'': ''NVIDIA A100 + 80GB PCIe''}]' +epochs: +- 780 +- 780 +- 780 +- 780 +- 780 +execute_folds: null +kgcnn_version: 4.0.0 +loss: +- 0.02414010465145111 +- 0.033606626093387604 +- 0.019114937633275986 +- 0.017684955149888992 +- 0.018963322043418884 +max_loss: +- 0.5006669163703918 +- 0.5069660544395447 +- 0.491592675447464 +- 0.474305123090744 +- 0.48968806862831116 +max_scaled_mean_absolute_error: +- 72.76997375488281 +- 70.56968688964844 +- 66.72997283935547 +- 61.24168014526367 +- 58.24784469604492 +max_scaled_root_mean_squared_error: +- 274.29949951171875 +- 570.3961791992188 +- 139.32717895507812 +- 129.78968811035156 +- 121.00675201416016 +max_val_loss: +- 0.3028877079486847 +- 0.38531768321990967 +- 0.4087398648262024 +- 0.5757158994674683 +- 0.5982771515846252 +max_val_scaled_mean_absolute_error: +- 43.91399002075195 +- 53.884586334228516 +- 55.94514465332031 +- 74.37774658203125 +- 71.91637420654297 +max_val_scaled_root_mean_squared_error: +- 94.42359924316406 +- 128.80096435546875 +- 132.58773803710938 +- 168.0118408203125 +- 187.9774169921875 +min_loss: +- 0.017726201564073563 +- 0.01848507672548294 +- 0.01753951981663704 +- 0.01579798199236393 +- 0.01495886966586113 +min_scaled_mean_absolute_error: +- 2.5789570808410645 +- 2.5784459114074707 +- 2.3713207244873047 +- 2.0377860069274902 +- 1.792113184928894 +min_scaled_root_mean_squared_error: +- 7.1383771896362305 +- 7.110655784606934 +- 6.53678035736084 +- 5.663389682769775 +- 4.469799518585205 +min_val_loss: +- 0.20106370747089386 +- 0.2661769986152649 +- 0.3463006615638733 +- 0.42671656608581543 +- 0.5159809589385986 +min_val_scaled_mean_absolute_error: +- 29.151100158691406 +- 37.19593811035156 +- 47.44532775878906 +- 55.17722702026367 +- 61.99851989746094 +min_val_scaled_root_mean_squared_error: +- 58.186100006103516 +- 99.66935729980469 +- 120.39973449707031 +- 142.9620819091797 +- 172.0601806640625 +model_class: make_crystal_model +model_name: DimeNetPP +model_version: '2023-12-04' +multi_target_indices: null +number_histories: 5 +scaled_mean_absolute_error: +- 3.5114622116088867 +- 4.672144889831543 +- 2.6156845092773438 +- 2.2767393589019775 +- 2.2692582607269287 +scaled_root_mean_squared_error: +- 9.191644668579102 +- 15.601258277893066 +- 7.893765449523926 +- 6.115713596343994 +- 5.55543851852417 +seed: 42 +time_list: +- '0:22:09.360474' +- '0:22:20.708775' +- '0:22:35.412688' +- '0:22:14.474465' +- '0:22:02.065494' +val_loss: +- 0.22789409756660461 +- 0.31266239285469055 +- 0.3644629418849945 +- 0.45651325583457947 +- 0.5467742085456848 +val_scaled_mean_absolute_error: +- 33.04108810424805 +- 43.78741455078125 +- 49.974403381347656 +- 58.943748474121094 +- 65.69357299804688 +val_scaled_root_mean_squared_error: +- 64.45917510986328 +- 110.56239318847656 +- 127.53701782226562 +- 148.5798797607422 +- 179.1616668701172 diff --git a/training/results/MatProjectJdft2dDataset/DimeNetPP_make_crystal_model/DimeNetPP_hyper.json b/training/results/MatProjectJdft2dDataset/DimeNetPP_make_crystal_model/DimeNetPP_hyper.json new file mode 100644 index 00000000..0d9f319d --- /dev/null +++ b/training/results/MatProjectJdft2dDataset/DimeNetPP_make_crystal_model/DimeNetPP_hyper.json @@ -0,0 +1 @@ +{"model": {"class_name": "make_crystal_model", "module_name": "kgcnn.literature.DimeNetPP", "config": {"name": "DimeNetPP", "inputs": [{"shape": [null], "name": "node_number", "dtype": "int64", "ragged": true}, {"shape": [null, 3], "name": "node_coordinates", "dtype": "float32", "ragged": true}, {"shape": [null, 2], "name": "range_indices", "dtype": "int64", "ragged": true}, {"shape": [null, 2], "name": "angle_indices", "dtype": "int64", "ragged": true}, {"shape": [null, 3], "name": "range_image", "dtype": "int64", "ragged": true}, {"shape": [3, 3], "name": "graph_lattice", "dtype": "float32", "ragged": false}], "input_tensor_type": "ragged", "input_embedding": null, "input_node_embedding": {"input_dim": 95, "output_dim": 128, "embeddings_initializer": {"class_name": "RandomUniform", "config": {"minval": -1.7320508075688772, "maxval": 1.7320508075688772}}}, "emb_size": 128, "out_emb_size": 256, "int_emb_size": 64, "basis_emb_size": 8, "num_blocks": 4, "num_spherical": 7, "num_radial": 6, "cutoff": 5.0, "envelope_exponent": 5, "num_before_skip": 1, "num_after_skip": 2, "num_dense_output": 3, "num_targets": 1, "extensive": false, "output_init": "zeros", "activation": "swish", "verbose": 10, "output_embedding": "graph", "use_output_mlp": false, "output_mlp": {}}}, "training": {"cross_validation": {"class_name": "KFold", "config": {"n_splits": 5, "random_state": 42, "shuffle": true}}, "fit": {"batch_size": 16, "epochs": 780, "validation_freq": 10, "verbose": 2, "callbacks": [], "validation_batch_size": 8}, "compile": {"optimizer": {"class_name": "Adam", "config": {"learning_rate": {"class_name": "kgcnn>LinearWarmupExponentialDecay", "config": {"learning_rate": 0.001, "warmup_steps": 3000.0, "decay_steps": 4000000.0, "decay_rate": 0.01}}, "use_ema": true, "amsgrad": true}}, "loss": "mean_absolute_error"}, "scaler": {"class_name": "StandardLabelScaler", "module_name": "kgcnn.data.transform.scaler.standard", "config": {"with_std": true, "with_mean": true, "copy": true}}, "multi_target_indices": null}, "data": {}, "info": {"postfix": "", "postfix_file": "", "kgcnn_version": "4.0.0"}, "dataset": {"class_name": "MatProjectJdft2dDataset", "module_name": "kgcnn.data.datasets.MatProjectJdft2dDataset", "config": {}, "methods": [{"map_list": {"method": "set_range_periodic", "max_distance": 5.0, "max_neighbours": 17}}, {"map_list": {"method": "set_angle", "allow_multi_edges": true, "allow_reverse_edges": true}}]}} \ No newline at end of file