From 719b01e872cd554cdc5ed9a9eaa00783904cc8d3 Mon Sep 17 00:00:00 2001 From: tinatn29 Date: Wed, 23 Apr 2025 15:00:16 -0400 Subject: [PATCH 01/19] add test for amd --- tests/test_amd.py | 45 +++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 45 insertions(+) create mode 100644 tests/test_amd.py diff --git a/tests/test_amd.py b/tests/test_amd.py new file mode 100644 index 0000000..ca0275a --- /dev/null +++ b/tests/test_amd.py @@ -0,0 +1,45 @@ +import pytest +from pathlib import Path +from diffpy.similarity.metrics.amd import amd_compare, pdd_compare + + +# get cif files for tests +curr_path = Path().absolute() +cif1 = curr_path / 'test_data' / 'mp-390.cif' +cif2 = curr_path / 'test_data' / 'mp-458.cif' + +# set up input and output for tests +# for each case: (input1, input2, expected_output) +amd_test_datasets = [ + # test identical cif files + (cif1, cif1, 0.0), + # test different cif files + (cif1, cif2, dm(cif1, cif2)), + # test generate a distance matrix from two lists + ([cif1, cif2], [cif1, cif2], dm) +] + +pdd_test_datasets = [ + # test identical cif files + (cif1, cif1, 0.0), + # test different cif files + (cif1, cif2, dm(cif1, cif2)), + # test generate a distance matrix from two lists + ([cif1, cif2], [cif1, cif2], dm) +] + +@pytest.mark.parametrize("input1, input2, expected_output", amd_test_datasets) +def test_amd_compare(input1, input2, expected_output): + """ + Test the amd_compare function. + """ + result = amd_compare(input1, input2) + assert result == expected_output + +@pytest.mark.parametrize("input1, input2, expected_output", pdd_test_datasets) +def test_pdd_compare(input1, input2, expected_output): + """ + Test the pdd_compare function. + """ + result = pdd_compare(input1, input2) + assert result == expected_output From 3c649589d41ae62da7311af827780d0862672c26 Mon Sep 17 00:00:00 2001 From: tinatn29 Date: Wed, 23 Apr 2025 15:00:33 -0400 Subject: [PATCH 02/19] add cif files for tests --- tests/test_data/mp-390.cif | 32 ++++++++++++++++++++++++++++++++ tests/test_data/mp-458.cif | 36 ++++++++++++++++++++++++++++++++++++ 2 files changed, 68 insertions(+) create mode 100644 tests/test_data/mp-390.cif create mode 100644 tests/test_data/mp-458.cif diff --git a/tests/test_data/mp-390.cif b/tests/test_data/mp-390.cif new file mode 100644 index 0000000..974aa00 --- /dev/null +++ b/tests/test_data/mp-390.cif @@ -0,0 +1,32 @@ +# generated using pymatgen +data_TiO2 +_symmetry_space_group_name_H-M 'P 1' +_cell_length_a 3.78253951 +_cell_length_b 3.78253984 +_cell_length_c 5.50145140 +_cell_angle_alpha 110.10714408 +_cell_angle_beta 110.10713258 +_cell_angle_gamma 89.99997885 +_symmetry_Int_Tables_number 1 +_chemical_formula_structural TiO2 +_chemical_formula_sum 'Ti2 O4' +_cell_volume 68.78397225 +_cell_formula_units_Z 2 +loop_ + _symmetry_equiv_pos_site_id + _symmetry_equiv_pos_as_xyz + 1 'x, y, z' +loop_ + _atom_site_type_symbol + _atom_site_label + _atom_site_symmetry_multiplicity + _atom_site_fract_x + _atom_site_fract_y + _atom_site_fract_z + _atom_site_occupancy + Ti Ti0 1 0.87500000 0.62500000 0.25000000 1 + Ti Ti1 1 0.12500000 0.37500000 0.75000000 1 + O O2 1 0.33215263 0.58215263 0.16430425 1 + O O3 1 0.08215263 0.83215263 0.66430425 1 + O O4 1 0.91784737 0.16784737 0.33569575 1 + O O5 1 0.66784737 0.41784737 0.83569575 1 diff --git a/tests/test_data/mp-458.cif b/tests/test_data/mp-458.cif new file mode 100644 index 0000000..6b6730b --- /dev/null +++ b/tests/test_data/mp-458.cif @@ -0,0 +1,36 @@ +# generated using pymatgen +data_Ti2O3 +_symmetry_space_group_name_H-M 'P 1' +_cell_length_a 5.45736044 +_cell_length_b 5.45736193 +_cell_length_c 5.45736145 +_cell_angle_alpha 55.79413805 +_cell_angle_beta 55.79412997 +_cell_angle_gamma 55.79413435 +_symmetry_Int_Tables_number 1 +_chemical_formula_structural Ti2O3 +_chemical_formula_sum 'Ti4 O6' +_cell_volume 103.72111553 +_cell_formula_units_Z 2 +loop_ + _symmetry_equiv_pos_site_id + _symmetry_equiv_pos_as_xyz + 1 'x, y, z' +loop_ + _atom_site_type_symbol + _atom_site_label + _atom_site_symmetry_multiplicity + _atom_site_fract_x + _atom_site_fract_y + _atom_site_fract_z + _atom_site_occupancy + Ti Ti0 1 0.34453747 0.34453747 0.34453747 1 + Ti Ti1 1 0.15546253 0.15546253 0.15546253 1 + Ti Ti2 1 0.65546253 0.65546253 0.65546253 1 + Ti Ti3 1 0.84453747 0.84453747 0.84453747 1 + O O4 1 0.56439446 0.93560554 0.25000000 1 + O O5 1 0.25000000 0.56439446 0.93560554 1 + O O6 1 0.93560554 0.25000000 0.56439446 1 + O O7 1 0.43560554 0.06439446 0.75000000 1 + O O8 1 0.75000000 0.43560554 0.06439446 1 + O O9 1 0.06439446 0.75000000 0.43560554 1 From 771a24d788a993016adbf412f172ba7c87868931 Mon Sep 17 00:00:00 2001 From: tinatn29 Date: Wed, 23 Apr 2025 15:00:57 -0400 Subject: [PATCH 03/19] edit docstring --- src/diffpy/similarity/metrics/amd.py | 53 ++++++++++++++++++++++++++++ 1 file changed, 53 insertions(+) create mode 100644 src/diffpy/similarity/metrics/amd.py diff --git a/src/diffpy/similarity/metrics/amd.py b/src/diffpy/similarity/metrics/amd.py new file mode 100644 index 0000000..20d2402 --- /dev/null +++ b/src/diffpy/similarity/metrics/amd.py @@ -0,0 +1,53 @@ +import amd + + +def amd_compare(cif1, cif2, k=100): + """ + Compare two CIF files or two lists fo CIF files using the AMD metric. + Use cif1 = cif2 = cif_list = [cif_0, cif_1, ...] + to compare all CIF files in cif_list. + + Parameters + ---------- + cif1 : str + Path to the first CIF file (or list of CIF files) + cif2 : str + Path to the second CIF file (or list of CIF files) + k : int + Number of nearest neighbors to consider. + Default is 100. + + Returns + ------- + dm : numpy.ndarray + Distance matrix of shape (len(cif1), len(cif2)). + Each element represents the AMD distance between two structures from CIF files. + """ + dm = amd.compare(cif1, cif2, by='AMD', k=k) + return dm + + +def pdd_compare(cif1, cif2, k=100): + """ + Compare two CIF files or two lists fo CIF files using the PDD metric. + Use cif1 = cif2 = cif_list = [cif_0, cif_1, ...] + to compare all CIF files in cif_list. + + Parameters + ---------- + cif1 : str + Path to the first CIF file (or list of CIF files) + cif2 : str + Path to the second CIF file (or list of CIF files) + k : int + Number of nearest neighbors to consider. + Default is 100. + + Returns + ------- + dm : numpy.ndarray + Distance matrix of shape (len(cif1), len(cif2)). + Each element represents the PDD distance between two structures from CIF files. + """ + dm = amd.compare(cif1, cif2, by='PDD', k=k) + return dm \ No newline at end of file From 7596957a1d6278b05e7c9e7d54e1c5225eb0cb84 Mon Sep 17 00:00:00 2001 From: tinatn29 Date: Wed, 23 Apr 2025 15:53:42 -0400 Subject: [PATCH 04/19] edit amd and pdd functions --- src/diffpy/similarity/metrics/amd.py | 17 +++++++++++++---- 1 file changed, 13 insertions(+), 4 deletions(-) diff --git a/src/diffpy/similarity/metrics/amd.py b/src/diffpy/similarity/metrics/amd.py index 20d2402..b68e9d4 100644 --- a/src/diffpy/similarity/metrics/amd.py +++ b/src/diffpy/similarity/metrics/amd.py @@ -19,12 +19,17 @@ def amd_compare(cif1, cif2, k=100): Returns ------- - dm : numpy.ndarray - Distance matrix of shape (len(cif1), len(cif2)). + dm : float or pandas.DataFrame + If cif1 and cif2 are both strings, return the AMD distance value (float). + If either cif1 or cif2 is a list, return a distance matrix of shape (len(cif1), len(cif2)). Each element represents the AMD distance between two structures from CIF files. """ dm = amd.compare(cif1, cif2, by='AMD', k=k) - return dm + if isinstance(cif1, list) or isinstance(cif2, list): + # if at least one input is a list, return the distance matrix + return dm + else: + return float(dm.iloc[0, 0]) # return the single distance value as a float def pdd_compare(cif1, cif2, k=100): @@ -50,4 +55,8 @@ def pdd_compare(cif1, cif2, k=100): Each element represents the PDD distance between two structures from CIF files. """ dm = amd.compare(cif1, cif2, by='PDD', k=k) - return dm \ No newline at end of file + if isinstance(cif1, list) or isinstance(cif2, list): + # if at least one input is a list, return the distance matrix + return dm + else: + return float(dm.iloc[0, 0]) # return the single distance value as a float \ No newline at end of file From d6ac6eaa9963edb4dec17d6378cf58a9f3e073c4 Mon Sep 17 00:00:00 2001 From: tinatn29 Date: Wed, 23 Apr 2025 15:53:59 -0400 Subject: [PATCH 05/19] edit tests --- tests/test_amd.py | 46 ++++++++++++++++++++++++++++++---------------- 1 file changed, 30 insertions(+), 16 deletions(-) diff --git a/tests/test_amd.py b/tests/test_amd.py index ca0275a..7b645a0 100644 --- a/tests/test_amd.py +++ b/tests/test_amd.py @@ -1,12 +1,15 @@ import pytest from pathlib import Path +import pandas as pd from diffpy.similarity.metrics.amd import amd_compare, pdd_compare # get cif files for tests curr_path = Path().absolute() -cif1 = curr_path / 'test_data' / 'mp-390.cif' -cif2 = curr_path / 'test_data' / 'mp-458.cif' +cif1 = curr_path / 'tests'/ 'test_data' / 'mp-390.cif' +cif2 = curr_path / 'tests'/ 'test_data' / 'mp-458.cif' +dm_amd = pd.read_pickle(curr_path / 'tests'/ 'test_data' / 'dm_amd.pkl') +dm_pdd = pd.read_pickle(curr_path / 'tests'/ 'test_data' / 'dm_pdd.pkl') # set up input and output for tests # for each case: (input1, input2, expected_output) @@ -14,32 +17,43 @@ # test identical cif files (cif1, cif1, 0.0), # test different cif files - (cif1, cif2, dm(cif1, cif2)), + (cif1, cif2, 0.5725326132426836), # test generate a distance matrix from two lists - ([cif1, cif2], [cif1, cif2], dm) + ([cif1, cif2], [cif1, cif2], dm_amd) ] +@pytest.mark.parametrize("input1, input2, expected_output", amd_test_datasets) +def test_amd_compare(input1, input2, expected_output): + """ + Test the amd_compare function. + """ + result = amd_compare(input1, input2, k=100) + if isinstance(input1, list) or isinstance(input2, list): + # check if two dataframes are equal + assert result.equals(expected_output) + else: + # check if two floats are equal + assert result == expected_output + +# for each case: (input1, input2, expected_output) pdd_test_datasets = [ # test identical cif files (cif1, cif1, 0.0), # test different cif files - (cif1, cif2, dm(cif1, cif2)), + (cif1, cif2, 0.6675364987310654), # test generate a distance matrix from two lists - ([cif1, cif2], [cif1, cif2], dm) + ([cif1, cif2], [cif1, cif2], dm_pdd) ] -@pytest.mark.parametrize("input1, input2, expected_output", amd_test_datasets) -def test_amd_compare(input1, input2, expected_output): - """ - Test the amd_compare function. - """ - result = amd_compare(input1, input2) - assert result == expected_output - @pytest.mark.parametrize("input1, input2, expected_output", pdd_test_datasets) def test_pdd_compare(input1, input2, expected_output): """ Test the pdd_compare function. """ - result = pdd_compare(input1, input2) - assert result == expected_output + result = pdd_compare(input1, input2, k=100) + if isinstance(input1, list) or isinstance(input2, list): + # check if two dataframes are equal + assert result.equals(expected_output) + else: + # check if two floats are equal + assert result == expected_output \ No newline at end of file From 001b6324439c49e9538b18e2fb306383e33851b6 Mon Sep 17 00:00:00 2001 From: tinatn29 Date: Wed, 23 Apr 2025 15:55:32 -0400 Subject: [PATCH 06/19] add test data --- tests/test_data/dm_amd.pkl | Bin 0 -> 710 bytes tests/test_data/dm_pdd.pkl | Bin 0 -> 710 bytes 2 files changed, 0 insertions(+), 0 deletions(-) create mode 100644 tests/test_data/dm_amd.pkl create mode 100644 tests/test_data/dm_pdd.pkl diff --git a/tests/test_data/dm_amd.pkl b/tests/test_data/dm_amd.pkl new file mode 100644 index 0000000000000000000000000000000000000000..a0e8a292af300c41dfe6f0ed881b2e126b93f296 GIT binary patch literal 710 zcmZ`%&59H;5T0&l{0IL)J&6}_7NM2#G6!KmWkDNfVA<;ulJ4{*Y&zZKhjqXOJ{NAVrB(&H?%f;m(wNqtrIeW{OwFW=K%x5BNK>XO>5XbjIPLu(G<%4_QA zcIThM)+acGVd#^3ZXi6^442w*qp7l4O*Nf!V?=-K#Khn`+SlzOBV9M>b0Z4~*GOfWn)#~Y2BU^wAFi4|v%kKLe?Hy$F;_iJBBsfE7?i{;c^d5!_E4nz=hZ3sdu}sEEoyp|XnsK6XuLzXcoYU}zf-qF#ZjwG z`nAWR)EK%zRf*w6qpdT(a1nnqT*7bb9%D00c$F|%kxKG(3F3B0XXl81_=o5OiJru! z`@3>BLTbBmG($eox|}tgm95miq1Rt9#yJ2u%z@Y%1#|8iZZS&99z$_Kc3bXga7~l literal 0 HcmV?d00001 diff --git a/tests/test_data/dm_pdd.pkl b/tests/test_data/dm_pdd.pkl new file mode 100644 index 0000000000000000000000000000000000000000..e7c5ae3dd2f705ed56670ef65033ff654e0ed630 GIT binary patch literal 710 zcmZ`%ON$gS5T0&le1k90J&6}_7NL>xvIk*6WkH*r0okKVNOz|vVbkd*57q$}*2Ap3 zklU928GnmM&z?MMCF3l!f;m(wNqtrIeW|0tmv3n=E`O(^s-h+;X+xa6_qAysG>8b4fvZcWn1p=EYENT{{CMCk)kl!IRyMI z;#4QfeaT1>Fd_J!a!ohSNbClErbP+<2B~%16t1qB#;D=fN6Y5V?ayD|kKFqo;D$%TLm9g5DHsEjjEBI~kF*LJ)S4ja&Qc0dJfZz4$^a9b3{}3G`(bLd$ ze^*RLNNrb)rq6-lwePDi2NQRU%=i<&i7oP{Jmb$oB|ciOv-O`Ge7uCSkOQX&<;TGa wG*?4L8fIzay@%DQwKzV9;4hO}(phvLP$}C3Ln_yy?9?(MX~Cp{6XS~P5B0SfJ^%m! literal 0 HcmV?d00001 From 787ff97e7c775943049d060b1eda167cbf830132 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Thu, 24 Apr 2025 17:32:35 +0000 Subject: [PATCH 07/19] [pre-commit.ci] auto fixes from pre-commit hooks --- src/diffpy/similarity/metrics/amd.py | 30 +++++++++++++++------------- tests/test_amd.py | 30 ++++++++++++++-------------- 2 files changed, 31 insertions(+), 29 deletions(-) diff --git a/src/diffpy/similarity/metrics/amd.py b/src/diffpy/similarity/metrics/amd.py index b68e9d4..890ae4f 100644 --- a/src/diffpy/similarity/metrics/amd.py +++ b/src/diffpy/similarity/metrics/amd.py @@ -2,10 +2,9 @@ def amd_compare(cif1, cif2, k=100): - """ - Compare two CIF files or two lists fo CIF files using the AMD metric. - Use cif1 = cif2 = cif_list = [cif_0, cif_1, ...] - to compare all CIF files in cif_list. + """Compare two CIF files or two lists fo CIF files using the AMD metric. + Use cif1 = cif2 = cif_list = [cif_0, cif_1, ...] to compare all CIF files + in cif_list. Parameters ---------- @@ -16,7 +15,7 @@ def amd_compare(cif1, cif2, k=100): k : int Number of nearest neighbors to consider. Default is 100. - + Returns ------- dm : float or pandas.DataFrame @@ -24,19 +23,20 @@ def amd_compare(cif1, cif2, k=100): If either cif1 or cif2 is a list, return a distance matrix of shape (len(cif1), len(cif2)). Each element represents the AMD distance between two structures from CIF files. """ - dm = amd.compare(cif1, cif2, by='AMD', k=k) + dm = amd.compare(cif1, cif2, by="AMD", k=k) if isinstance(cif1, list) or isinstance(cif2, list): # if at least one input is a list, return the distance matrix return dm else: - return float(dm.iloc[0, 0]) # return the single distance value as a float + return float( + dm.iloc[0, 0] + ) # return the single distance value as a float def pdd_compare(cif1, cif2, k=100): - """ - Compare two CIF files or two lists fo CIF files using the PDD metric. - Use cif1 = cif2 = cif_list = [cif_0, cif_1, ...] - to compare all CIF files in cif_list. + """Compare two CIF files or two lists fo CIF files using the PDD metric. + Use cif1 = cif2 = cif_list = [cif_0, cif_1, ...] to compare all CIF files + in cif_list. Parameters ---------- @@ -47,16 +47,18 @@ def pdd_compare(cif1, cif2, k=100): k : int Number of nearest neighbors to consider. Default is 100. - + Returns ------- dm : numpy.ndarray Distance matrix of shape (len(cif1), len(cif2)). Each element represents the PDD distance between two structures from CIF files. """ - dm = amd.compare(cif1, cif2, by='PDD', k=k) + dm = amd.compare(cif1, cif2, by="PDD", k=k) if isinstance(cif1, list) or isinstance(cif2, list): # if at least one input is a list, return the distance matrix return dm else: - return float(dm.iloc[0, 0]) # return the single distance value as a float \ No newline at end of file + return float( + dm.iloc[0, 0] + ) # return the single distance value as a float diff --git a/tests/test_amd.py b/tests/test_amd.py index 7b645a0..908f19d 100644 --- a/tests/test_amd.py +++ b/tests/test_amd.py @@ -1,15 +1,16 @@ -import pytest from pathlib import Path + import pandas as pd -from diffpy.similarity.metrics.amd import amd_compare, pdd_compare +import pytest +from diffpy.similarity.metrics.amd import amd_compare, pdd_compare # get cif files for tests curr_path = Path().absolute() -cif1 = curr_path / 'tests'/ 'test_data' / 'mp-390.cif' -cif2 = curr_path / 'tests'/ 'test_data' / 'mp-458.cif' -dm_amd = pd.read_pickle(curr_path / 'tests'/ 'test_data' / 'dm_amd.pkl') -dm_pdd = pd.read_pickle(curr_path / 'tests'/ 'test_data' / 'dm_pdd.pkl') +cif1 = curr_path / "tests" / "test_data" / "mp-390.cif" +cif2 = curr_path / "tests" / "test_data" / "mp-458.cif" +dm_amd = pd.read_pickle(curr_path / "tests" / "test_data" / "dm_amd.pkl") +dm_pdd = pd.read_pickle(curr_path / "tests" / "test_data" / "dm_pdd.pkl") # set up input and output for tests # for each case: (input1, input2, expected_output) @@ -19,14 +20,13 @@ # test different cif files (cif1, cif2, 0.5725326132426836), # test generate a distance matrix from two lists - ([cif1, cif2], [cif1, cif2], dm_amd) + ([cif1, cif2], [cif1, cif2], dm_amd), ] + @pytest.mark.parametrize("input1, input2, expected_output", amd_test_datasets) def test_amd_compare(input1, input2, expected_output): - """ - Test the amd_compare function. - """ + """Test the amd_compare function.""" result = amd_compare(input1, input2, k=100) if isinstance(input1, list) or isinstance(input2, list): # check if two dataframes are equal @@ -35,6 +35,7 @@ def test_amd_compare(input1, input2, expected_output): # check if two floats are equal assert result == expected_output + # for each case: (input1, input2, expected_output) pdd_test_datasets = [ # test identical cif files @@ -42,18 +43,17 @@ def test_amd_compare(input1, input2, expected_output): # test different cif files (cif1, cif2, 0.6675364987310654), # test generate a distance matrix from two lists - ([cif1, cif2], [cif1, cif2], dm_pdd) + ([cif1, cif2], [cif1, cif2], dm_pdd), ] + @pytest.mark.parametrize("input1, input2, expected_output", pdd_test_datasets) def test_pdd_compare(input1, input2, expected_output): - """ - Test the pdd_compare function. - """ + """Test the pdd_compare function.""" result = pdd_compare(input1, input2, k=100) if isinstance(input1, list) or isinstance(input2, list): # check if two dataframes are equal assert result.equals(expected_output) else: # check if two floats are equal - assert result == expected_output \ No newline at end of file + assert result == expected_output From 75a9d1e7cd9a6dfd74e62e420e33031b0d195480 Mon Sep 17 00:00:00 2001 From: tinatn29 Date: Thu, 24 Apr 2025 13:43:20 -0400 Subject: [PATCH 08/19] exclude tests/test_data and notebooks/ --- .pre-commit-config.yaml | 2 ++ 1 file changed, 2 insertions(+) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 0e4a84d..affdd2d 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -64,3 +64,5 @@ repos: - id: docformatter additional_dependencies: [tomli] args: [--in-place, --config, ./pyproject.toml] + +exclude: ^(tests/test_data/ | notebooks/) \ No newline at end of file From 12e1632849151a2ee40d04dfde2cc302064124ac Mon Sep 17 00:00:00 2001 From: tinatn29 Date: Thu, 24 Apr 2025 13:48:27 -0400 Subject: [PATCH 09/19] fix end of file --- .pre-commit-config.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index affdd2d..79ad397 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -65,4 +65,4 @@ repos: additional_dependencies: [tomli] args: [--in-place, --config, ./pyproject.toml] -exclude: ^(tests/test_data/ | notebooks/) \ No newline at end of file +exclude: ^(tests/test_data/ | notebooks/) From 4539dba4793ee2d62688a44e98737454b9164459 Mon Sep 17 00:00:00 2001 From: tinatn29 Date: Thu, 24 Apr 2025 13:49:46 -0400 Subject: [PATCH 10/19] fix flake8 --- src/diffpy/similarity/metrics/amd.py | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/src/diffpy/similarity/metrics/amd.py b/src/diffpy/similarity/metrics/amd.py index 890ae4f..9f3a474 100644 --- a/src/diffpy/similarity/metrics/amd.py +++ b/src/diffpy/similarity/metrics/amd.py @@ -19,9 +19,10 @@ def amd_compare(cif1, cif2, k=100): Returns ------- dm : float or pandas.DataFrame - If cif1 and cif2 are both strings, return the AMD distance value (float). - If either cif1 or cif2 is a list, return a distance matrix of shape (len(cif1), len(cif2)). - Each element represents the AMD distance between two structures from CIF files. + For single files cif1 and cif2, return the AMD distance value (float). + If either cif1 or cif2 is a list, return a distance matrix + of shape (len(cif1), len(cif2)). + Each element represents the AMD distance between two structures. """ dm = amd.compare(cif1, cif2, by="AMD", k=k) if isinstance(cif1, list) or isinstance(cif2, list): @@ -52,7 +53,7 @@ def pdd_compare(cif1, cif2, k=100): ------- dm : numpy.ndarray Distance matrix of shape (len(cif1), len(cif2)). - Each element represents the PDD distance between two structures from CIF files. + Each element represents the PDD distance between two structures. """ dm = amd.compare(cif1, cif2, by="PDD", k=k) if isinstance(cif1, list) or isinstance(cif2, list): From b55765e2b0b0e5068de0f98893bb224ab1639b19 Mon Sep 17 00:00:00 2001 From: tinatn29 Date: Thu, 24 Apr 2025 13:50:52 -0400 Subject: [PATCH 11/19] fix codespell --- src/diffpy/similarity/metrics/amd.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/diffpy/similarity/metrics/amd.py b/src/diffpy/similarity/metrics/amd.py index 9f3a474..6218676 100644 --- a/src/diffpy/similarity/metrics/amd.py +++ b/src/diffpy/similarity/metrics/amd.py @@ -2,7 +2,7 @@ def amd_compare(cif1, cif2, k=100): - """Compare two CIF files or two lists fo CIF files using the AMD metric. + """Compare two CIF files or two lists of CIF files using the AMD metric. Use cif1 = cif2 = cif_list = [cif_0, cif_1, ...] to compare all CIF files in cif_list. @@ -20,7 +20,7 @@ def amd_compare(cif1, cif2, k=100): ------- dm : float or pandas.DataFrame For single files cif1 and cif2, return the AMD distance value (float). - If either cif1 or cif2 is a list, return a distance matrix + If either cif1 or cif2 is a list, return a distance matrix of shape (len(cif1), len(cif2)). Each element represents the AMD distance between two structures. """ @@ -35,7 +35,7 @@ def amd_compare(cif1, cif2, k=100): def pdd_compare(cif1, cif2, k=100): - """Compare two CIF files or two lists fo CIF files using the PDD metric. + """Compare two CIF files or two lists of CIF files using the PDD metric. Use cif1 = cif2 = cif_list = [cif_0, cif_1, ...] to compare all CIF files in cif_list. From 13c88cf29ef122a7b26400490a62c986da38ac54 Mon Sep 17 00:00:00 2001 From: tinatn29 Date: Thu, 24 Apr 2025 13:52:54 -0400 Subject: [PATCH 12/19] add init files --- src/__init__.py | 0 src/diffpy/similarity/metrics/__init__.py | 0 2 files changed, 0 insertions(+), 0 deletions(-) create mode 100644 src/__init__.py create mode 100644 src/diffpy/similarity/metrics/__init__.py diff --git a/src/__init__.py b/src/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/src/diffpy/similarity/metrics/__init__.py b/src/diffpy/similarity/metrics/__init__.py new file mode 100644 index 0000000..e69de29 From 1fd556b57022f7f4d51a373bb1022444892bee68 Mon Sep 17 00:00:00 2001 From: tinatn29 Date: Thu, 24 Apr 2025 14:01:39 -0400 Subject: [PATCH 13/19] add news item --- news/amd.rst | 25 +++++++++++++++++++++++++ 1 file changed, 25 insertions(+) create mode 100644 news/amd.rst diff --git a/news/amd.rst b/news/amd.rst new file mode 100644 index 0000000..e893a07 --- /dev/null +++ b/news/amd.rst @@ -0,0 +1,25 @@ +**Added:** + +* Functions to compute amd and pdd in src/diffpy/metrics/amd.py +* Tests for amd and pdd functions in tests/test_amd.py +* Test data (two cif files and pkl files (DataFrames)) in tests/test_data + +**Changed:** + +* Make pre-commit ignore test_data/ folder (in .pre-commit-config.yaml) + +**Deprecated:** + +* + +**Removed:** + +* + +**Fixed:** + +* + +**Security:** + +* From 5840608f82e74686dbc2a82a41ac93c2f54ca666 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Thu, 24 Apr 2025 18:03:40 +0000 Subject: [PATCH 14/19] [pre-commit.ci] auto fixes from pre-commit hooks --- news/amd.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/news/amd.rst b/news/amd.rst index e893a07..84ffea0 100644 --- a/news/amd.rst +++ b/news/amd.rst @@ -1,6 +1,6 @@ **Added:** -* Functions to compute amd and pdd in src/diffpy/metrics/amd.py +* Functions to compute amd and pdd in src/diffpy/metrics/amd.py * Tests for amd and pdd functions in tests/test_amd.py * Test data (two cif files and pkl files (DataFrames)) in tests/test_data From 0d7812acfd71cd8f5fc0f3741949e3e005cb7fb7 Mon Sep 17 00:00:00 2001 From: tinatn29 Date: Thu, 24 Apr 2025 14:03:56 -0400 Subject: [PATCH 15/19] whitespace trimming --- news/amd.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/news/amd.rst b/news/amd.rst index e893a07..84ffea0 100644 --- a/news/amd.rst +++ b/news/amd.rst @@ -1,6 +1,6 @@ **Added:** -* Functions to compute amd and pdd in src/diffpy/metrics/amd.py +* Functions to compute amd and pdd in src/diffpy/metrics/amd.py * Tests for amd and pdd functions in tests/test_amd.py * Test data (two cif files and pkl files (DataFrames)) in tests/test_data From 8765d72e0023a735863e193baaab6332afb38f85 Mon Sep 17 00:00:00 2001 From: tinatn29 Date: Thu, 24 Apr 2025 17:19:03 -0400 Subject: [PATCH 16/19] delete __init__ from src/ --- src/__init__.py | 0 1 file changed, 0 insertions(+), 0 deletions(-) delete mode 100644 src/__init__.py diff --git a/src/__init__.py b/src/__init__.py deleted file mode 100644 index e69de29..0000000 From 3d3a5e429356851c14d645ee0c282a1e7593d607 Mon Sep 17 00:00:00 2001 From: tinatn29 Date: Thu, 24 Apr 2025 17:30:28 -0400 Subject: [PATCH 17/19] separate amd and pdd --- src/diffpy/similarity/metrics/amd.py | 31 --------------------------- src/diffpy/similarity/metrics/pdd.py | 32 ++++++++++++++++++++++++++++ 2 files changed, 32 insertions(+), 31 deletions(-) create mode 100644 src/diffpy/similarity/metrics/pdd.py diff --git a/src/diffpy/similarity/metrics/amd.py b/src/diffpy/similarity/metrics/amd.py index 6218676..aa836a2 100644 --- a/src/diffpy/similarity/metrics/amd.py +++ b/src/diffpy/similarity/metrics/amd.py @@ -32,34 +32,3 @@ def amd_compare(cif1, cif2, k=100): return float( dm.iloc[0, 0] ) # return the single distance value as a float - - -def pdd_compare(cif1, cif2, k=100): - """Compare two CIF files or two lists of CIF files using the PDD metric. - Use cif1 = cif2 = cif_list = [cif_0, cif_1, ...] to compare all CIF files - in cif_list. - - Parameters - ---------- - cif1 : str - Path to the first CIF file (or list of CIF files) - cif2 : str - Path to the second CIF file (or list of CIF files) - k : int - Number of nearest neighbors to consider. - Default is 100. - - Returns - ------- - dm : numpy.ndarray - Distance matrix of shape (len(cif1), len(cif2)). - Each element represents the PDD distance between two structures. - """ - dm = amd.compare(cif1, cif2, by="PDD", k=k) - if isinstance(cif1, list) or isinstance(cif2, list): - # if at least one input is a list, return the distance matrix - return dm - else: - return float( - dm.iloc[0, 0] - ) # return the single distance value as a float diff --git a/src/diffpy/similarity/metrics/pdd.py b/src/diffpy/similarity/metrics/pdd.py new file mode 100644 index 0000000..a59b84e --- /dev/null +++ b/src/diffpy/similarity/metrics/pdd.py @@ -0,0 +1,32 @@ +import amd + + +def pdd_compare(cif1, cif2, k=100): + """Compare two CIF files or two lists of CIF files using the PDD metric. + Use cif1 = cif2 = cif_list = [cif_0, cif_1, ...] to compare all CIF files + in cif_list. + + Parameters + ---------- + cif1 : str + Path to the first CIF file (or list of CIF files) + cif2 : str + Path to the second CIF file (or list of CIF files) + k : int + Number of nearest neighbors to consider. + Default is 100. + + Returns + ------- + dm : numpy.ndarray + Distance matrix of shape (len(cif1), len(cif2)). + Each element represents the PDD distance between two structures. + """ + dm = amd.compare(cif1, cif2, by="PDD", k=k) + if isinstance(cif1, list) or isinstance(cif2, list): + # if at least one input is a list, return the distance matrix + return dm + else: + return float( + dm.iloc[0, 0] + ) # return the single distance value as a float From b5af57232172d21bd3ec792e1b59f142ba40bf21 Mon Sep 17 00:00:00 2001 From: tinatn29 Date: Thu, 24 Apr 2025 17:33:03 -0400 Subject: [PATCH 18/19] separate tests --- tests/test_amd.py | 26 +------------------------- tests/test_pdd.py | 35 +++++++++++++++++++++++++++++++++++ 2 files changed, 36 insertions(+), 25 deletions(-) create mode 100644 tests/test_pdd.py diff --git a/tests/test_amd.py b/tests/test_amd.py index 908f19d..09fb478 100644 --- a/tests/test_amd.py +++ b/tests/test_amd.py @@ -3,14 +3,13 @@ import pandas as pd import pytest -from diffpy.similarity.metrics.amd import amd_compare, pdd_compare +from diffpy.similarity.metrics.amd import amd_compare # get cif files for tests curr_path = Path().absolute() cif1 = curr_path / "tests" / "test_data" / "mp-390.cif" cif2 = curr_path / "tests" / "test_data" / "mp-458.cif" dm_amd = pd.read_pickle(curr_path / "tests" / "test_data" / "dm_amd.pkl") -dm_pdd = pd.read_pickle(curr_path / "tests" / "test_data" / "dm_pdd.pkl") # set up input and output for tests # for each case: (input1, input2, expected_output) @@ -34,26 +33,3 @@ def test_amd_compare(input1, input2, expected_output): else: # check if two floats are equal assert result == expected_output - - -# for each case: (input1, input2, expected_output) -pdd_test_datasets = [ - # test identical cif files - (cif1, cif1, 0.0), - # test different cif files - (cif1, cif2, 0.6675364987310654), - # test generate a distance matrix from two lists - ([cif1, cif2], [cif1, cif2], dm_pdd), -] - - -@pytest.mark.parametrize("input1, input2, expected_output", pdd_test_datasets) -def test_pdd_compare(input1, input2, expected_output): - """Test the pdd_compare function.""" - result = pdd_compare(input1, input2, k=100) - if isinstance(input1, list) or isinstance(input2, list): - # check if two dataframes are equal - assert result.equals(expected_output) - else: - # check if two floats are equal - assert result == expected_output diff --git a/tests/test_pdd.py b/tests/test_pdd.py new file mode 100644 index 0000000..770ada5 --- /dev/null +++ b/tests/test_pdd.py @@ -0,0 +1,35 @@ +from pathlib import Path + +import pandas as pd +import pytest + +from diffpy.similarity.metrics.pdd import pdd_compare + +# get cif files for tests +curr_path = Path().absolute() +cif1 = curr_path / "tests" / "test_data" / "mp-390.cif" +cif2 = curr_path / "tests" / "test_data" / "mp-458.cif" +dm_pdd = pd.read_pickle(curr_path / "tests" / "test_data" / "dm_pdd.pkl") + +# set up input and output for tests +# for each case: (input1, input2, expected_output) +pdd_test_datasets = [ + # test identical cif files + (cif1, cif1, 0.0), + # test different cif files + (cif1, cif2, 0.6675364987310654), + # test generate a distance matrix from two lists + ([cif1, cif2], [cif1, cif2], dm_pdd), +] + + +@pytest.mark.parametrize("input1, input2, expected_output", pdd_test_datasets) +def test_pdd_compare(input1, input2, expected_output): + """Test the pdd_compare function.""" + result = pdd_compare(input1, input2, k=100) + if isinstance(input1, list) or isinstance(input2, list): + # check if two dataframes are equal + assert result.equals(expected_output) + else: + # check if two floats are equal + assert result == expected_output From c2301febbd3972ede8a5ef20d571423daee4703a Mon Sep 17 00:00:00 2001 From: tinatn29 Date: Thu, 24 Apr 2025 17:35:37 -0400 Subject: [PATCH 19/19] edit news item --- news/amd.rst | 25 ------------------------- news/amd_pdd.rst | 24 ++++++++++++++++++++++++ 2 files changed, 24 insertions(+), 25 deletions(-) delete mode 100644 news/amd.rst create mode 100644 news/amd_pdd.rst diff --git a/news/amd.rst b/news/amd.rst deleted file mode 100644 index 84ffea0..0000000 --- a/news/amd.rst +++ /dev/null @@ -1,25 +0,0 @@ -**Added:** - -* Functions to compute amd and pdd in src/diffpy/metrics/amd.py -* Tests for amd and pdd functions in tests/test_amd.py -* Test data (two cif files and pkl files (DataFrames)) in tests/test_data - -**Changed:** - -* Make pre-commit ignore test_data/ folder (in .pre-commit-config.yaml) - -**Deprecated:** - -* - -**Removed:** - -* - -**Fixed:** - -* - -**Security:** - -* diff --git a/news/amd_pdd.rst b/news/amd_pdd.rst new file mode 100644 index 0000000..ebf566a --- /dev/null +++ b/news/amd_pdd.rst @@ -0,0 +1,24 @@ +**Added:** + +* Function to compare structures with amd in src/diffpy/metrics/amd.py +* Function to compare structures with pdd in src/diffpy/metrics/pdd.py + +**Changed:** + +* + +**Deprecated:** + +* + +**Removed:** + +* + +**Fixed:** + +* + +**Security:** + +*