Skip to content

amd and pdd functions and tests #4

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 21 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions .pre-commit-config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -64,3 +64,5 @@ repos:
- id: docformatter
additional_dependencies: [tomli]
args: [--in-place, --config, ./pyproject.toml]

# pre-commit compiles this as an ordinary (non-verbose) Python regex, so any
# spaces around "|" would have to match literally; keep the alternation tight.
exclude: ^(tests/test_data/|notebooks/)
24 changes: 24 additions & 0 deletions news/amd_pdd.rst
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
**Added:**

* Function to compare structures with AMD in src/diffpy/similarity/metrics/amd.py
* Function to compare structures with PDD in src/diffpy/similarity/metrics/pdd.py

**Changed:**

* <news item>

**Deprecated:**

* <news item>

**Removed:**

* <news item>

**Fixed:**

* <news item>

**Security:**

* <news item>
Empty file.
34 changes: 34 additions & 0 deletions src/diffpy/similarity/metrics/amd.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
import amd


def amd_compare(cif1, cif2, k=100):
    """Compute AMD distances between CIF structures.

    Both inputs are forwarded unchanged to ``amd.compare`` with
    ``by="AMD"``. To compare every file in a collection against every
    other, pass the same list as both ``cif1`` and ``cif2``.

    Parameters
    ----------
    cif1 : str or list
        Path to the first CIF file, or a list of CIF file paths.
    cif2 : str or list
        Path to the second CIF file, or a list of CIF file paths.
    k : int
        Number of nearest neighbors to consider.
        Default is 100.

    Returns
    -------
    float or pandas.DataFrame
        When neither input is a list, the top-left entry of the distance
        matrix converted to ``float``. When at least one input is a
        list, the full distance matrix of shape (len(cif1), len(cif2)),
        where each element is the AMD distance between two structures.
    """
    distances = amd.compare(cif1, cif2, by="AMD", k=k)

    # Any list input means the caller wants the whole matrix back.
    wants_matrix = any(isinstance(arg, list) for arg in (cif1, cif2))
    if wants_matrix:
        return distances

    # Two single files: collapse the 1x1 result to a plain float.
    return float(distances.iloc[0, 0])
32 changes: 32 additions & 0 deletions src/diffpy/similarity/metrics/pdd.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
import amd


def pdd_compare(cif1, cif2, k=100):
    """Compare two CIF files or two lists of CIF files using the PDD metric.

    Use cif1 = cif2 = cif_list = [cif_0, cif_1, ...] to compare all CIF
    files in cif_list.

    Parameters
    ----------
    cif1 : str or list
        Path to the first CIF file (or list of CIF files)
    cif2 : str or list
        Path to the second CIF file (or list of CIF files)
    k : int
        Number of nearest neighbors to consider.
        Default is 100.

    Returns
    -------
    dm : float or pandas.DataFrame
        For single files cif1 and cif2, return the PDD distance value
        (float). If either cif1 or cif2 is a list, return a distance
        matrix of shape (len(cif1), len(cif2)).
        Each element represents the PDD distance between two structures.
    """
    dm = amd.compare(cif1, cif2, by="PDD", k=k)
    if isinstance(cif1, list) or isinstance(cif2, list):
        # if at least one input is a list, return the distance matrix
        return dm
    # two single files: return the lone matrix entry as a plain float
    return float(dm.iloc[0, 0])
35 changes: 35 additions & 0 deletions tests/test_amd.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,35 @@
from pathlib import Path

import pandas as pd
import pytest

from diffpy.similarity.metrics.amd import amd_compare

# Locate the fixtures relative to this test module instead of the current
# working directory, so the tests pass wherever pytest is invoked from.
test_data_dir = Path(__file__).resolve().parent / "test_data"
cif1 = test_data_dir / "mp-390.cif"
cif2 = test_data_dir / "mp-458.cif"
# reference distance matrix for [cif1, cif2] vs [cif1, cif2]
dm_amd = pd.read_pickle(test_data_dir / "dm_amd.pkl")

# set up input and output for tests
# for each case: (input1, input2, expected_output)
amd_test_datasets = [
    # test identical cif files
    (cif1, cif1, 0.0),
    # test different cif files
    (cif1, cif2, 0.5725326132426836),
    # test generate a distance matrix from two lists
    ([cif1, cif2], [cif1, cif2], dm_amd),
]


@pytest.mark.parametrize("input1, input2, expected_output", amd_test_datasets)
def test_amd_compare(input1, input2, expected_output):
    """Test the amd_compare function.

    Results are compared with a tolerance rather than exact equality,
    because the distances are computed floating-point values and may
    differ in the last bits between platforms or library versions.
    """
    result = amd_compare(input1, input2, k=100)
    if isinstance(input1, list) or isinstance(input2, list):
        # list input: compare the distance matrices with float tolerance
        pd.testing.assert_frame_equal(result, expected_output)
    else:
        # single files: compare the scalar distance with float tolerance
        assert result == pytest.approx(expected_output)
Binary file added tests/test_data/dm_amd.pkl
Binary file not shown.
Binary file added tests/test_data/dm_pdd.pkl
Binary file not shown.
32 changes: 32 additions & 0 deletions tests/test_data/mp-390.cif
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
# generated using pymatgen
data_TiO2
_symmetry_space_group_name_H-M 'P 1'
_cell_length_a 3.78253951
_cell_length_b 3.78253984
_cell_length_c 5.50145140
_cell_angle_alpha 110.10714408
_cell_angle_beta 110.10713258
_cell_angle_gamma 89.99997885
_symmetry_Int_Tables_number 1
_chemical_formula_structural TiO2
_chemical_formula_sum 'Ti2 O4'
_cell_volume 68.78397225
_cell_formula_units_Z 2
loop_
_symmetry_equiv_pos_site_id
_symmetry_equiv_pos_as_xyz
1 'x, y, z'
loop_
_atom_site_type_symbol
_atom_site_label
_atom_site_symmetry_multiplicity
_atom_site_fract_x
_atom_site_fract_y
_atom_site_fract_z
_atom_site_occupancy
Ti Ti0 1 0.87500000 0.62500000 0.25000000 1
Ti Ti1 1 0.12500000 0.37500000 0.75000000 1
O O2 1 0.33215263 0.58215263 0.16430425 1
O O3 1 0.08215263 0.83215263 0.66430425 1
O O4 1 0.91784737 0.16784737 0.33569575 1
O O5 1 0.66784737 0.41784737 0.83569575 1
36 changes: 36 additions & 0 deletions tests/test_data/mp-458.cif
Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@
# generated using pymatgen
data_Ti2O3
_symmetry_space_group_name_H-M 'P 1'
_cell_length_a 5.45736044
_cell_length_b 5.45736193
_cell_length_c 5.45736145
_cell_angle_alpha 55.79413805
_cell_angle_beta 55.79412997
_cell_angle_gamma 55.79413435
_symmetry_Int_Tables_number 1
_chemical_formula_structural Ti2O3
_chemical_formula_sum 'Ti4 O6'
_cell_volume 103.72111553
_cell_formula_units_Z 2
loop_
_symmetry_equiv_pos_site_id
_symmetry_equiv_pos_as_xyz
1 'x, y, z'
loop_
_atom_site_type_symbol
_atom_site_label
_atom_site_symmetry_multiplicity
_atom_site_fract_x
_atom_site_fract_y
_atom_site_fract_z
_atom_site_occupancy
Ti Ti0 1 0.34453747 0.34453747 0.34453747 1
Ti Ti1 1 0.15546253 0.15546253 0.15546253 1
Ti Ti2 1 0.65546253 0.65546253 0.65546253 1
Ti Ti3 1 0.84453747 0.84453747 0.84453747 1
O O4 1 0.56439446 0.93560554 0.25000000 1
O O5 1 0.25000000 0.56439446 0.93560554 1
O O6 1 0.93560554 0.25000000 0.56439446 1
O O7 1 0.43560554 0.06439446 0.75000000 1
O O8 1 0.75000000 0.43560554 0.06439446 1
O O9 1 0.06439446 0.75000000 0.43560554 1
35 changes: 35 additions & 0 deletions tests/test_pdd.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,35 @@
from pathlib import Path

import pandas as pd
import pytest

from diffpy.similarity.metrics.pdd import pdd_compare

# Locate the fixtures relative to this test module instead of the current
# working directory, so the tests pass wherever pytest is invoked from.
test_data_dir = Path(__file__).resolve().parent / "test_data"
cif1 = test_data_dir / "mp-390.cif"
cif2 = test_data_dir / "mp-458.cif"
# reference distance matrix for [cif1, cif2] vs [cif1, cif2]
dm_pdd = pd.read_pickle(test_data_dir / "dm_pdd.pkl")

# set up input and output for tests
# for each case: (input1, input2, expected_output)
pdd_test_datasets = [
    # test identical cif files
    (cif1, cif1, 0.0),
    # test different cif files
    (cif1, cif2, 0.6675364987310654),
    # test generate a distance matrix from two lists
    ([cif1, cif2], [cif1, cif2], dm_pdd),
]


@pytest.mark.parametrize("input1, input2, expected_output", pdd_test_datasets)
def test_pdd_compare(input1, input2, expected_output):
    """Test the pdd_compare function.

    Results are compared with a tolerance rather than exact equality,
    because the distances are computed floating-point values and may
    differ in the last bits between platforms or library versions.
    """
    result = pdd_compare(input1, input2, k=100)
    if isinstance(input1, list) or isinstance(input2, list):
        # list input: compare the distance matrices with float tolerance
        pd.testing.assert_frame_equal(result, expected_output)
    else:
        # single files: compare the scalar distance with float tolerance
        assert result == pytest.approx(expected_output)