Skip to content

Commit

Permalink
Add Hamming Distance knn similarity metric for long property
Browse files Browse the repository at this point in the history
+ tests
  • Loading branch information
htmlboss committed May 11, 2022
1 parent 255e450 commit 0d3e307
Show file tree
Hide file tree
Showing 7 changed files with 163 additions and 11 deletions.
Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@
/*
* Copyright (c) "Neo4j"
* Neo4j Sweden AB [http://neo4j.com]
*
* This file is part of Neo4j.
*
* Neo4j is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package org.neo4j.gds.similarity.knn.metrics;

import java.lang.Long;

/**
 * Similarity metric for {@code long} properties based on the Hamming distance
 * (https://en.wikipedia.org/wiki/Hamming_distance) between the 64-bit
 * representations of the two values.
 *
 * The distance (number of differing bits, 0..64) is turned into a similarity
 * in the 0..1 range using a linear transformation: identical values yield
 * {@code 1.0}, values that differ in every bit yield {@code 0.0}.
 */
public final class HammingDistance {
    private HammingDistance() {}

    /**
     * Computes the Hamming similarity of two {@code long} values.
     *
     * @param left  first value
     * @param right second value
     * @return {@code 1 - d / 64}, where {@code d} is the number of bit positions
     *         in which {@code left} and {@code right} differ
     */
    public static double longMetric(long left, long right) {
        // XOR leaves a 1 in exactly those positions where the operands differ.
        int differingBits = Long.bitCount(left ^ right);
        return 1.0 - differingBits / (double) Long.SIZE;
    }
}
Original file line number Diff line number Diff line change
Expand Up @@ -24,22 +24,20 @@

final class LongPropertySimilarityComputer implements SimilarityComputer {
private final NodePropertyValues nodePropertyValues;
private final LongPropertySimilarityMetric metric;

LongPropertySimilarityComputer(NodePropertyValues nodePropertyValues) {
LongPropertySimilarityComputer(NodePropertyValues nodePropertyValues, LongPropertySimilarityMetric metric) {
if (nodePropertyValues.valueType() != ValueType.LONG) {
throw new IllegalArgumentException("The property is not of type LONG");
}
this.nodePropertyValues = nodePropertyValues;
this.metric = metric;
}

@Override
public double similarity(long firstNodeId, long secondNodeId) {
var left = nodePropertyValues.longValue(firstNodeId);
var right = nodePropertyValues.longValue(secondNodeId);
var abs = Math.abs(left - right);
if (abs == Long.MIN_VALUE) {
abs = Long.MAX_VALUE;
}
return 1.0 / (1.0 + abs);
return metric.compute(left, right);
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
/*
* Copyright (c) "Neo4j"
* Neo4j Sweden AB [http://neo4j.com]
*
* This file is part of Neo4j.
*
* Neo4j is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package org.neo4j.gds.similarity.knn.metrics;

/**
 * Similarity metric for {@code long} properties derived from the absolute
 * difference of the two values: {@code 1 / (1 + |left - right|)}.
 *
 * Identical values yield {@code 1.0}; the result approaches {@code 0.0} as the
 * values move apart.
 */
public final class NormalizedAbsoluteDifference {
    private NormalizedAbsoluteDifference() {}

    /**
     * Computes {@code 1 / (1 + |left - right|)}.
     *
     * Differences that cannot be represented as a non-negative {@code long}
     * are saturated to {@link Long#MAX_VALUE} instead of wrapping around.
     *
     * @param left  first value
     * @param right second value
     * @return a similarity in the range (0, 1]
     */
    public static double longMetric(long left, long right) {
        long diff = left - right;
        // The subtraction overflowed iff the operands have different signs and the
        // sign of the result differs from the sign of the left operand.
        boolean overflowed = ((left ^ right) & (left ^ diff)) < 0;
        // Math.abs(Long.MIN_VALUE) is still negative, so that case saturates as well.
        long abs = (overflowed || diff == Long.MIN_VALUE) ? Long.MAX_VALUE : Math.abs(diff);
        return 1.0 / (1.0 + abs);
    }
}
Original file line number Diff line number Diff line change
Expand Up @@ -73,7 +73,11 @@ static SimilarityComputer ofProperty(
) {
switch (properties.valueType()) {
case LONG:
return ofLongProperty(properties);
return ofLongProperty(
name,
properties,
defaultSimilarityMetric
);
case DOUBLE:
return ofDoubleProperty(properties);
case DOUBLE_ARRAY:
Expand Down Expand Up @@ -107,8 +111,15 @@ static SimilarityComputer ofDoubleProperty(NodePropertyValues nodePropertyValues
return new DoublePropertySimilarityComputer(nodePropertyValues);
}

static SimilarityComputer ofLongProperty(NodePropertyValues nodePropertyValues) {
return new LongPropertySimilarityComputer(nodePropertyValues);
static SimilarityComputer ofLongProperty(String name, NodePropertyValues properties, SimilarityMetric metric) {
switch (metric) {
case HAMMING_DISTANCE:
return new LongPropertySimilarityComputer(properties, HammingDistance::longMetric);
case NORMALIZED_ABSOLUTE_DIFFERENCE:
return new LongPropertySimilarityComputer(properties, NormalizedAbsoluteDifference::longMetric);
default:
throw unsupportedSimilarityMetric(name, properties.valueType(), metric);
}
}

static SimilarityComputer ofFloatArrayProperty(String name, NodePropertyValues properties, SimilarityMetric metric) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,9 @@
import static org.neo4j.gds.utils.StringFormatting.toUpperCaseWithLocale;

public enum SimilarityMetric {
JACCARD, OVERLAP, COSINE, EUCLIDEAN, PEARSON, LONG_PROPERTY_METRIC, DOUBLE_PROPERTY_METRIC, DEFAULT;
JACCARD, OVERLAP, COSINE, EUCLIDEAN, PEARSON,
NORMALIZED_ABSOLUTE_DIFFERENCE, DOUBLE_PROPERTY_METRIC,
HAMMING_DISTANCE, DEFAULT;

public static SimilarityMetric parse(String value) {
return SimilarityMetric.valueOf(toUpperCaseWithLocale(value));
Expand All @@ -34,7 +36,7 @@ public static SimilarityMetric parse(String value) {
public static SimilarityMetric defaultMetricForType(ValueType valueType) {
switch (valueType) {
case LONG:
return LONG_PROPERTY_METRIC;
return NORMALIZED_ABSOLUTE_DIFFERENCE;
case DOUBLE:
return DOUBLE_PROPERTY_METRIC;
case DOUBLE_ARRAY:
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,40 @@
/*
* Copyright (c) "Neo4j"
* Neo4j Sweden AB [http://neo4j.com]
*
* This file is part of Neo4j.
*
* Neo4j is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package org.neo4j.gds.similarity.knn.metrics;

import org.junit.jupiter.api.Test;

import static org.junit.jupiter.api.Assertions.assertEquals;

/**
 * Tests for {@link HammingDistance}, checked against its documented contract:
 * the Hamming distance mapped linearly onto a similarity in the 0..1 range.
 */
class HammingDistanceTest {
    @Test
    void shouldReturnFullCorrelationWhenArgsAreIdentical() {
        double similarity = HammingDistance.longMetric(12345L, 12345L);

        assertEquals(1.0, similarity);
    }

    @Test
    void shouldReturnCorrectCorrelation() {
        // 12345 ^ 54321 == 0xE408, which has 5 bits set.
        double similarity = HammingDistance.longMetric(12345L, 54321L);

        assertEquals(1.0 - 5.0 / 64.0, similarity);
    }

    @Test
    void shouldReturnNoCorrelationWhenAllBitsDiffer() {
        double similarity = HammingDistance.longMetric(0L, -1L);

        assertEquals(0.0, similarity);
    }
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
/*
* Copyright (c) "Neo4j"
* Neo4j Sweden AB [http://neo4j.com]
*
* This file is part of Neo4j.
*
* Neo4j is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package org.neo4j.gds.similarity.knn.metrics;

import org.junit.jupiter.api.Test;

import static org.junit.jupiter.api.Assertions.assertEquals;

/**
 * Tests for {@link NormalizedAbsoluteDifference}, i.e. {@code 1 / (1 + |left - right|)}.
 */
class NormalizedAbsoluteDifferenceTest {
    @Test
    void shouldComputeNormalizedAbsoluteDifference() {
        double diff = NormalizedAbsoluteDifference.longMetric(1L, 2L);

        // |1 - 2| == 1, so the metric is 1 / (1 + 1).
        assertEquals(0.5, diff);
    }

    @Test
    void shouldReturnOneForIdenticalValues() {
        double diff = NormalizedAbsoluteDifference.longMetric(42L, 42L);

        assertEquals(1.0, diff);
    }
}

0 comments on commit 0d3e307

Please sign in to comment.