Skip to content

Commit

Permalink
Change COMA API to have "use_instances" as keyword instead of "strategy" (#60)
Browse files Browse the repository at this point in the history

* Change COMA API to have "use_instances" as keyword

* Use keyword anyway for example and readme, to show users the option exists

---------

Co-authored-by: Shaad Alaka <[email protected]>
  • Loading branch information
Archer6621 and Shaad Alaka committed Oct 10, 2023
1 parent 06fdc82 commit 3b67275
Show file tree
Hide file tree
Showing 5 changed files with 8 additions and 8 deletions.
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -93,7 +93,7 @@ df1 = pd.read_csv(d1_path)
df2 = pd.read_csv(d2_path)

# Instantiate matcher and run
matcher = Coma(strategy="COMA_OPT")
matcher = Coma(use_instances=True)
matches = valentine_match(df1, df2, matcher)

print(matches)
Expand Down
2 changes: 1 addition & 1 deletion examples/valentine_example.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@ def main():
# Instantiate matcher and run
# Coma requires java to be installed on your machine
# If java is not an option, all the other algorithms are in Python (e.g., Cupid)
matcher = Coma(strategy="COMA_OPT")
matcher = Coma(use_instances=False)
matches = valentine_match(df1, df2, matcher)

# If ground truth available valentine could calculate the metrics
Expand Down
4 changes: 2 additions & 2 deletions tests/test_algorithms.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,11 +12,11 @@ class TestAlgorithms(unittest.TestCase):

def test_coma(self):
# Test the schema variant of coma
coma_matcher_schema = Coma(strategy="COMA_OPT")
coma_matcher_schema = Coma(use_instances=False)
matches_coma_matcher_schema = coma_matcher_schema.get_matches(d1, d2)
assert len(matches_coma_matcher_schema) > 0 # Check that it actually produced output
# Test the instance variant of coma
coma_matcher_instances = Coma(strategy="COMA_OPT_INST")
coma_matcher_instances = Coma(use_instances=True)
matches_coma_matcher_instances = coma_matcher_instances.get_matches(d1, d2)
assert len(matches_coma_matcher_instances) > 0 # Check that it actually produced output
# Assume the Schema and instance should provide different results
Expand Down
4 changes: 2 additions & 2 deletions tests/test_valentine.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@ class TestValentine(unittest.TestCase):
def test_match(self):
assert not DataframeTable(df1, name='df1_name').is_empty
assert not DataframeTable(df2, name='df2_name').is_empty
matches = valentine_match(df1, df2, Coma(strategy="COMA_OPT_INST"))
matches = valentine_match(df1, df2, Coma(use_instances=True))
assert len(matches) > 0
try:
valentine_match(df1, df2, None)
Expand All @@ -22,7 +22,7 @@ def test_match(self):
assert False

def test_metrics(self):
matches = valentine_match(df1, df2, Coma(strategy="COMA_OPT_INST"))
matches = valentine_match(df1, df2, Coma(use_instances=True))
golden_standard = [('Cited by', 'Cited by'),
('Authors', 'Authors'),
('EID', 'EID')]
Expand Down
4 changes: 2 additions & 2 deletions valentine/algorithms/coma/coma.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,10 +14,10 @@ class Coma(BaseMatcher):

def __init__(self,
max_n: int = 0,
strategy: str = "COMA_OPT",
use_instances: bool = False,
java_xmx: str = "1024m"):
self.__max_n = int(max_n)
self.__strategy = strategy
self.__strategy = "COMA_OPT_INST" if use_instances else "COMA_OPT"
self.__java_XmX = java_xmx

def get_matches(self,
Expand Down

0 comments on commit 3b67275

Please sign in to comment.