diff --git a/concepts/algorithms/__init__.py b/concepts/algorithms/__init__.py
index 034c500..fcf2ed9 100644
--- a/concepts/algorithms/__init__.py
+++ b/concepts/algorithms/__init__.py
@@ -5,10 +5,11 @@
 from .common import iterunion
 from .fcbo import fast_generate_from, fcbo_dual
 from .lindig import lattice, neighbors
+from .covering_edges import lattice_fcbo
 
 __all__ = ['iterunion',
            'fast_generate_from', 'fcbo_dual',
-           'lattice', 'neighbors',
+           'lattice', 'neighbors', 'lattice_fcbo',
            'iterconcepts', 'get_concepts']
 
 
diff --git a/concepts/algorithms/covering_edges.py b/concepts/algorithms/covering_edges.py
new file mode 100644
index 0000000..53da1f6
--- /dev/null
+++ b/concepts/algorithms/covering_edges.py
@@ -0,0 +1,122 @@
+"""Covering Edges
+
+cf. Carpineto, Claudio, and Giovanni Romano.
+Concept data analysis: Theory and applications.
+John Wiley & Sons, 2004.
+"""
+
+import multiprocessing
+import itertools
+import collections
+
+from .fcbo import fast_generate_from
+
+
+def covering_edges(concept_list, context, concept_index=None):
+    """Yield covering edges as ``((extent, intent), (lower_extent, lower_intent))``
+    pairs (a concept and one of its lower neighbors) from ``context`` and ``concept_list``.
+
+    Example:
+        >>> from concepts import make_context, ConceptList
+        >>> from concepts._common import Concept
+
+        >>> context = make_context('''
+        ...  |0|1|2|3|4|5|
+        ... A|X|X|X| | | |
+        ... B|X| |X|X|X|X|
+        ... C|X|X| | |X| |
+        ... D| |X|X| | | |''')
+
+        >>> concepts = [('ABCD', ''),
+        ...             ('ABC', '0'),
+        ...             ('AC', '01'),
+        ...             ('A', '012'),
+        ...             ('', '012345'),
+        ...             ('C', '014'),
+        ...             ('AB', '02'),
+        ...             ('B', '02345'),
+        ...             ('BC', '04'),
+        ...             ('ACD', '1'),
+        ...             ('AD', '12'),
+        ...             ('ABD', '2')]
+
+        >>> concept_list = ConceptList.frompairs(
+        ...     map(lambda c: (context._Objects.frommembers(c[0]),
+        ...                    context._Properties.frommembers(c[1])),
+        ...         concepts))
+
+        >>> edges = covering_edges(concept_list, context)
+
+        >>> [(''.join(concept[0].members()),  # doctest: +NORMALIZE_WHITESPACE
+        ...   ''.join(lower[0].members()))
+        ...  for concept, lower in edges]
+        [('ABCD', 'ABC'),
+         ('ABCD', 'ACD'),
+         ('ABCD', 'ABD'),
+         ('ABC', 'AC'),
+         ('ABC', 'AB'),
+         ('ABC', 'BC'),
+         ('AC', 'A'),
+         ('AC', 'C'),
+         ('A', ''),
+         ('C', ''),
+         ('AB', 'A'),
+         ('AB', 'B'),
+         ('B', ''),
+         ('BC', 'C'),
+         ('BC', 'B'),
+         ('ACD', 'AC'),
+         ('ACD', 'AD'),
+         ('AD', 'A'),
+         ('ABD', 'AB'),
+         ('ABD', 'AD')]
+    """
+    Objects = context._Objects
+    Properties = context._Properties
+
+    if concept_index is None:
+        concept_index = dict(concept_list)
+
+    for extent, intent in concept_list:
+        candidate_counter = collections.Counter()
+
+        property_candidates = Properties.fromint(Properties.supremum & ~intent)
+
+        for atom in property_candidates.atoms():
+            extent_candidate = Objects.fromint(extent & atom.prime())
+            intent_candidate = concept_index[extent_candidate]
+            candidate_counter[extent_candidate] += 1
+            # lower neighbor iff every property it adds to the intent yields this same extent
+            if (intent_candidate.count() - intent.count()) == candidate_counter[extent_candidate]:
+                yield (extent, intent), (extent_candidate, intent_candidate)
+
+
+def _return_edges(batch, context, concept_index):
+    return list(covering_edges(batch, context, concept_index=concept_index))
+
+
+def lattice_fcbo(context, process_count=1):
+    """Return a tuple of ``(extent, intent, upper, lower)`` tuples in short lexicographic order."""
+    concepts = list(fast_generate_from(context))
+    concepts.sort(key=lambda concept: concept[0].shortlex())
+    concept_index = dict(concepts)
+
+    if process_count == 1:
+        edges = covering_edges(concepts, context, concept_index=concept_index)
+    else:
+        batches = [concepts[i::process_count] for i in range(process_count)]
+
+        with multiprocessing.Pool(process_count) as p:
+            results = [p.apply_async(_return_edges, (batch, context, concept_index)) for batch in batches]
+            edges = itertools.chain.from_iterable([result.get() for result in results])
+
+    mapping = {extent: (extent, intent, [], []) for extent, intent in concepts}
+
+    for concept, lower_neighbor in edges:
+        extent, _ = concept
+        lower_extent, _ = lower_neighbor
+
+        mapping[extent][3].append(lower_extent)
+        mapping[lower_extent][2].append(extent)
+
+    return tuple(mapping.values())
\ No newline at end of file
diff --git a/concepts/contexts.py b/concepts/contexts.py
index 0479e99..bebe582 100644
--- a/concepts/contexts.py
+++ b/concepts/contexts.py
@@ -465,13 +465,37 @@ def _minimize(extent, intent):
 
 
 class LatticeMixin:
+    algorithm_for_lattice: str = 'lindig'
+    process_count: int = 1
+    _parallel_algorithms: list = ['fcbo']
+    _single_thread_algorithms: list = ['lindig']
+
+    def __init__(self,
+                 algorithm_for_lattice: typing.Optional[str] = None,
+                 process_count: typing.Optional[int] = None) -> None:
+
+        if algorithm_for_lattice is not None:
+            if algorithm_for_lattice not in self._parallel_algorithms + self._single_thread_algorithms:
+                raise NotImplementedError
+            self.algorithm_for_lattice = algorithm_for_lattice
+
+        if process_count is not None:
+            if self.algorithm_for_lattice not in self._parallel_algorithms:
+                raise NotImplementedError
+            self.process_count = process_count
 
     def _lattice(self, infimum=()):
         """Yield ``(extent, intent, upper, lower)`` in short lexicographic order.
 
        cf. C. Lindig. 2000. Fast Concept Analysis.
""" - return algorithms.lattice(self._Objects, infimum=infimum) + + if self.algorithm_for_lattice == 'lindig': + return algorithms.lattice(self._Objects, infimum=infimum) + elif self.algorithm_for_lattice == 'fcbo': + return algorithms.lattice_fcbo(self, process_count=self.process_count) + else: + raise NotImplementedError def _neighbors(self, objects): """Yield upper neighbors from extent (in colex order?). @@ -630,23 +654,36 @@ def todict(self, ignore_lattice: bool = False class Context(ExportableMixin, LatticeMixin, MinimizeMixin, PrimeMixin, ComparableMixin, FormattingMixin, Data): - """Formal context defining a relation between objects and properties. + """Formal context defining a relation between objects and properties.""" - Create context from ``objects``, ``properties``, and ``bools`` correspondence. + def __init__(self, + objects: typing.Iterable[str], + properties: typing.Iterable[str], + bools: typing.Iterable[typing.Tuple[bool, ...]], + algorithm_for_lattice: typing.Optional[str] = None, + process_count: typing.Optional[int] = None): + """Create context from ``objects``, ``properties``, and ``bools`` correspondence. - Args: - objects: Iterable of object label strings. - properties: Iterable of property label strings. - bools: Iterable of ``len(objects)`` tuples of ``len(properties)`` booleans. + Args: + objects: Iterable of object label strings. + properties: Iterable of property label strings. + bools: Iterable of ``len(objects)`` tuples of ``len(properties)`` booleans. + algorithm_for_lattice: String specifing name of the default algorithm which is + used to build the lattice. - Returns: - Context: New :class:`.Context` instance. + Returns: + Context: New :class:`.Context` instance. - Example: - >>> from concepts import Context - >>> Context(['man', 'woman'], ['male', 'female'], [(True, False), (False, True)]) # doctest: +ELLIPSIS - - """ + Example: + >>> from concepts import Context + >>> Context(['man', 'woman'], + ... ['male', 'female'], + ... 
+            ...         [(True, False), (False, True)])  # doctest: +ELLIPSIS
+            <Context object mapping 2 objects to 2 properties [...] at 0x...>
+        """
+        Data.__init__(self, objects, properties, bools)
+        LatticeMixin.__init__(self, algorithm_for_lattice, process_count)
 
     @property
     def objects(self) -> typing.Tuple[str, ...]:
diff --git a/tests/conftest.py b/tests/conftest.py
index 14773e7..818333e 100644
--- a/tests/conftest.py
+++ b/tests/conftest.py
@@ -66,6 +66,28 @@ def lattice(context):
     return context.lattice
 
 
+@pytest.fixture(scope='session')
+def bob_ross(test_examples, filename='bob-ross.cxt'):
+    path = test_examples / filename
+
+    context = concepts.load_cxt(str(path), encoding='utf-8')
+
+    assert context.shape == (403, 67)
+
+    return context
+
+
+@pytest.fixture(scope='session')
+def mushroom(test_examples, filename='mushroom.cxt'):
+    path = test_examples / filename
+
+    context = concepts.load_cxt(str(path))
+
+    assert context.shape == (8_124, 119)
+
+    return context
+
+
 @pytest.fixture(params=['str', 'bytes', 'pathlike', 'fileobj'])
 def path_or_fileobj(request, tmp_path, filename='context.json'):
     if request.param == 'str':
diff --git a/tests/test_algorithms.py b/tests/test_algorithms.py
index afe4bbe..1529d79 100644
--- a/tests/test_algorithms.py
+++ b/tests/test_algorithms.py
@@ -11,28 +11,6 @@
 ENCODING = 'utf-8'
 
 
-@pytest.fixture
-def bob_ross(test_examples, filename=BOB_ROSS):
-    path = test_examples / filename
-
-    context = concepts.load_cxt(str(path), encoding=ENCODING)
-
-    assert context.shape == (403, 67)
-
-    return context
-
-
-@pytest.fixture
-def mushroom(test_examples, filename='mushroom.cxt'):
-    path = test_examples / filename
-
-    context = concepts.load_cxt(str(path))
-
-    assert context.shape == (8_124, 119)
-
-    return context
-
-
 def test_lattice(lattice):
     pairs = [f'{x._extent.bits()} <-> {x._intent.bits()}'
              for x in lattice]
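
Usage note (not part of the patch): ``lattice_fcbo`` ships without a doctest of its own, so the sketch below shows one way the new entry point could be exercised once the patch is applied. It reuses the toy 4x6 context from the ``covering_edges`` doctest; ``make_context`` is existing concepts API, ``concepts.algorithms.lattice_fcbo`` is the name this patch adds to ``__all__``, and the iteration order follows the shortlex sort of extents inside ``lattice_fcbo``. Treat it as an illustration under those assumptions, not as recorded test output.

    from concepts import make_context
    from concepts.algorithms import lattice_fcbo

    # same toy context as in the covering_edges doctest
    table = '\n'.join([
        ' |0|1|2|3|4|5|',
        'A|X|X|X| | | |',
        'B|X| |X|X|X|X|',
        'C|X|X| | |X| |',
        'D| |X|X| | | |',
    ])
    context = make_context(table)

    # each item is (extent, intent, upper_neighbor_extents, lower_neighbor_extents)
    for extent, intent, upper, lower in lattice_fcbo(context):
        print(''.join(extent.members()), ''.join(intent.members()),
              len(upper), len(lower))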
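
At the ``Context`` level, the new ``algorithm_for_lattice`` and ``process_count`` keyword arguments can be smoke-tested end to end. A minimal sketch, assuming the patch is applied as shown: the man/woman context is the one from the docstring above, ``context.lattice`` is the library's existing lazy lattice property, and with ``'fcbo'`` selected it now dispatches to ``algorithms.lattice_fcbo``. The ``__main__`` guard matters because ``process_count > 1`` goes through ``multiprocessing.Pool``.

    import concepts

    if __name__ == '__main__':  # required on spawn platforms when process_count > 1
        context = concepts.Context(['man', 'woman'],
                                   ['male', 'female'],
                                   [(True, False), (False, True)],
                                   algorithm_for_lattice='fcbo',
                                   process_count=2)

        # lazily builds the lattice via LatticeMixin._lattice() -> lattice_fcbo()
        for concept in context.lattice:
            print(concept.extent, concept.intent)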