diff --git a/docs/usage.rst b/docs/usage.rst index c781703..5ab1d28 100644 --- a/docs/usage.rst +++ b/docs/usage.rst @@ -203,8 +203,26 @@ Furthermore, the delimiter for the string output can be set through: >>> HumanName("Doe, John A. Kenneth, Jr.", initials_delimiter=";").initials() 'J; A; K; D;' - >>> HumanName("Doe, John A. Kenneth, Jr.", initials_format="{first}{middle}{last}", initials_delimiter=".").initials() - 'J.A. K.D.' + +The separator between consecutive initials *within* a name group (e.g. two middle +names) is controlled by :py:attr:`~nameparser.config.Constants.initials_separator`, +which defaults to ``" "``. Setting it to ``""`` removes that space within a group; +spacing *between* groups is still governed by ``initials_format``. + +``initials_delimiter``, ``initials_separator``, and ``initials_format`` work together: + +- ``initials_delimiter`` — appended *after* each individual initial (default ``"."``) +- ``initials_separator`` — placed *after* the delimiter between consecutive initials in the same group (default ``" "``), so with ``delimiter="."`` and ``separator=" "`` you get ``A. K.`` +- ``initials_format`` — controls how the first, middle, and last groups are arranged + +For example, to produce compact period-separated initials with no spaces: + +.. doctest:: initials separator + + >>> HumanName("Doe, John A. Kenneth, Jr.", initials_separator="", initials_format="{first}{middle}{last}").initials() + 'J.A.K.D.' + >>> HumanName("Doe, John A. Kenneth, Jr.", initials_delimiter="", initials_separator="", initials_format="{first}{middle}{last}").initials() + 'JAKD' To get a list representation of the initials, use :py:meth:`~nameparser.HumanName.initials_list`. 
This function is unaffected by :py:attr:`~nameparser.config.Constants.initials_format` diff --git a/nameparser/config/__init__.py b/nameparser/config/__init__.py index d2a0370..6dd591c 100644 --- a/nameparser/config/__init__.py +++ b/nameparser/config/__init__.py @@ -215,6 +215,19 @@ class Constants: Will be used to add a delimiter between each initial. """ + initials_separator = " " + """ + The default separator placed between consecutive initials within a name + group (first, middle, or last). Distinct from ``initials_delimiter``, + which is the trailing character after each individual initial. + + With defaults ``initials_delimiter="."`` and ``initials_separator=" "``, + ``"Doe, John A. Kenneth"`` gives ``"J. A. K. D."``. Setting ``initials_separator=""`` + with ``initials_delimiter="."`` and ``initials_format="{first}{middle}{last}"`` + produces ``"J.A.K.D."``. With the default ``initials_format``, group-level + spacing from the template is still applied. + """ + empty_attribute_default = '' """ Default return value for empty attributes. 
diff --git a/nameparser/parser.py b/nameparser/parser.py index b0cc008..4e9d188 100644 --- a/nameparser/parser.py +++ b/nameparser/parser.py @@ -94,6 +94,7 @@ def __init__( string_format: str | None = None, initials_format: str | None = None, initials_delimiter: str | None = None, + initials_separator: str | None = None, first: str | list[str] | None = None, middle: str | list[str] | None = None, last: str | list[str] | None = None, @@ -106,9 +107,10 @@ def __init__( self.C = Constants() self.encoding = encoding - self.string_format = string_format or self.C.string_format - self.initials_format = initials_format or self.C.initials_format - self.initials_delimiter = initials_delimiter or self.C.initials_delimiter + self.string_format = string_format if string_format is not None else self.C.string_format + self.initials_format = initials_format if initials_format is not None else self.C.initials_format + self.initials_delimiter = initials_delimiter if initials_delimiter is not None else self.C.initials_delimiter + self.initials_separator = initials_separator if initials_separator is not None else self.C.initials_separator if (first or middle or last or title or suffix or nickname): self.first = first self.middle = middle @@ -177,7 +179,7 @@ def __next__(self) -> str: return getattr(self, self._members[c]) or next(self) def __str__(self) -> str: - if self.string_format: + if self.string_format is not None: # string_format = "{title} {first} {middle} {last} {suffix} ({nickname})" _s = self.string_format.format(**self.as_dict()) # remove trailing punctuation from missing nicknames @@ -241,7 +243,7 @@ def __process_initial__(self, name_part: str, firstname: bool = False) -> str: if not (self.is_prefix(part) or self.is_conjunction(part)) or firstname: initials.append(part[0]) if len(initials) > 0: - return " ".join(initials) + return self.initials_separator.join(initials) else: return self.C.empty_attribute_default @@ -265,19 +267,25 @@ def initials_list(self) -> 
list[str]: def initials(self) -> str: """ - Return period-delimited initials of the first, middle and optionally last name. + Return formatted initials for the name, controlled by + ``initials_format``, ``initials_delimiter``, and ``initials_separator``. - :param bool include_last_name: Include the last name as part of the initials - :rtype: str + ``initials_delimiter`` is appended after each individual initial. + ``initials_separator`` is placed between consecutive initials within + a name group (first, middle, or last). Both can be set as + ``Constants`` attributes or as ``HumanName`` constructor kwargs. - .. doctest:: + .. doctest:: - >>> name = HumanName("Sir Bob Andrew Dole") - >>> name.initials() - "B. A. D." - >>> name = HumanName("Sir Bob Andrew Dole", initials_format="{first} {middle}") - >>> name.initials() - "B. A." + >>> name = HumanName("Sir Bob Andrew Dole") + >>> name.initials() + 'B. A. D.' + >>> name = HumanName("Sir Bob Andrew Dole", initials_format="{first} {middle}") + >>> name.initials() + 'B. A.' + >>> name = HumanName("Doe, John A.", initials_delimiter="", initials_separator="") + >>> name.initials() + 'J A D' """ first_initials_list = [self.__process_initial__(name, True) for name in self.first_list if name] @@ -289,11 +297,11 @@ def initials(self) -> str: # output. A fully-empty result falls back to empty_attribute_default, # matching the other attribute accessors (e.g. ``first``). 
initials_dict = { - "first": (self.initials_delimiter + " ").join(first_initials_list) + self.initials_delimiter + "first": (self.initials_delimiter + self.initials_separator).join(first_initials_list) + self.initials_delimiter if len(first_initials_list) else "", - "middle": (self.initials_delimiter + " ").join(middle_initials_list) + self.initials_delimiter + "middle": (self.initials_delimiter + self.initials_separator).join(middle_initials_list) + self.initials_delimiter if len(middle_initials_list) else "", - "last": (self.initials_delimiter + " ").join(last_initials_list) + self.initials_delimiter + "last": (self.initials_delimiter + self.initials_separator).join(last_initials_list) + self.initials_delimiter if len(last_initials_list) else "" } diff --git a/tests/base.py b/tests/base.py index 6b09ea1..254cfef 100644 --- a/tests/base.py +++ b/tests/base.py @@ -39,6 +39,9 @@ def assertFalse(self, expr: object, msg: object = None) -> None: def assertIn(self, member: object, container: object, msg: object = None) -> None: assert member in container, msg # type: ignore[operator] + def assertNotEqual(self, first: object, second: object, msg: object = None) -> None: + assert first != second, msg + def assertNotIn(self, member: object, container: object, msg: object = None) -> None: assert member not in container, msg # type: ignore[operator] diff --git a/tests/test_initials.py b/tests/test_initials.py index bccf8b4..6c410ff 100644 --- a/tests/test_initials.py +++ b/tests/test_initials.py @@ -69,10 +69,16 @@ def test_initials_delimiter(self) -> None: def test_initials_delimiter_constants(self) -> None: from nameparser.config import CONSTANTS _orig = CONSTANTS.initials_delimiter - CONSTANTS.initials_delimiter = ";" - hn = HumanName("Doe, John A. Kenneth, Jr.") - self.m(hn.initials(), "J; A; K; D;", hn) - CONSTANTS.initials_delimiter = _orig + try: + CONSTANTS.initials_delimiter = ";" + hn = HumanName("Doe, John A. 
Kenneth, Jr.") + self.m(hn.initials(), "J; A; K; D;", hn) + finally: + CONSTANTS.initials_delimiter = _orig + + def test_initials_separator_default_on_constants(self) -> None: + from nameparser.config import CONSTANTS + self.assertEqual(CONSTANTS.initials_separator, " ") def test_initials_list(self) -> None: hn = HumanName("Andrew Boris Petersen") @@ -90,6 +96,44 @@ def test_initials_with_prefix(self) -> None: hn = HumanName("Alex van Johnson") self.m(hn.initials_list(), ["A", "J"], hn) + def test_initials_delimiter_empty_string_kwarg(self) -> None: + # Regression: initials_delimiter='' was silently ignored due to `or` defaulting + hn = HumanName("Doe, John A.", initials_delimiter="") + self.m(hn.initials(), "J A D", hn) + + def test_initials_format_empty_string_kwarg(self) -> None: + # Regression: initials_format='' was silently ignored due to `or` defaulting + hn = HumanName("Doe, John A.") + hn2 = HumanName("Doe, John A.", initials_format="") + self.assertNotEqual(hn.initials(), hn2.initials()) + # "".format(...) returns ""; collapse_whitespace returns "" which falls through + # to empty_attribute_default (may be "" or None depending on config variant). + self.assertFalse(hn2.initials()) + + def test_initials_separator_kwarg(self) -> None: + # initials_separator="" with initials_format="{first}{middle}{last}" gives + # period-separated initials with no spaces — a common academic citation style + hn = HumanName( + "Doe, John A. Kenneth", + initials_separator="", + initials_format="{first}{middle}{last}", + ) + self.m(hn.initials(), "J.A.K.D.", hn) + + def test_initials_separator_custom_value(self) -> None: + # Non-empty custom separator exercising __process_initial__ on a multi-word + # token. "Van Berg" is a single name part whose two words produce two initials + # joined by initials_separator. 
+ hn = HumanName("", initials_separator="-", initials_delimiter=".") + result = hn.__process_initial__("Van Berg", firstname=True) + self.assertEqual(result, "V-B") + + def test_str_default_behavior_unchanged(self) -> None: + # Regression guard for the `or` → `is not None` change in __str__: + # the default path (no string_format kwarg) must still produce the expected string. + hn = HumanName("John Doe") + self.assertEqual(str(hn), "John Doe") + def test_constructor_first(self) -> None: hn = HumanName(first="TheName") self.assertFalse(hn.unparsable) @@ -126,3 +170,43 @@ def test_constructor_multiple(self) -> None: self.m(hn.first, "TheName", hn) self.m(hn.last, "lastname", hn) self.m(hn.title, "mytitle", hn) + + def test_initials_separator_kwarg_multiword_part(self) -> None: + # Regression: initials_separator kwarg must flow into __process_initial__ + # for multi-word name parts, not just into the initials() join calls. + hn = HumanName("", initials_separator="") + result = hn.__process_initial__("Van Berg", firstname=True) + self.assertEqual(result, "VB") + + def test_string_format_empty_string_kwarg(self) -> None: + # Regression: string_format='' was silently ignored due to `or` defaulting + hn = HumanName("John Doe", string_format="") + self.assertEqual(str(hn), "") + + def test_initials_separator_empty_multi_part_middle(self) -> None: + # Full workflow from issue #152: empty delimiter + separator + compact format + # gives fully concatenated initials with no spaces or punctuation. + # Spaces between groups come from initials_format, so that must also be set. + hn = HumanName( + "Doe, John A. 
Kenneth", + initials_delimiter="", + initials_separator="", + initials_format="{first}{middle}{last}", + ) + self.m(hn.initials(), "JAKD", hn) + + def test_initials_separator_constants_multi_part_middle(self) -> None: + from nameparser.config import CONSTANTS + _orig_d = CONSTANTS.initials_delimiter + _orig_s = CONSTANTS.initials_separator + _orig_f = CONSTANTS.initials_format + try: + CONSTANTS.initials_delimiter = "" + CONSTANTS.initials_separator = "" + CONSTANTS.initials_format = "{first}{middle}{last}" + hn = HumanName("Doe, John A. Kenneth") + self.m(hn.initials(), "JAKD", hn) + finally: + CONSTANTS.initials_delimiter = _orig_d + CONSTANTS.initials_separator = _orig_s + CONSTANTS.initials_format = _orig_f