From 67d8e5bb9e90a103ef252290902a17437aefa216 Mon Sep 17 00:00:00 2001 From: Derek Gulbranson Date: Sun, 28 Jun 2026 18:18:27 -0700 Subject: [PATCH 1/3] fix: add initials_separator; fix or-defaulting for format/delimiter kwargs (closes #152) - Add initials_separator = " " to Constants: controls the joiner between consecutive initials within a name group, distinct from initials_delimiter which is the trailing character after each individual initial - Add initials_separator kwarg to HumanName.__init__ - Fix or-defaulting to is-not-None for string_format, initials_format, and initials_delimiter kwargs so empty string '' is accepted as a valid value - Use initials_separator in __process_initial__ and initials() in place of hardcoded " " - Document initials_separator in usage.rst with examples Co-Authored-By: Claude Sonnet 4.6 --- docs/usage.rst | 21 ++++++++++-- nameparser/config/__init__.py | 11 ++++++ nameparser/parser.py | 42 +++++++++++++---------- tests/test_initials.py | 63 +++++++++++++++++++++++++++++++++++ 4 files changed, 118 insertions(+), 19 deletions(-) diff --git a/docs/usage.rst b/docs/usage.rst index c781703..caaa203 100644 --- a/docs/usage.rst +++ b/docs/usage.rst @@ -203,8 +203,25 @@ Furthermore, the delimiter for the string output can be set through: >>> HumanName("Doe, John A. Kenneth, Jr.", initials_delimiter=";").initials() 'J; A; K; D;' - >>> HumanName("Doe, John A. Kenneth, Jr.", initials_format="{first}{middle}{last}", initials_delimiter=".").initials() - 'J.A. K.D.' + +The separator between consecutive initials *within* a name group (e.g. two middle +names) is controlled by :py:attr:`~nameparser.config.Constants.initials_separator`, +which defaults to ``" "``. Setting it to ``""`` removes that space. 
+ +``initials_delimiter``, ``initials_separator``, and ``initials_format`` work together: + +- ``initials_delimiter`` — appended *after* each individual initial (default ``"."``) +- ``initials_separator`` — placed *between* consecutive initials in the same group (default ``" "``) +- ``initials_format`` — controls how the first, middle, and last groups are arranged + +For example, to produce compact period-separated initials with no spaces: + +.. doctest:: initials separator + + >>> HumanName("Doe, John A. Kenneth, Jr.", initials_separator="", initials_format="{first}{middle}{last}").initials() + 'J.A.K.D.' + >>> HumanName("Doe, John A. Kenneth, Jr.", initials_delimiter="", initials_separator="", initials_format="{first}{middle}{last}").initials() + 'JAKD' To get a list representation of the initials, use :py:meth:`~nameparser.HumanName.initials_list`. This function is unaffected by :py:attr:`~nameparser.config.Constants.initials_format` diff --git a/nameparser/config/__init__.py b/nameparser/config/__init__.py index d2a0370..b766afc 100644 --- a/nameparser/config/__init__.py +++ b/nameparser/config/__init__.py @@ -215,6 +215,17 @@ class Constants: Will be used to add a delimiter between each initial. """ + initials_separator = " " + """ + The default separator placed between consecutive initials within a name + group (first, middle, or last). Distinct from ``initials_delimiter``, + which is the trailing character after each individual initial. + + With defaults ``initials_delimiter="."`` and ``initials_separator=" "``, + ``initials()`` produces ``"J. A. D."``. Setting ``initials_separator=""`` + with ``initials_delimiter="."`` produces ``"J.A.D."``. + """ + empty_attribute_default = '' """ Default return value for empty attributes. 
diff --git a/nameparser/parser.py b/nameparser/parser.py index b0cc008..94ac5c6 100644 --- a/nameparser/parser.py +++ b/nameparser/parser.py @@ -94,6 +94,7 @@ def __init__( string_format: str | None = None, initials_format: str | None = None, initials_delimiter: str | None = None, + initials_separator: str | None = None, first: str | list[str] | None = None, middle: str | list[str] | None = None, last: str | list[str] | None = None, @@ -106,9 +107,10 @@ def __init__( self.C = Constants() self.encoding = encoding - self.string_format = string_format or self.C.string_format - self.initials_format = initials_format or self.C.initials_format - self.initials_delimiter = initials_delimiter or self.C.initials_delimiter + self.string_format = string_format if string_format is not None else self.C.string_format + self.initials_format = initials_format if initials_format is not None else self.C.initials_format + self.initials_delimiter = initials_delimiter if initials_delimiter is not None else self.C.initials_delimiter + self.initials_separator = initials_separator if initials_separator is not None else self.C.initials_separator if (first or middle or last or title or suffix or nickname): self.first = first self.middle = middle @@ -241,7 +243,7 @@ def __process_initial__(self, name_part: str, firstname: bool = False) -> str: if not (self.is_prefix(part) or self.is_conjunction(part)) or firstname: initials.append(part[0]) if len(initials) > 0: - return " ".join(initials) + return self.C.initials_separator.join(initials) else: return self.C.empty_attribute_default @@ -265,19 +267,25 @@ def initials_list(self) -> list[str]: def initials(self) -> str: """ - Return period-delimited initials of the first, middle and optionally last name. + Return formatted initials for the name, controlled by + ``initials_format``, ``initials_delimiter``, and ``initials_separator``. 
- :param bool include_last_name: Include the last name as part of the initials - :rtype: str + ``initials_delimiter`` is appended after each individual initial. + ``initials_separator`` is placed between consecutive initials within + a name group (first, middle, or last). Both can be set as + ``Constants`` attributes or as ``HumanName`` constructor kwargs. - .. doctest:: + .. doctest:: - >>> name = HumanName("Sir Bob Andrew Dole") - >>> name.initials() - "B. A. D." - >>> name = HumanName("Sir Bob Andrew Dole", initials_format="{first} {middle}") - >>> name.initials() - "B. A." + >>> name = HumanName("Sir Bob Andrew Dole") + >>> name.initials() + "B. A. D." + >>> name = HumanName("Sir Bob Andrew Dole", initials_format="{first} {middle}") + >>> name.initials() + "B. A." + >>> name = HumanName("Doe, John A.", initials_delimiter="", initials_separator="") + >>> name.initials() + "J A D" """ first_initials_list = [self.__process_initial__(name, True) for name in self.first_list if name] @@ -289,11 +297,11 @@ def initials(self) -> str: # output. A fully-empty result falls back to empty_attribute_default, # matching the other attribute accessors (e.g. ``first``). 
initials_dict = { - "first": (self.initials_delimiter + " ").join(first_initials_list) + self.initials_delimiter + "first": (self.initials_delimiter + self.initials_separator).join(first_initials_list) + self.initials_delimiter if len(first_initials_list) else "", - "middle": (self.initials_delimiter + " ").join(middle_initials_list) + self.initials_delimiter + "middle": (self.initials_delimiter + self.initials_separator).join(middle_initials_list) + self.initials_delimiter if len(middle_initials_list) else "", - "last": (self.initials_delimiter + " ").join(last_initials_list) + self.initials_delimiter + "last": (self.initials_delimiter + self.initials_separator).join(last_initials_list) + self.initials_delimiter if len(last_initials_list) else "" } diff --git a/tests/test_initials.py b/tests/test_initials.py index bccf8b4..752873a 100644 --- a/tests/test_initials.py +++ b/tests/test_initials.py @@ -74,6 +74,10 @@ def test_initials_delimiter_constants(self) -> None: self.m(hn.initials(), "J; A; K; D;", hn) CONSTANTS.initials_delimiter = _orig + def test_initials_separator_default_on_constants(self) -> None: + from nameparser.config import CONSTANTS + self.assertEqual(CONSTANTS.initials_separator, " ") + def test_initials_list(self) -> None: hn = HumanName("Andrew Boris Petersen") self.m(hn.initials_list(), ["A", "B", "P"], hn) @@ -90,6 +94,30 @@ def test_initials_with_prefix(self) -> None: hn = HumanName("Alex van Johnson") self.m(hn.initials_list(), ["A", "J"], hn) + def test_initials_delimiter_empty_string_kwarg(self) -> None: + # Regression: initials_delimiter='' was silently ignored due to `or` defaulting + hn = HumanName("Doe, John A.", initials_delimiter="") + self.m(hn.initials(), "J A D", hn) + + def test_initials_format_empty_string_kwarg(self) -> None: + # Regression: initials_format='' was silently ignored due to `or` defaulting + hn = HumanName("Doe, John A.") + hn2 = HumanName("Doe, John A.", initials_format="") + assert hn.initials() != hn2.initials() 
+ # When format is empty string, result should be either "" or empty_attribute_default + result = hn2.initials() + assert result == "" or result == hn2.C.empty_attribute_default + + def test_initials_separator_kwarg(self) -> None: + # initials_separator="" with initials_format="{first}{middle}{last}" gives + # period-separated initials with no spaces — a common academic citation style + hn = HumanName( + "Doe, John A. Kenneth", + initials_separator="", + initials_format="{first}{middle}{last}", + ) + self.m(hn.initials(), "J.A.K.D.", hn) + def test_constructor_first(self) -> None: hn = HumanName(first="TheName") self.assertFalse(hn.unparsable) @@ -126,3 +154,38 @@ def test_constructor_multiple(self) -> None: self.m(hn.first, "TheName", hn) self.m(hn.last, "lastname", hn) self.m(hn.title, "mytitle", hn) + + def test_initials_separator_multiword_name_part(self) -> None: + # __process_initial__ splits on spaces internally for multi-word tokens; + # initials_separator must flow through there too. + hn = HumanName("", constants=None) + hn.C.initials_separator = "" + # Directly exercise __process_initial__ with a two-word part + result = hn.__process_initial__("Van Berg", firstname=True) + self.assertEqual(result, "VB") + + def test_initials_separator_empty_multi_part_middle(self) -> None: + # Full workflow from issue #152: empty delimiter + separator + compact format + # gives fully concatenated initials with no spaces or punctuation. + # Spaces between groups come from initials_format, so that must also be set. + hn = HumanName( + "Doe, John A. 
Kenneth", + initials_delimiter="", + initials_separator="", + initials_format="{first}{middle}{last}", + ) + self.m(hn.initials(), "JAKD", hn) + + def test_initials_separator_constants_multi_part_middle(self) -> None: + from nameparser.config import CONSTANTS + _orig_d = CONSTANTS.initials_delimiter + _orig_s = CONSTANTS.initials_separator + _orig_f = CONSTANTS.initials_format + CONSTANTS.initials_delimiter = "" + CONSTANTS.initials_separator = "" + CONSTANTS.initials_format = "{first}{middle}{last}" + hn = HumanName("Doe, John A. Kenneth") + self.m(hn.initials(), "JAKD", hn) + CONSTANTS.initials_delimiter = _orig_d + CONSTANTS.initials_separator = _orig_s + CONSTANTS.initials_format = _orig_f From ed604d3df42e5a42aa963de30deececa90948373 Mon Sep 17 00:00:00 2001 From: Derek Gulbranson Date: Sun, 28 Jun 2026 18:36:02 -0700 Subject: [PATCH 2/3] fix: honor initials_separator kwarg in __process_initial__; fix string_format='' in __str__ - `__process_initial__` was reading `self.C.initials_separator` (Constants) instead of `self.initials_separator` (per-instance), so the constructor kwarg was silently ignored for multi-word name parts - `__str__` used `if self.string_format:` (truthiness) so `string_format=""` fell through to the default format despite the assignment fix in the PR - Add regression tests for both kwarg paths through the affected code - Fix `test_initials_separator_multiword_name_part` to set instance attr instead of Constants attr, so it tests the now-correct code path - Correct `Constants.initials_separator` docstring example (requires initials_format="{first}{middle}{last}" to produce "J.A.D.") - Clarify usage.rst: initials_separator only removes intra-group spaces; inter-group spacing is still governed by initials_format Co-Authored-By: Claude Sonnet 4.6 --- docs/usage.rst | 5 +++-- nameparser/config/__init__.py | 4 +++- nameparser/parser.py | 4 ++-- tests/test_initials.py | 14 +++++++++++++- 4 files changed, 21 insertions(+), 6 deletions(-) diff 
--git a/docs/usage.rst b/docs/usage.rst index caaa203..5ab1d28 100644 --- a/docs/usage.rst +++ b/docs/usage.rst @@ -206,12 +206,13 @@ Furthermore, the delimiter for the string output can be set through: The separator between consecutive initials *within* a name group (e.g. two middle names) is controlled by :py:attr:`~nameparser.config.Constants.initials_separator`, -which defaults to ``" "``. Setting it to ``""`` removes that space. +which defaults to ``" "``. Setting it to ``""`` removes that space within a group; +spacing *between* groups is still governed by ``initials_format``. ``initials_delimiter``, ``initials_separator``, and ``initials_format`` work together: - ``initials_delimiter`` — appended *after* each individual initial (default ``"."``) -- ``initials_separator`` — placed *between* consecutive initials in the same group (default ``" "``) +- ``initials_separator`` — placed *after* the delimiter between consecutive initials in the same group (default ``" "``), so with ``delimiter="."`` and ``separator=" "`` you get ``A. K.`` - ``initials_format`` — controls how the first, middle, and last groups are arranged For example, to produce compact period-separated initials with no spaces: diff --git a/nameparser/config/__init__.py b/nameparser/config/__init__.py index b766afc..6dd591c 100644 --- a/nameparser/config/__init__.py +++ b/nameparser/config/__init__.py @@ -223,7 +223,9 @@ class Constants: With defaults ``initials_delimiter="."`` and ``initials_separator=" "``, ``initials()`` produces ``"J. A. D."``. Setting ``initials_separator=""`` - with ``initials_delimiter="."`` produces ``"J.A.D."``. + with ``initials_delimiter="."`` and ``initials_format="{first}{middle}{last}"`` + produces ``"J.A.D."``. With the default ``initials_format``, group-level + spacing from the template is still applied. 
""" empty_attribute_default = '' diff --git a/nameparser/parser.py b/nameparser/parser.py index 94ac5c6..4e9d188 100644 --- a/nameparser/parser.py +++ b/nameparser/parser.py @@ -179,7 +179,7 @@ def __next__(self) -> str: return getattr(self, self._members[c]) or next(self) def __str__(self) -> str: - if self.string_format: + if self.string_format is not None: # string_format = "{title} {first} {middle} {last} {suffix} ({nickname})" _s = self.string_format.format(**self.as_dict()) # remove trailing punctuation from missing nicknames @@ -243,7 +243,7 @@ def __process_initial__(self, name_part: str, firstname: bool = False) -> str: if not (self.is_prefix(part) or self.is_conjunction(part)) or firstname: initials.append(part[0]) if len(initials) > 0: - return self.C.initials_separator.join(initials) + return self.initials_separator.join(initials) else: return self.C.empty_attribute_default diff --git a/tests/test_initials.py b/tests/test_initials.py index 752873a..0b3f0cf 100644 --- a/tests/test_initials.py +++ b/tests/test_initials.py @@ -155,11 +155,23 @@ def test_constructor_multiple(self) -> None: self.m(hn.last, "lastname", hn) self.m(hn.title, "mytitle", hn) + def test_initials_separator_kwarg_multiword_part(self) -> None: + # Regression: initials_separator kwarg must flow into __process_initial__ + # for multi-word name parts, not just into the initials() join calls. + hn = HumanName("", initials_separator="") + result = hn.__process_initial__("Van Berg", firstname=True) + self.assertEqual(result, "VB") + + def test_string_format_empty_string_kwarg(self) -> None: + # Regression: string_format='' was silently ignored due to `or` defaulting + hn = HumanName("John Doe", string_format="") + self.assertEqual(str(hn), "") + def test_initials_separator_multiword_name_part(self) -> None: # __process_initial__ splits on spaces internally for multi-word tokens; # initials_separator must flow through there too. 
hn = HumanName("", constants=None) - hn.C.initials_separator = "" + hn.initials_separator = "" # Directly exercise __process_initial__ with a two-word part result = hn.__process_initial__("Van Berg", firstname=True) self.assertEqual(result, "VB") From e626e25c8411698a09457555157b8e0f2ec4f05d Mon Sep 17 00:00:00 2001 From: Derek Gulbranson Date: Sun, 28 Jun 2026 18:57:12 -0700 Subject: [PATCH 3/3] test: strengthen test suite after PR review MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Add assertNotEqual shim to HumanNameTestBase - Tighten test_initials_format_empty_string_kwarg: use assertNotEqual + assertFalse instead of vacuous or-equality check - Add test_initials_separator_custom_value: non-empty separator on a multi-word token via __process_initial__ - Add test_str_default_behavior_unchanged: regression guard for the or→is-not-None __str__ fix - Remove duplicate test_initials_separator_multiword_name_part - Wrap CONSTANTS mutation tests in try/finally to ensure state is restored even when an assertion fails Co-Authored-By: Claude Sonnet 4.6 --- tests/base.py | 3 +++ tests/test_initials.py | 59 ++++++++++++++++++++++++------------------ 2 files changed, 37 insertions(+), 25 deletions(-) diff --git a/tests/base.py b/tests/base.py index 6b09ea1..254cfef 100644 --- a/tests/base.py +++ b/tests/base.py @@ -39,6 +39,9 @@ def assertFalse(self, expr: object, msg: object = None) -> None: def assertIn(self, member: object, container: object, msg: object = None) -> None: assert member in container, msg # type: ignore[operator] + def assertNotEqual(self, first: object, second: object, msg: object = None) -> None: + assert first != second, msg + def assertNotIn(self, member: object, container: object, msg: object = None) -> None: assert member not in container, msg # type: ignore[operator] diff --git a/tests/test_initials.py b/tests/test_initials.py index 0b3f0cf..6c410ff 100644 --- a/tests/test_initials.py +++ 
b/tests/test_initials.py @@ -69,10 +69,12 @@ def test_initials_delimiter(self) -> None: def test_initials_delimiter_constants(self) -> None: from nameparser.config import CONSTANTS _orig = CONSTANTS.initials_delimiter - CONSTANTS.initials_delimiter = ";" - hn = HumanName("Doe, John A. Kenneth, Jr.") - self.m(hn.initials(), "J; A; K; D;", hn) - CONSTANTS.initials_delimiter = _orig + try: + CONSTANTS.initials_delimiter = ";" + hn = HumanName("Doe, John A. Kenneth, Jr.") + self.m(hn.initials(), "J; A; K; D;", hn) + finally: + CONSTANTS.initials_delimiter = _orig def test_initials_separator_default_on_constants(self) -> None: from nameparser.config import CONSTANTS @@ -103,10 +105,10 @@ def test_initials_format_empty_string_kwarg(self) -> None: # Regression: initials_format='' was silently ignored due to `or` defaulting hn = HumanName("Doe, John A.") hn2 = HumanName("Doe, John A.", initials_format="") - assert hn.initials() != hn2.initials() - # When format is empty string, result should be either "" or empty_attribute_default - result = hn2.initials() - assert result == "" or result == hn2.C.empty_attribute_default + self.assertNotEqual(hn.initials(), hn2.initials()) + # "".format(...) returns ""; collapse_whitespace returns "" which falls through + # to empty_attribute_default (may be "" or None depending on config variant). + self.assertFalse(hn2.initials()) def test_initials_separator_kwarg(self) -> None: # initials_separator="" with initials_format="{first}{middle}{last}" gives @@ -118,6 +120,20 @@ def test_initials_separator_kwarg(self) -> None: ) self.m(hn.initials(), "J.A.K.D.", hn) + def test_initials_separator_custom_value(self) -> None: + # Non-empty custom separator exercising __process_initial__ on a multi-word + # token. "Van Berg" is a single name part whose two words produce two initials + # joined by initials_separator. 
+ hn = HumanName("", initials_separator="-", initials_delimiter=".") + result = hn.__process_initial__("Van Berg", firstname=True) + self.assertEqual(result, "V-B") + + def test_str_default_behavior_unchanged(self) -> None: + # Regression guard for the truthiness → `is not None` change in __str__: + # the default path (no string_format kwarg) must still produce the expected string. + hn = HumanName("John Doe") + self.assertEqual(str(hn), "John Doe") + def test_constructor_first(self) -> None: hn = HumanName(first="TheName") self.assertFalse(hn.unparsable) @@ -167,15 +183,6 @@ def test_string_format_empty_string_kwarg(self) -> None: hn = HumanName("John Doe", string_format="") self.assertEqual(str(hn), "") - def test_initials_separator_multiword_name_part(self) -> None: - # __process_initial__ splits on spaces internally for multi-word tokens; - # initials_separator must flow through there too. - hn = HumanName("", constants=None) - hn.initials_separator = "" - # Directly exercise __process_initial__ with a two-word part - result = hn.__process_initial__("Van Berg", firstname=True) - self.assertEqual(result, "VB") - def test_initials_separator_empty_multi_part_middle(self) -> None: # Full workflow from issue #152: empty delimiter + separator + compact format # gives fully concatenated initials with no spaces or punctuation. @@ -193,11 +200,13 @@ def test_initials_separator_constants_multi_part_middle(self) -> None: _orig_d = CONSTANTS.initials_delimiter _orig_s = CONSTANTS.initials_separator _orig_f = CONSTANTS.initials_format - CONSTANTS.initials_delimiter = "" - CONSTANTS.initials_separator = "" - CONSTANTS.initials_format = "{first}{middle}{last}" - hn = HumanName("Doe, John A.
Kenneth") - self.m(hn.initials(), "JAKD", hn) - CONSTANTS.initials_delimiter = _orig_d - CONSTANTS.initials_separator = _orig_s - CONSTANTS.initials_format = _orig_f + try: + CONSTANTS.initials_delimiter = "" + CONSTANTS.initials_separator = "" + CONSTANTS.initials_format = "{first}{middle}{last}" + hn = HumanName("Doe, John A. Kenneth") + self.m(hn.initials(), "JAKD", hn) + finally: + CONSTANTS.initials_delimiter = _orig_d + CONSTANTS.initials_separator = _orig_s + CONSTANTS.initials_format = _orig_f