Skip to content

Commit 5ea10c6

Browse files
authored
Merge pull request #194 from derek73/worktree-middle-name-as-last
Add middle_name_as_last flag to fold middle names into last name (#133)
2 parents 1f2ce4e + 8d5c17f commit 5ea10c6

5 files changed

Lines changed: 194 additions & 0 deletions

File tree

docs/customize.rst

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -88,6 +88,7 @@ Other editable attributes
8888
* :py:obj:`~nameparser.config.Constants.suffix_delimiter` - additional delimiter used to split suffix groups after comma-splitting, e.g. ``" - "`` for names like ``"Jane Smith, RN - CRNA"``. Defaults to ``None`` (disabled).
8989
* :py:obj:`~nameparser.config.Constants.initials_separator` - string placed between consecutive initials within the same name group (after the delimiter). Defaults to ``" "``, so ``"A. K."``; set to ``""`` for compact ``"A.K."``.
9090
* :py:obj:`~nameparser.config.Constants.patronymic_name_order` - If set, detects Russian formal-order names (``Surname GivenName Patronymic``) via a trailing East-Slavic patronymic suffix and rotates the parts to Western order (``first=GivenName``, ``middle=Patronymic``, ``last=Surname``). Opt-in; see subsection below.
91+
* :py:obj:`~nameparser.config.Constants.middle_name_as_last` - If set, folds middle names into the last name: ``.last`` takes the value that ``.surnames`` has without the flag (the middle names followed by the last name) and ``.middle`` becomes empty. Opt-in; see the "Suppressing Middle Names" subsection below.
9192

9293

9394
Russian Formal Name Order
@@ -117,6 +118,29 @@ patronymic-form surnames such as ``"David Michael Abramovich"``. Enable this
117118
flag only when your data is predominantly Russian formal-order names.
118119

119120

121+
Suppressing Middle Names
122+
~~~~~~~~~~~~~~~~~~~~~~~~~
123+
124+
Some naming systems have no middle-name concept — everything after the given
125+
name is lineage or family (e.g. Arabic patronymic chaining: given + father +
126+
grandfather + family). Enable ``middle_name_as_last`` to fold the middle name
127+
into the last name instead of splitting them::
128+
129+
>>> from nameparser import HumanName
130+
>>> from nameparser.config import Constants
131+
>>> C = Constants(middle_name_as_last=True)
132+
>>> hn = HumanName("Mohamad Ahmad Ali Hassan", constants=C)
133+
>>> hn.first, hn.middle, hn.last
134+
('Mohamad', '', 'Ahmad Ali Hassan')
135+
136+
The fold applies uniformly to comma input too, so both written forms of a name
137+
converge on the same result::
138+
139+
>>> hn2 = HumanName("Hassan, Mohamad Ahmad Ali", constants=C)
140+
>>> hn2.first, hn2.last
141+
('Mohamad', 'Ahmad Ali Hassan')
142+
143+
120144
Splitting last-name prefix particles
121145
-------------------------------------
122146

docs/release_log.rst

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -29,6 +29,7 @@ Release Log
2929
``middle="ahmed"``, ``last="salem"``). Disable via
3030
``CONSTANTS.first_name_prefixes.clear()``. **Default-on: changes parsing
3131
output for names with these prefixes.** (#150)
32+
- Add ``middle_name_as_last`` flag to ``Constants`` and ``HumanName`` for opt-in folding of middle names into the last name, for naming systems with no middle-name concept (e.g. Arabic patronymic chaining) (#133)
3233
* 1.2.1 - June 19, 2026
3334
- Fix ``initials()`` interpolating the literal ``None`` for empty name parts when ``empty_attribute_default = None`` (e.g. ``"J. None D."``); empty parts now render as an empty string and a fully-empty result returns ``empty_attribute_default``
3435
- Add ``python -m nameparser "Name String"`` command-line helper that prints a parsed name

nameparser/config/__init__.py

Lines changed: 28 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -389,6 +389,32 @@ class Constants:
389389
390390
"""
391391

392+
middle_name_as_last = False
393+
"""
394+
If set, folds middle names into the last name: ``middle_list`` is prepended
395+
to ``last_list`` and ``middle_list`` is cleared, so ``.last`` becomes what
396+
``.surnames`` already was and ``.middle`` becomes empty. Useful for naming
397+
systems with no middle-name concept, where everything after the given name
398+
is lineage/family (e.g. Arabic patronymic chaining: given + father +
399+
grandfather + family).
400+
401+
The fold is uniform across both no-comma and comma ("Last, First Middle")
402+
input, so the two written forms of a name converge on the same result.
403+
404+
For per-instance control without a shared ``Constants``, pass a dedicated
405+
instance: ``HumanName("...", constants=Constants(middle_name_as_last=True))``.
406+
407+
.. doctest::
408+
409+
>>> from nameparser import HumanName
410+
>>> from nameparser.config import Constants
411+
>>> C = Constants(middle_name_as_last=True)
412+
>>> hn = HumanName("Mohamad Ahmad Ali Hassan", constants=C)
413+
>>> hn.first, hn.middle, hn.last
414+
('Mohamad', '', 'Ahmad Ali Hassan')
415+
416+
"""
417+
392418
def __init__(self,
393419
prefixes: Iterable[str] = PREFIXES,
394420
suffix_acronyms: Iterable[str] = SUFFIX_ACRONYMS,
@@ -401,6 +427,7 @@ def __init__(self,
401427
capitalization_exceptions: TupleManager[str] | Iterable[tuple[str, str]] = CAPITALIZATION_EXCEPTIONS,
402428
regexes: RegexTupleManager | TupleManager[re.Pattern[str]] | Iterable[tuple[str, re.Pattern[str]]] = REGEXES,
403429
patronymic_name_order: bool = False,
430+
middle_name_as_last: bool = False,
404431
) -> None:
405432
# These four descriptor assignments call _CachedUnionMember.__set__, which
406433
# calls _invalidate_pst() and establishes self._pst. They must come before
@@ -423,6 +450,7 @@ def __init__(self,
423450
# needing to override parse_nicknames() itself. See issue #112.
424451
self.extra_nickname_delimiters = TupleManager()
425452
self.patronymic_name_order = patronymic_name_order
453+
self.middle_name_as_last = middle_name_as_last
426454

427455
def _invalidate_pst(self) -> None:
428456
self._pst = None

nameparser/parser.py

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -753,6 +753,15 @@ def handle_patronymic_name_order(self) -> None:
753753
self.first_list,
754754
)
755755

756+
def handle_middle_name_as_last(self) -> None:
    """
    Fold every middle name into the last name.

    ``last_list`` is rebound to a new list holding the current
    ``middle_list`` entries followed by the current ``last_list`` entries
    (order preserved within each), and ``middle_list`` is then rebound to
    an empty list. With an empty ``middle_list`` the contents of
    ``last_list`` are unchanged.

    This method does not check the ``middle_name_as_last`` flag itself;
    :py:func:`post_process` only calls it when the flag is set.
    """
    # Copy first so neither of the original list objects is mutated.
    folded = list(self.middle_list)
    folded.extend(self.last_list)
    self.last_list = folded
    self.middle_list = []
764+
756765
def post_process(self) -> None:
757766
"""
758767
This happens at the end of the :py:func:`parse_full_name` after
@@ -762,6 +771,8 @@ def post_process(self) -> None:
762771
self.handle_firstnames()
763772
if self.C.patronymic_name_order:
764773
self.handle_patronymic_name_order()
774+
if self.C.middle_name_as_last:
775+
self.handle_middle_name_as_last()
765776
self.handle_capitalization()
766777

767778
def fix_phd(self) -> None:

tests/test_middle_name_as_last.py

Lines changed: 130 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,130 @@
1+
from nameparser import HumanName
2+
from nameparser.config import Constants
3+
from tests.base import HumanNameTestBase
4+
5+
6+
class MiddleNameAsLastFlagTests(HumanNameTestBase):
    """Checks on the ``middle_name_as_last`` attribute of ``Constants``."""

    def test_default_is_false(self) -> None:
        # A Constants built with no arguments leaves the fold disabled.
        default_constants = Constants()
        assert default_constants.middle_name_as_last is False

    def test_can_set_true_via_constructor(self) -> None:
        enabled = Constants(middle_name_as_last=True)
        assert enabled.middle_name_as_last is True

    def test_does_not_affect_other_instance(self) -> None:
        # The enabled instance is created first, so a flag leaking into
        # shared state would be visible on the instance built afterwards.
        enabled = Constants(middle_name_as_last=True)
        untouched = Constants()
        assert enabled.middle_name_as_last is True
        assert untouched.middle_name_as_last is False
21+
22+
23+
class MiddleNameAsLastFoldTests(HumanNameTestBase):
    """Parsing results when ``middle_name_as_last`` is enabled."""

    def setup_method(self) -> None:
        self.C = Constants(middle_name_as_last=True)

    def hn(self, name: str) -> HumanName:
        """Parse *name* using the flag-enabled constants from setup."""
        return HumanName(name, constants=self.C)

    def test_fold_no_comma(self) -> None:
        parsed = self.hn("Mohamad Ahmad Ali Hassan")
        self.m(parsed.first, "Mohamad", parsed)
        self.m(parsed.middle, "", parsed)
        self.m(parsed.last, "Ahmad Ali Hassan", parsed)

    def test_fold_comma_converges(self) -> None:
        # Both written forms of the same name must give equal first/last.
        plain_form = self.hn("Mohamad Ahmad Ali Hassan")
        comma_form = self.hn("Hassan, Mohamad Ahmad Ali")
        self.m(comma_form.first, plain_form.first, comma_form)
        self.m(comma_form.last, plain_form.last, comma_form)

    def test_title_and_suffix_preserved(self) -> None:
        parsed = self.hn("Dr. Mohamad Ahmad Hassan Jr")
        self.m(parsed.title, "Dr.", parsed)
        self.m(parsed.last, "Ahmad Hassan", parsed)
        self.m(parsed.suffix, "Jr", parsed)

    def test_suffix_preserved_comma_format(self) -> None:
        # A comma-delimited suffix goes through a different code path than
        # the no-comma title/suffix case; the fold must apply here as well.
        parsed = self.hn("Hassan, Mohamad Ahmad Ali, Jr.")
        self.m(parsed.first, "Mohamad", parsed)
        self.m(parsed.middle, "", parsed)
        self.m(parsed.last, "Ahmad Ali Hassan", parsed)
        self.m(parsed.suffix, "Jr.", parsed)

    def test_nickname_preserved(self) -> None:
        # pre_process() strips nicknames before the fold ever runs.
        parsed = self.hn('Mohamad "Mo" Ahmad Ali Hassan')
        self.m(parsed.nickname, "Mo", parsed)
        self.m(parsed.middle, "", parsed)
        self.m(parsed.last, "Ahmad Ali Hassan", parsed)

    def test_no_middle_is_noop(self) -> None:
        parsed = self.hn("John Doe")
        self.m(parsed.first, "John", parsed)
        self.m(parsed.middle, "", parsed)
        self.m(parsed.last, "Doe", parsed)

    def test_single_token_is_noop(self) -> None:
        parsed = self.hn("Cher")
        self.m(parsed.first, "Cher", parsed)
        self.m(parsed.middle, "", parsed)
        self.m(parsed.last, "", parsed)

    def test_given_names_and_surnames_track_fold(self) -> None:
        # With middle emptied, the aggregate views match first and last.
        parsed = self.hn("Mohamad Ahmad Ali Hassan")
        self.m(parsed.given_names, parsed.first, parsed)
        self.m(parsed.surnames, parsed.last, parsed)

    def test_last_prefixes_still_split_after_fold(self) -> None:
        # Without the fold: first="Miguel", middle="da Silva do Amaral",
        # last="de Souza", so last_prefixes="de". With the fold, last_list
        # is ["da", "Silva", "do", "Amaral", "de", "Souza"]. _split_last()
        # removes only the contiguous run of prefix words at the start, and
        # "Silva" is not a prefix, so scanning stops after "da". The
        # expected last_prefixes is therefore "da", not "de".
        parsed = self.hn("Miguel da Silva do Amaral de Souza")
        self.m(parsed.last_prefixes, "da", parsed)
91+
92+
93+
class MiddleNameAsLastFlagOffTests(HumanNameTestBase):
    """Parsing with no ``constants`` argument keeps middle and last apart."""

    def test_default_constants_unaffected(self) -> None:
        # No constants argument: HumanName falls back to its defaults.
        parsed = HumanName("Mohamad Ahmad Ali Hassan")
        self.m(parsed.middle, "Ahmad Ali", parsed)
        self.m(parsed.last, "Hassan", parsed)
99+
100+
101+
class MiddleNameAsLastWithPatronymicOrderTests(HumanNameTestBase):
    """Both ``patronymic_name_order`` and ``middle_name_as_last`` enabled.

    post_process() runs the patronymic hook before the middle_name_as_last
    hook, so the rotation has to settle first/middle/last before the fold
    collapses middle into last.
    """

    def setup_method(self) -> None:
        self.C = Constants(middle_name_as_last=True, patronymic_name_order=True)

    def hn(self, name: str) -> HumanName:
        """Parse *name* using the constants that have both flags set."""
        return HumanName(name, constants=self.C)

    def test_rotate_then_fold_no_comma(self) -> None:
        # Rotation turns "Ivanov Petr Sergeyevich" into first=Petr,
        # middle=Sergeyevich, last=Ivanov; the fold then moves that
        # settled middle in front of last.
        parsed = self.hn("Ivanov Petr Sergeyevich")
        self.m(parsed.first, "Petr", parsed)
        self.m(parsed.middle, "", parsed)
        self.m(parsed.last, "Sergeyevich Ivanov", parsed)

    def test_fold_applies_even_when_comma_suppresses_rotation(self) -> None:
        # The _had_comma guard suppresses the patronymic rotation, so
        # middle reaches the fold as an unrotated "Sergeyevich". The fold
        # still absorbs it, giving the same first/last as the no-comma
        # case by a different mechanism.
        parsed = self.hn("Ivanov, Petr Sergeyevich")
        self.m(parsed.first, "Petr", parsed)
        self.m(parsed.middle, "", parsed)
        self.m(parsed.last, "Sergeyevich Ivanov", parsed)

0 commit comments

Comments
 (0)