Skip to content
Merged
37 changes: 37 additions & 0 deletions docs/customize.rst
Original file line number Diff line number Diff line change
Expand Up @@ -63,6 +63,43 @@ Other editable attributes
* :py:obj:`~nameparser.config.Constants.initials_separator` - string placed between consecutive initials within the same name group (after the delimiter). Defaults to ``" "``, so ``"A. K."``; set to ``""`` for compact ``"A.K."``.


Splitting last-name prefix particles
-------------------------------------

The :py:attr:`~nameparser.parser.HumanName.last_base` and
:py:attr:`~nameparser.parser.HumanName.last_prefixes` properties split the last
name at the boundary between leading prefix particles and the core surname. They
consult the same ``PREFIXES`` set that the parser uses, so a particle added to
``CONSTANTS.prefixes`` is picked up by the split automatically::

>>> from nameparser import HumanName
>>> from nameparser.config import CONSTANTS
>>> CONSTANTS.prefixes.add('op')
>>> HumanName("Jan op den Berg").last_base
'Berg'
>>> HumanName("Jan op den Berg").last_prefixes
'op den'
>>> CONSTANTS.prefixes.remove('op')

Note the ``remove`` call at the end — the examples in this document share the
global ``CONSTANTS`` object, so every mutation must be reversed to keep it from
affecting later examples.

Because ``last_base`` is a plain string property, sorting a list of names by
core surname (ignoring prefix particles like *van*, *de la*) is just a key
function::

names = [
HumanName("Vincent van Gogh"),
HumanName("Juan de la Vega"),
HumanName("John Smith"),
]
sorted_names = sorted(names, key=lambda n: n.last_base.lower())
# sort keys: 'gogh', 'smith', 'vega' → van Gogh, Smith, de la Vega

To break ties by first name when two people share the same ``last_base``, add
``first`` as a secondary sort key::

sorted_names = sorted(names, key=lambda n: (n.last_base.lower(), n.first.lower()))

Parser Customization Examples
-----------------------------
Expand Down
1 change: 1 addition & 0 deletions docs/release_log.rst
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@ Release Log
- Fix spurious leading space in surnames and empty token in suffix list after ``capitalize()`` with an empty middle or suffix (#164)
- Fix extra whitespace before punctuation in ``str()`` output when a ``string_format`` field is empty (closes #139)
- Fix ``'apn aprn'`` split into separate ``suffix_acronyms`` entries so each is recognized independently (closes #155)
- Add ``last_base``, ``last_prefixes`` (and ``_list`` variants) plus ``family`` / ``family_prefixes`` aliases for splitting last-name prefix particles (tussenvoegsels) from the core surname (#130, #132)
* 1.2.1 - June 19, 2026
- Fix ``initials()`` interpolating the literal ``None`` for empty name parts when ``empty_attribute_default = None`` (e.g. ``"J. None D."``); empty parts now render as an empty string and a fully-empty result returns ``empty_attribute_default``
- Add ``python -m nameparser "Name String"`` command-line helper that prints a parsed name
Expand Down
6 changes: 6 additions & 0 deletions docs/usage.rst
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,12 @@ Requires Python 3.10+.
'Q. Xavier'
>>> name.last
'de la Vega'
>>> name.last_base
'Vega'
>>> name.last_prefixes
'de la'
>>> name.family
'Vega'
>>> name.suffix
'III'
>>> name.surnames
Expand Down
13 changes: 11 additions & 2 deletions nameparser/config/prefixes.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,14 +8,20 @@
#: join with all following name pieces until the suffix "MD", resulting in the
#: correct parsing of the last name "von bergen wessels".
PREFIXES = set([
"'t",
'abu',
'af',
'al',
'av',
'bar',
'bat',
'bin',
'bint',
'bon',
'da',
'dal',
'de',
'de\'',
"de'",
'degli',
'dei',
'del',
Expand All @@ -24,6 +30,7 @@
'delle',
'delli',
'dello',
'den',
'der',
'di',
'dí',
Expand All @@ -39,9 +46,11 @@
'santa',
'st',
'ste',
'ter',
'van',
'vander',
'vel',
'von',
'vom',
'von',
'zu',
])
88 changes: 88 additions & 0 deletions nameparser/parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -430,6 +430,94 @@ def given_names(self) -> str:
"""
return " ".join(self.given_names_list) or self.C.empty_attribute_default

def _split_last(self) -> tuple[list[str], list[str]]:
    """Split the last name into ``(prefix_particles, base_words)``.

    The last name is flattened to single words; the leading run of words
    accepted by ``is_prefix`` becomes the prefix list and everything from
    the first non-prefix word onward becomes the base.

    The base list is only empty when the last name itself is empty. If
    every word matches a prefix particle, nothing is stripped: all words
    come back as the base with an empty prefix list (heuristic: a family
    name is assumed not to consist entirely of particles).

    >>> HumanName("Vincent van Gogh")._split_last()
    (['van'], ['Gogh'])
    >>> HumanName("Anh Do")._split_last()
    ([], ['Do'])
    """
    # Re-split on whitespace so multi-word entries in last_list are
    # examined one word at a time.
    tokens = " ".join(self.last_list).split()
    for boundary, token in enumerate(tokens):
        if not self.is_prefix(token):
            # First word that is not a particle: the surname core starts here.
            return tokens[:boundary], tokens[boundary:]
    # No non-particle word was found (or the last name is empty), e.g. the
    # surname "Do", which also appears in PREFIXES. Don't strip anything;
    # treat the whole last name as the base.
    return [], tokens

@property
def last_prefixes_list(self) -> list[str]:
    """
    The leading prefix particles of the last name (the *tussenvoegsel*),
    one list item per word.

    The list is ``[]`` when the last name has no leading particles, and
    also when every word of the last name matches a prefix — see
    :py:meth:`_split_last`.

    >>> HumanName("Juan de la Vega").last_prefixes_list
    ['de', 'la']
    """
    # _split_last() returns a (prefixes, base) pair; only the first is needed.
    prefix_particles, _ = self._split_last()
    return prefix_particles

@property
def last_base_list(self) -> list[str]:
    """
    The words of the last name that remain once leading prefix particles
    are stripped.

    When every word matches a prefix, no stripping occurs and the full
    last name is returned, so a non-empty last name never yields an empty
    list — see :py:meth:`_split_last`.

    >>> HumanName("Vincent van Gogh").last_base_list
    ['Gogh']
    """
    # _split_last() returns a (prefixes, base) pair; only the second is needed.
    _, base_words = self._split_last()
    return base_words

@property
def last_base(self) -> str:
    """
    The core surname: the last name without its leading prefix particles.

    ``"van Gogh"`` gives ``"Gogh"`` and ``"Smith"`` gives ``"Smith"``; the
    ``last`` attribute itself is never modified. If every word of the last
    name matches a prefix particle, nothing is stripped and the full last
    name is returned. An empty result falls back to
    ``empty_attribute_default``.

    >>> HumanName("Vincent van Gogh").last_base
    'Gogh'
    >>> HumanName("John Smith").last_base
    'Smith'
    """
    core_surname = " ".join(self.last_base_list)
    if core_surname:
        return core_surname
    # Nothing to report: use the configured placeholder for empty attributes.
    return self.C.empty_attribute_default

@property
def last_prefixes(self) -> str:
    """
    The leading prefix particle(s) of the last name (the *tussenvoegsel*),
    joined with single spaces.

    Falls back to ``empty_attribute_default`` (``""`` unless configured
    otherwise) when there are no leading particles, which includes the
    case where every word of the last name matches a prefix particle
    (the all-particles guard; see :py:meth:`_split_last`).

    >>> HumanName("Vincent van Gogh").last_prefixes
    'van'
    >>> HumanName("Juan de la Vega").last_prefixes
    'de la'
    """
    particles = " ".join(self.last_prefixes_list)
    if particles:
        return particles
    # Nothing to report: use the configured placeholder for empty attributes.
    return self.C.empty_attribute_default

@property
def family(self) -> str:
    """
    Alias for :py:attr:`last_base`: the last name without its leading
    prefix particles.
    """
    core_surname = self.last_base
    return core_surname

@property
def family_prefixes(self) -> str:
    """
    Alias for :py:attr:`last_prefixes`: the leading prefix particle(s) of
    the last name.
    """
    particles = self.last_prefixes
    return particles

# setter methods

def _set_list(self, attr: str, value: str | list[str] | None) -> None:
Expand Down
78 changes: 78 additions & 0 deletions tests/test_prefixes.py
Original file line number Diff line number Diff line change
Expand Up @@ -162,3 +162,81 @@ def test_comma_three_conjunctions(self) -> None:
self.m(hn.title, "Dr.", hn)
self.m(hn.middle, "Q. Xavier", hn)
self.m(hn.suffix, "III", hn)


class LastNamePrefixSplitTestCase(HumanNameTestBase):
    # Exercises the prefix/base split of the last name: last_base,
    # last_prefixes, their *_list variants, and the family aliases.

    # --- single-particle prefix: "van Gogh" ---

    def test_van_gogh_last_base(self) -> None:
        name = HumanName("Vincent van Gogh")
        self.m(name.last_base, "Gogh", name)

    def test_van_gogh_last_prefixes(self) -> None:
        name = HumanName("Vincent van Gogh")
        self.m(name.last_prefixes, "van", name)

    def test_van_gogh_last_base_list(self) -> None:
        name = HumanName("Vincent van Gogh")
        self.m(name.last_base_list, ["Gogh"], name)

    def test_van_gogh_last_prefixes_list(self) -> None:
        name = HumanName("Vincent van Gogh")
        self.m(name.last_prefixes_list, ["van"], name)

    # --- multi-word base and multi-word prefix ---

    def test_von_bergen_wessels(self) -> None:
        # One particle followed by a two-word core surname.
        name = HumanName("pennie von bergen wessels")
        self.m(name.last_base, "bergen wessels", name)
        self.m(name.last_prefixes, "von", name)
        self.m(name.last_base_list, ["bergen", "wessels"], name)
        self.m(name.last_prefixes_list, ["von"], name)

    def test_de_la_vega_multiword_prefix(self) -> None:
        # Two particles followed by a one-word core surname.
        name = HumanName("Juan de la Vega")
        self.m(name.last_base, "Vega", name)
        self.m(name.last_prefixes, "de la", name)
        self.m(name.last_prefixes_list, ["de", "la"], name)

    def test_no_prefix(self) -> None:
        name = HumanName("John Smith")
        self.m(name.last_base, "Smith", name)
        self.m(name.last_prefixes, "", name)
        # An empty list must be checked with assertEqual: self.m() coerces []
        # via `expected or empty_attribute_default`.
        self.assertEqual(name.last_prefixes_list, [])

    # --- all-particles guard ---

    def test_do_guard_surname_equals_prefix_word(self) -> None:
        # "Do" is in PREFIXES; without the guard last_base would come back empty.
        name = HumanName("Anh Do")
        self.m(name.last_base, "Do", name)
        self.m(name.last_prefixes, "", name)

    def test_all_particles_guard(self) -> None:
        # Artificial case: every word of the last name is a prefix, so
        # nothing may be stripped.
        name = HumanName("Smith van der")
        # last="van der"; both words are prefixes, so the guard fires and the
        # base is the full last name.
        self.m(name.last_base, name.last, name)
        self.m(name.last_prefixes, "", name)

    # --- aliases ---

    def test_alias_family_equals_last_base(self) -> None:
        name = HumanName("Vincent van Gogh")
        self.m(name.family, name.last_base, name)

    def test_alias_family_prefixes_equals_last_prefixes(self) -> None:
        name = HumanName("Vincent van Gogh")
        self.m(name.family_prefixes, name.last_prefixes, name)

    # --- interaction with other parts and edge cases ---

    def test_da_silva_title_plus_prefix(self) -> None:
        name = HumanName("Dra. Andréia da Silva")
        self.m(name.last_base, "Silva", name)
        self.m(name.last_prefixes, "da", name)

    def test_empty_name(self) -> None:
        name = HumanName()
        self.m(name.last_base, "", name)
        self.m(name.last_prefixes, "", name)
        # Empty lists must be checked with assertEqual: self.m() coerces []
        # via `expected or empty_attribute_default`.
        self.assertEqual(name.last_base_list, [])
        self.assertEqual(name.last_prefixes_list, [])

    def test_case_insensitive_prefix_detection(self) -> None:
        # An upper-case particle is still detected and keeps its input casing.
        name = HumanName("VINCENT VAN GOGH")
        self.m(name.last_prefixes, "VAN", name)
        self.m(name.last_base, "GOGH", name)