Skip to content

Commit 8c75919

Browse files
authored
Merge pull request #191 from derek73/add-german-dutch-prefixes-titles
Add German/Dutch prefixes and German title/degree suffixes
2 parents 2e55728 + e739655 commit 8c75919

5 files changed

Lines changed: 94 additions & 0 deletions

File tree

nameparser/config/prefixes.py

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9,9 +9,12 @@
99
#: correct parsing of the last name "von bergen wessels".
1010
PREFIXES = set([
1111
"'t",
12+
'aan',
13+
'aen',
1214
'abu',
1315
'af',
1416
'al',
17+
'auf',
1518
'av',
1619
'bar',
1720
'bat',
@@ -30,25 +33,36 @@
3033
'delle',
3134
'delli',
3235
'dello',
36+
'dem',
3337
'den',
3438
'der',
3539
'di',
3640
'dí',
3741
'do',
3842
'dos',
3943
'du',
44+
'freiherr',
45+
'freiherrin',
46+
'heer',
47+
'het',
4048
'ibn',
4149
'la',
4250
'le',
4351
'mac',
4452
'mc',
53+
'op',
4554
'san',
4655
'santa',
4756
'st',
4857
'ste',
58+
'te',
4959
'ter',
60+
'tho',
61+
'thoe',
5062
'van',
63+
'vande',
5164
'vander',
65+
'vd',
5266
'vel',
5367
'vom',
5468
'von',

nameparser/config/suffixes.py

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -97,6 +97,7 @@
9797
'asp',
9898
'atc',
9999
'awb',
100+
'ba',
100101
'bca',
101102
'bcl',
102103
'bcss',
@@ -107,6 +108,7 @@
107108
'bpe',
108109
'bpi',
109110
'bpt',
111+
'bsc',
110112
'bt',
111113
'btcs',
112114
'bts',
@@ -507,8 +509,15 @@
507509
'mcse',
508510
'mct',
509511
'md',
512+
'mda',
513+
'mdb',
514+
'mdbb',
515+
'mdep',
516+
'mdhb',
510517
'mdiv',
518+
'mdl',
511519
'mem',
520+
'meng',
512521
'mfa',
513522
'micp',
514523
'mieee',
@@ -653,6 +662,7 @@
653662
'sphr',
654663
'sra',
655664
'sscp',
665+
'stb',
656666
'stmieee',
657667
'tbr-ct',
658668
'td',

nameparser/config/titles.py

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -231,6 +231,7 @@
231231
'detective',
232232
'developer',
233233
'dhr',
234+
'dipl.-ing',
234235
'diplomat',
235236
'dir',
236237
'director',
@@ -282,6 +283,7 @@
282283
'fadm',
283284
'family',
284285
'federal',
286+
'fh-prof',
285287
'field',
286288
'film',
287289
'financial',
@@ -312,6 +314,7 @@
312314
'goodwife',
313315
'governor',
314316
'graf',
317+
'gräfin',
315318
'grand',
316319
'großfürst',
317320
'group',
@@ -412,6 +415,7 @@
412415
'mcpoc',
413416
'mcpon',
414417
'md',
418+
'me',
415419
'member',
416420
'memoirist',
417421
'merchant',
@@ -470,6 +474,7 @@
470474
'paleontologist',
471475
'pastor',
472476
'patriarch',
477+
'pd',
473478
'pediatrician',
474479
'personality',
475480
'petty',
@@ -512,6 +517,7 @@
512517
'printer',
513518
'printmaker',
514519
'prinz',
520+
'priv.-doz',
515521
'prior',
516522
'private',
517523
'pro',
@@ -526,6 +532,7 @@
526532
'pursuivant',
527533
'pv2',
528534
'pvt',
535+
'ra',
529536
'rabbi',
530537
'radio',
531538
'radm',
@@ -648,6 +655,7 @@
648655
'tsgt',
649656
'uk',
650657
'united',
658+
'univ.prof',
651659
'us',
652660
'vadm',
653661
'vardapet',
@@ -665,6 +673,7 @@
665673
'warrant',
666674
'wing',
667675
'wm',
676+
'wp',
668677
'wo-1',
669678
'wo1',
670679
'wo2',

nameparser/parser.py

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1149,6 +1149,11 @@ def join_on_conjunctions(self, pieces: list[str], additional_parts_count: int =
11491149
if self.is_title(pieces[i+1]):
11501150
# when joining to a title, make new_piece a title too
11511151
self.C.titles.add(new_piece)
1152+
if self.is_prefix(pieces[i+1]):
1153+
# when joining to a prefix, make new_piece a prefix too, so
1154+
# e.g. a leading "and" + "van" merges into "and van" and is
1155+
# still recognized as a prefix (see "and van Buren")
1156+
self.C.prefixes.add(new_piece)
11521157
pieces[i] = new_piece
11531158
pieces.pop(i+1)
11541159
# subtract 1 from the index of all the remaining conjunctions
@@ -1161,6 +1166,11 @@ def join_on_conjunctions(self, pieces: list[str], additional_parts_count: int =
11611166
if self.is_title(pieces[i-1]):
11621167
# when joining to a title, make new_piece a title too
11631168
self.C.titles.add(new_piece)
1169+
if self.is_prefix(pieces[i-1]):
1170+
# when joining to a prefix, make new_piece a prefix too, so
1170+
# e.g. "von" + "und" + "zu" merges into "von und zu" and can
1171+
# still chain onto a following prefix/lastname
1173+
self.C.prefixes.add(new_piece)
11641174
pieces[i-1] = new_piece
11651175
pieces.pop(i)
11661176
rm_count = 2

tests/test_conjunctions.py

Lines changed: 51 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -223,3 +223,54 @@ def test_conjunction_in_an_address_with_a_first_name_title(self) -> None:
223223
def test_name_is_conjunctions(self) -> None:
224224
hn = HumanName("e and e")
225225
self.m(hn.first, "e and e", hn)
226+
227+
def test_conjunction_bridges_prefix_chain(self) -> None:
228+
# "von" and "zu" are both prefixes, but "und" between them is only a
229+
# conjunction. join_on_conjunctions() merges "von und zu" into one
230+
# piece before the prefix-joining step runs, so without registering
231+
# that merged piece as a prefix too, it's stranded in the middle name
232+
# instead of joining to the last name. See German nobility styles
233+
# like "von und zu".
234+
hn = HumanName("Alois von und zu Liechtenstein")
235+
self.m(hn.first, "Alois", hn)
236+
self.m(hn.middle, "", hn)
237+
self.m(hn.last, "von und zu Liechtenstein", hn)
238+
239+
def test_conjunction_bridges_prefix_chain_with_leading_title(self) -> None:
240+
# Same bridging, but with extra prefix words on both sides of the
241+
# conjunction and a leading title-like word ("Freiherrin") that is
242+
# itself a prefix, confirming the chain still joins fully into last.
243+
hn = HumanName("Annette Charlotte Freiherrin von und zu der Tann-Rathsamhausen")
244+
self.m(hn.first, "Annette", hn)
245+
self.m(hn.middle, "Charlotte", hn)
246+
self.m(hn.last, "Freiherrin von und zu der Tann-Rathsamhausen", hn)
247+
248+
def test_conjunction_prefix_merge_at_start_stays_first_name(self) -> None:
249+
# Guards the i == 0 branch of the same fix: when the conjunction is
250+
# merged with a following prefix at the very start of the name, the
251+
# existing leading-prefix rule (a lone prefix opening the name is
252+
# treated as part of the first name, not joined to last) must still
253+
# apply to the merged piece.
254+
hn = HumanName("and van Buren")
255+
self.m(hn.first, "and van", hn)
256+
self.m(hn.last, "Buren", hn)
257+
258+
def test_conjunction_bridges_word_that_is_both_title_and_prefix(self) -> None:
259+
# "freiherr" is registered as both a title and a prefix. When it sits
260+
# next to a conjunction, join_on_conjunctions() runs the is_title and
261+
# is_prefix checks independently (not elif), so the merged piece
262+
# ("freiherr und") is added to both constants sets. Confirms that
263+
# dual registration doesn't break the prefix-bridging into last.
264+
hn = HumanName("Fritz Freiherr und von Bar")
265+
self.m(hn.first, "Fritz", hn)
266+
self.m(hn.middle, "", hn)
267+
self.m(hn.last, "Freiherr und von Bar", hn)
268+
269+
def test_conjunction_bridges_prefix_chain_with_multiple_conjunctions(self) -> None:
270+
# Two separate conjunctions ("und" appearing twice, not contiguous)
271+
# each bridge their own pair of adjacent prefixes, so both merges
272+
# must register as prefixes for the whole chain to join into last.
273+
hn = HumanName("Alois von und zu und von Liechtenstein")
274+
self.m(hn.first, "Alois", hn)
275+
self.m(hn.middle, "", hn)
276+
self.m(hn.last, "von und zu und von Liechtenstein", hn)

0 commit comments

Comments
 (0)