URL: http://github.com/derek73/python-nameparser/pull/122.diff
# and removing the matches from self._full_name + nn_matches = [] + nn_sep = self.C.regexes.nn_sep_safe + _fn = self._full_name + for _re in self._nickname_regexes: + if _re.search(_fn): + nn_matches.extend( _re.finditer(_fn) ) + #remove matches from string + for _match in _re.finditer(_fn): + _fn = (' ' * (_match.end() - _match.start())).join([_fn[:_match.start()], _fn[_match.end():]]) + + if len(nn_matches) == 0: + return #"empty matches" + + nn_matches.sort(key=lambda x: x.span()) + + #remove any inter-match commas, if safe to do so + for low, high in zip(nn_matches[0:-1], nn_matches[1:]): + if nn_sep.search(self._full_name[low.span()[1]:high.span()[0]]) is None: + self._full_name = ' '.join([self._full_name[:low.span()[1]], self._full_name[high.span()[0]:] ]) + + for nn_match in nn_matches: + self.nickname_list.append( nn_match.group(1) ) + self._full_name = nn_match.re.sub(' ', self._full_name, 1) def squash_emoji(self): """ @@ -452,6 +497,18 @@ def handle_firstnames(self): and not lc(self.title) in self.C.first_name_titles: self.last, self.first = self.first, self.last + def parse_parenthesized_suffixes(self): + """ + Extract any parenthesized suffixes: (ret. | ret | vet. | vet) + """ + _re = self.C.regexes.paren_suffix + if _re.search(self._full_name): + for _match in _re.finditer(self._full_name): + self.suffix_list.append(_match.group(1)) + + self._full_name = _re.sub(' ', self._full_name) + + def parse_full_name(self): """ diff --git a/tests.py b/tests.py index 5f976b8..b19a0cc 100644 --- a/tests.py +++ b/tests.py @@ -27,6 +27,7 @@ from nameparser import HumanName from nameparser.util import u from nameparser.config import Constants +import re log = logging.getLogger('HumanName') @@ -1491,7 +1492,36 @@ def test_nickname_and_last_name_with_title(self): self.m(hn.last, "Edmonds", hn) self.m(hn.nickname, "Rick", hn) + def test_append_nickname(self): + hn = HumanName() + new_rgx = re.compile(r'(?!\w)\(_open(\w[^)]*?)\):close(?!\w)', re.UNICODE) + hn._nickname_regexes.append(new_rgx) + self.assertEqual(hn._nickname_regexes[-1], new_rgx) + hn.full_name = r"Benjamin (_openBen):close Franklin" + self.m(hn.first, "Benjamin", hn) + self.m(hn.middle, ":close", hn) + self.m(hn.last, "Franklin", hn) + self.m(hn.nickname, "_openBen", hn) + def test_prepend_nickname(self): + hn = HumanName() + new_rgx = re.compile(r'(?!\w)\(_open(\w[^)]*?)\):close(?!\w)', re.UNICODE) + hn._nickname_regexes.insert(0, new_rgx) + self.assertEqual(hn._nickname_regexes[0], new_rgx) + hn.full_name = r"Benjamin (_openBen):close Franklin" + self.m(hn.first, "Benjamin", hn) + self.m(hn.middle, "", hn) + self.m(hn.last, "Franklin", hn) + self.m(hn.nickname, "Ben", hn) + + def test_multiple_nicknames(self): + hn = HumanName('Chief Justice John (JR), "No Glove, No Love" Glover Roberts, Jr.') + self.m(hn.title, 'Chief Justice', hn) + self.m(hn.first, "John", hn) + self.m(hn.middle, "Glover", hn) + self.m(hn.last, "Roberts", hn) + self.m(hn.suffix, "Jr.", hn) + self.m(hn.nickname, '"JR", "No Glove, No Love"', hn) # class MaidenNameTestCase(HumanNameTestBase): # @@ -1766,6 +1796,14 @@ def test_suffix_with_periods_with_lastname_comma(self): self.m(hn.last, "Doe", hn) self.m(hn.suffix, "Msc.Ed.", hn) + def test_suffix_parenthesized_with_nickname(self): + hn = HumanName("Gen Dwight David (Ike) Eisenhower (ret.) KG") + self.m(hn.title, "Gen", hn) + self.m(hn.first, "Dwight", hn) + self.m(hn.middle, "David", hn) + self.m(hn.last, "Eisenhower", hn) + self.m(hn.suffix, "(ret.), KG", hn) + self.m(hn.nickname, "Ike", hn) class TitleTestCase(HumanNameTestBase):Note: This service is not intended for secure transactions such as banking, social media, email, or purchasing. Use at your own risk. We assume no liability whatsoever for broken pages.
Alternative Proxies: