From b9dc98d21881ea1b2bf292962233d4177bbf2018 Mon Sep 17 00:00:00 2001
From: Maxime Petazzoni
Date: Tue, 11 May 2010 17:10:51 +0200
Subject: [PATCH ocitysmap] German prefix rules
Signed-off-by: Maxime Petazzoni
---
ocitysmap/i18n.py | 76 +++++++++++++++++++++++++++++++++++++++++++++++++---
1 files changed, 71 insertions(+), 5 deletions(-)
diff --git a/ocitysmap/i18n.py b/ocitysmap/i18n.py
index 37563cf..a9c1344 100644
--- a/ocitysmap/i18n.py
+++ b/ocitysmap/i18n.py
@@ -640,6 +640,72 @@ class i18n_pl_generic(i18n):
return a == b
+class i18n_de_generic(i18n):
+    """German rules: move a leading title/prefix behind the street name."""
+
+    # German streets are often named after people and include a title,
+    # which is captured as the start of <prefix>; airport words and
+    # "Old"/"New" are covered too. Entries are literal text: they go
+    # through re.escape() below, so "." matches only a real dot.
+    APPELLATIONS = [ u"Alte", u"Alter", u"Doktor", u"Dr.",
+                     u"Flughafen", u"Flugplatz", u"Gen.", u"General",
+                     u"Neue", u"Neuer", u"Platz", u"Prinz",
+                     u"Prinzessin", u"Prof.", u"Professor" ]
+
+    # Articles/prepositions after an appellation join the prefix, e.g.
+    # "Platz der deutschen Einheit" => "deutschen Einheit (Platz der)".
+    # Entries are regex fragments. NOTE(review): an appellation must come
+    # first, so "An der Märchenwiese" alone is left as is -- intended?
+    DETERMINANTS = [ u"\s?An den", u"\s?An der", u"\s?Am",
+                     u"\s?Auf den", u"\s?Auf der",
+                     u" an", u" des", u" der", u" von", u" vor" ]
+
+    SPACE_REDUCE = re.compile(r"\s+")
+    PREFIX_REGEXP = re.compile(r"^(?P<prefix>(%s)(%s)?)\s?\b(?P<name>.+)" %
+                               ("|".join(map(re.escape, APPELLATIONS)),
+                                "|".join(DETERMINANTS)),
+                               re.IGNORECASE | re.UNICODE)
+
+    # for IndexPageGenerator._upper_unaccent_string
+    E_ACCENT = re.compile(ur"[éèêëẽ]", re.IGNORECASE | re.UNICODE)
+    I_ACCENT = re.compile(ur"[íìîïĩ]", re.IGNORECASE | re.UNICODE)
+    A_ACCENT = re.compile(ur"[áàâäã]", re.IGNORECASE | re.UNICODE)
+    O_ACCENT = re.compile(ur"[óòôöõ]", re.IGNORECASE | re.UNICODE)
+    U_ACCENT = re.compile(ur"[úùûüũ]", re.IGNORECASE | re.UNICODE)
+
+    def __init__(self, language, locale_path):
+        """Store the language code and pass it to _install_language()."""
+        self.language = str(language)
+        _install_language(language, locale_path)
+
+    def _upper_unaccent_string(self, s):
+        """Return s upper-cased with accented e/i/a/o/u replaced by ASCII."""
+        s = self.E_ACCENT.sub("e", s)
+        s = self.I_ACCENT.sub("i", s)
+        s = self.A_ACCENT.sub("a", s)
+        s = self.O_ACCENT.sub("o", s)
+        s = self.U_ACCENT.sub("u", s)
+        return s.upper()
+
+    def language_code(self):
+        """Return the language code stored by __init__."""
+        return self.language
+
+    def user_readable_street(self, name):
+        """Return name rewritten as "<name> (<prefix>)".
+
+        Whitespace is stripped and collapsed first; a name that is empty
+        or has no recognised prefix gets no "(...)" suffix.
+        """
+        if not name:
+            return name
+        name = self.SPACE_REDUCE.sub(" ", name.strip())
+        return self.PREFIX_REGEXP.sub(r"\g<name> (\g<prefix>)", name)
+
+    def first_letter_equal(self, a, b):
+        """Return True if a and b match after upper-casing/accent folding."""
+        return self._upper_unaccent_string(a) == self._upper_unaccent_string(b)
+
class i18n_generic(i18n):
def __init__(self, language, locale_path):
self.language = str(language)
@@ -678,15 +744,15 @@ language_class_map = {
'en_US.UTF-8': i18n_generic,
'en_ZA.UTF-8': i18n_generic,
'en_ZW.UTF-8': i18n_generic,
- 'de_BE.UTF-8': i18n_generic,
'nl_BE.UTF-8': i18n_nl_generic,
'nl_NL.UTF-8': i18n_nl_generic,
'it_IT.UTF-8': i18n_it_generic,
'it_CH.UTF-8': i18n_it_generic,
- 'de_AT.UTF-8': i18n_generic,
- 'de_DE.UTF-8': i18n_generic,
- 'de_LU.UTF-8': i18n_generic,
- 'de_CH.UTF-8': i18n_generic,
+ 'de_AT.UTF-8': i18n_de_generic,
+ 'de_BE.UTF-8': i18n_de_generic,
+ 'de_DE.UTF-8': i18n_de_generic,
+ 'de_LU.UTF-8': i18n_de_generic,
+ 'de_CH.UTF-8': i18n_de_generic,
'es_ES.UTF-8': i18n_es_generic,
'es_AR.UTF-8': i18n_es_generic,
'es_BO.UTF-8': i18n_es_generic,
--
1.6.3.3.341.g9b22d