diff options
author | webchick <webchick@24967.no-reply.drupal.org> | 2011-10-17 09:14:30 -0700 |
---|---|---|
committer | webchick <webchick@24967.no-reply.drupal.org> | 2011-10-17 09:14:30 -0700 |
commit | 34f117b10370ab2805696e4afe700e7a14df95ee (patch) | |
tree | f1e52ead3955176e582d2445297f3157310d3373 /includes | |
parent | d548e9b4e450212cb09884b3a346522d77bec6b6 (diff) | |
download | brdo-34f117b10370ab2805696e4afe700e7a14df95ee.tar.gz brdo-34f117b10370ab2805696e4afe700e7a14df95ee.tar.bz2 |
Issue #221712 by Damien Tournoud, Gábor Hojtsy, iliphil, Bodo Maass, idflood, loganfsmyth: Fixed: locale_language_from_browser() does not parse language tags correctly and has broken logic.
Diffstat (limited to 'includes')
-rw-r--r-- | includes/locale.inc | 83 |
1 file changed, 63 insertions, 20 deletions
diff --git a/includes/locale.inc b/includes/locale.inc index 99abdfa44..6ebb8972c 100644 --- a/includes/locale.inc +++ b/includes/locale.inc @@ -93,39 +93,82 @@ function locale_language_from_interface() { * otherwise we would cache a user-specific preference. * * @param $languages - * An array of valid language objects. + * An array of language objects for enabled languages ordered by weight. * * @return * A valid language code on success, FALSE otherwise. */ function locale_language_from_browser($languages) { - // Specified by the user via the browser's Accept Language setting + if (empty($_SERVER['HTTP_ACCEPT_LANGUAGE'])) { + return FALSE; + } + + // The Accept-Language header contains information about the language + // preferences configured in the user's browser / operating system. + // RFC 2616 (section 14.4) defines the Accept-Language header as follows: + // Accept-Language = "Accept-Language" ":" + // 1#( language-range [ ";" "q" "=" qvalue ] ) + // language-range = ( ( 1*8ALPHA *( "-" 1*8ALPHA ) ) | "*" ) // Samples: "hu, en-us;q=0.66, en;q=0.33", "hu,en-us;q=0.5" - $browser_langs = array(); - - if (isset($_SERVER['HTTP_ACCEPT_LANGUAGE'])) { - $browser_accept = explode(",", $_SERVER['HTTP_ACCEPT_LANGUAGE']); - foreach ($browser_accept as $langpart) { - // The language part is either a code or a code with a quality. - // We cannot do anything with a * code, so it is skipped. - // If the quality is missing, it is assumed to be 1 according to the RFC. - if (preg_match("!([a-z-]+)(;q=([0-9\\.]+))?!", trim($langpart), $found)) { - $browser_langs[$found[1]] = (isset($found[3]) ? (float) $found[3] : 1.0); - } + $browser_langcodes = array(); + if (preg_match_all('@([a-zA-Z-]+|\*)(?:;q=([0-9.]+))?(?:$|\s*,\s*)@', trim($_SERVER['HTTP_ACCEPT_LANGUAGE']), $matches, PREG_SET_ORDER)) { + foreach ($matches as $match) { + // We can safely use strtolower() here, tags are ASCII. 
+ // RFC2616 mandates that the decimal part is no more than three digits, + // so we multiply the qvalue by 1000 to avoid floating point comparisons. + $langcode = strtolower($match[1]); + $qvalue = isset($match[2]) ? (float) $match[2] : 1; + $browser_langcodes[$langcode] = (int) ($qvalue * 1000); + } + } + + // We should take pristine values from the HTTP headers, but Internet Explorer + // from version 7 sends only specific language tags (eg. fr-CA) without the + // corresponding generic tag (fr) unless explicitly configured. In that case, + // we assume that the lowest value of the specific tags is the value of the + // generic language to be as close to the HTTP 1.1 spec as possible. + // See http://www.w3.org/Protocols/rfc2616/rfc2616-sec14.html#sec14.4 and + // http://blogs.msdn.com/b/ie/archive/2006/10/17/accept-language-header-for-internet-explorer-7.aspx + asort($browser_langcodes); + foreach ($browser_langcodes as $langcode => $qvalue) { + $generic_tag = strtok($langcode, '-'); + if (!isset($browser_langcodes[$generic_tag])) { + $browser_langcodes[$generic_tag] = $qvalue; } } - // Order the codes by quality - arsort($browser_langs); + // Find the enabled language with the greatest qvalue, following the rules + // of RFC 2616 (section 14.4). If several languages have the same qvalue, + // prefer the one with the greatest weight. + $best_match_langcode = FALSE; + $max_qvalue = 0; + foreach ($languages as $langcode => $language) { + // Language tags are case insensitive (RFC2616, sec 3.10). + $langcode = strtolower($langcode); + + // If nothing matches below, the default qvalue is the one of the wildcard + // language, if set, or is 0 (which will never match). + $qvalue = isset($browser_langcodes['*']) ? $browser_langcodes['*'] : 0; + + // Find the longest possible prefix of the browser-supplied language + // ('the language-range') that matches this site language ('the language tag'). 
+ $prefix = $langcode; + do { + if (isset($browser_langcodes[$prefix])) { + $qvalue = $browser_langcodes[$prefix]; + break; + } + } + while ($prefix = substr($prefix, 0, strrpos($prefix, '-'))); - // Try to find the first preferred language we have - foreach ($browser_langs as $langcode => $q) { - if (isset($languages[$langcode])) { - return $langcode; + // Find the best match. + if ($qvalue > $max_qvalue) { + $best_match_langcode = $language->language; + $max_qvalue = $qvalue; } } - return FALSE; + return $best_match_langcode; } /** |