Issue #221712 by Damien Tournoud, Gábor Hojtsy, iliphil, Bodo Maass, idflood, loganfsmyth: Fixed locale_language_from_browser() doesn't parse language tags correctly, has a broken logic.
parent
d548e9b4e4
commit
34f117b103
|
@ -93,39 +93,82 @@ function locale_language_from_interface() {
|
|||
* otherwise we would cache a user-specific preference.
|
||||
*
|
||||
* @param $languages
|
||||
* An array of valid language objects.
|
||||
* An array of language objects for enabled languages ordered by weight.
|
||||
*
|
||||
* @return
|
||||
* A valid language code on success, FALSE otherwise.
|
||||
*/
|
||||
function locale_language_from_browser($languages) {
|
||||
// Specified by the user via the browser's Accept Language setting
|
||||
// Samples: "hu, en-us;q=0.66, en;q=0.33", "hu,en-us;q=0.5"
|
||||
$browser_langs = array();
|
||||
if (empty($_SERVER['HTTP_ACCEPT_LANGUAGE'])) {
|
||||
return FALSE;
|
||||
}
|
||||
|
||||
if (isset($_SERVER['HTTP_ACCEPT_LANGUAGE'])) {
|
||||
$browser_accept = explode(",", $_SERVER['HTTP_ACCEPT_LANGUAGE']);
|
||||
foreach ($browser_accept as $langpart) {
|
||||
// The language part is either a code or a code with a quality.
|
||||
// We cannot do anything with a * code, so it is skipped.
|
||||
// If the quality is missing, it is assumed to be 1 according to the RFC.
|
||||
if (preg_match("!([a-z-]+)(;q=([0-9\\.]+))?!", trim($langpart), $found)) {
|
||||
$browser_langs[$found[1]] = (isset($found[3]) ? (float) $found[3] : 1.0);
|
||||
// The Accept-Language header contains information about the language
|
||||
// preferences configured in the user's browser / operating system.
|
||||
// RFC 2616 (section 14.4) defines the Accept-Language header as follows:
|
||||
// Accept-Language = "Accept-Language" ":"
|
||||
// 1#( language-range [ ";" "q" "=" qvalue ] )
|
||||
// language-range = ( ( 1*8ALPHA *( "-" 1*8ALPHA ) ) | "*" )
|
||||
// Samples: "hu, en-us;q=0.66, en;q=0.33", "hu,en-us;q=0.5"
|
||||
$browser_langcodes = array();
|
||||
if (preg_match_all('@([a-zA-Z-]+|\*)(?:;q=([0-9.]+))?(?:$|\s*,\s*)@', trim($_SERVER['HTTP_ACCEPT_LANGUAGE']), $matches, PREG_SET_ORDER)) {
|
||||
foreach ($matches as $match) {
|
||||
// We can safely use strtolower() here, tags are ASCII.
|
||||
// RFC2616 mandates that the decimal part is no more than three digits,
|
||||
// so we multiply the qvalue by 1000 to avoid floating point comparisons.
|
||||
$langcode = strtolower($match[1]);
|
||||
$qvalue = isset($match[2]) ? (float) $match[2] : 1;
|
||||
$browser_langcodes[$langcode] = (int) ($qvalue * 1000);
|
||||
}
|
||||
}
|
||||
|
||||
// We should take pristine values from the HTTP headers, but Internet Explorer
|
||||
// from version 7 sends only specific language tags (e.g. fr-CA) without the
|
||||
// corresponding generic tag (fr) unless explicitly configured. In that case,
|
||||
// we assume that the lowest value of the specific tags is the value of the
|
||||
// generic language to be as close to the HTTP 1.1 spec as possible.
|
||||
// See http://www.w3.org/Protocols/rfc2616/rfc2616-sec14.html#sec14.4 and
|
||||
// http://blogs.msdn.com/b/ie/archive/2006/10/17/accept-language-header-for-internet-explorer-7.aspx
|
||||
asort($browser_langcodes);
|
||||
foreach ($browser_langcodes as $langcode => $qvalue) {
|
||||
$generic_tag = strtok($langcode, '-');
|
||||
if (!isset($browser_langcodes[$generic_tag])) {
|
||||
$browser_langcodes[$generic_tag] = $qvalue;
|
||||
}
|
||||
}
|
||||
|
||||
// Find the enabled language with the greatest qvalue, following the rules
|
||||
// of RFC 2616 (section 14.4). If several languages have the same qvalue,
|
||||
// prefer the one listed first in $languages (which is ordered by weight).
|
||||
$best_match_langcode = FALSE;
|
||||
$max_qvalue = 0;
|
||||
foreach ($languages as $langcode => $language) {
|
||||
// Language tags are case insensitive (RFC2616, sec 3.10).
|
||||
$langcode = strtolower($langcode);
|
||||
|
||||
// If nothing matches below, the default qvalue is the one of the wildcard
|
||||
// language, if set, or is 0 (which will never match).
|
||||
$qvalue = isset($browser_langcodes['*']) ? $browser_langcodes['*'] : 0;
|
||||
|
||||
// Find the longest possible prefix of the browser-supplied language
|
||||
// ('the language-range') that matches this site language ('the language tag').
|
||||
$prefix = $langcode;
|
||||
do {
|
||||
if (isset($browser_langcodes[$prefix])) {
|
||||
$qvalue = $browser_langcodes[$prefix];
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
while ($prefix = substr($prefix, 0, strrpos($prefix, '-')));
|
||||
|
||||
// Order the codes by quality
|
||||
arsort($browser_langs);
|
||||
|
||||
// Try to find the first preferred language we have
|
||||
foreach ($browser_langs as $langcode => $q) {
|
||||
if (isset($languages[$langcode])) {
|
||||
return $langcode;
|
||||
// Find the best match.
|
||||
if ($qvalue > $max_qvalue) {
|
||||
$best_match_langcode = $language->language;
|
||||
$max_qvalue = $qvalue;
|
||||
}
|
||||
}
|
||||
|
||||
return FALSE;
|
||||
return $best_match_langcode;
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
|
@ -1387,6 +1387,125 @@ class LocaleLanguageSwitchingFunctionalTest extends DrupalWebTestCase {
|
|||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Test browser language detection.
|
||||
*/
|
||||
class LocaleBrowserDetectionTest extends DrupalUnitTestCase {
|
||||
|
||||
public static function getInfo() {
|
||||
return array(
|
||||
'name' => 'Browser language detection',
|
||||
'description' => 'Tests for the browser language detection.',
|
||||
'group' => 'Locale',
|
||||
);
|
||||
}
|
||||
|
||||
/**
|
||||
* Unit tests for the locale_language_from_browser() function.
|
||||
*/
|
||||
function testLanguageFromBrowser() {
|
||||
// Load the required functions.
|
||||
require_once DRUPAL_ROOT . '/includes/locale.inc';
|
||||
|
||||
$languages = array(
|
||||
// In our test case, 'en' has priority over 'en-US'.
|
||||
'en' => (object) array(
|
||||
'language' => 'en',
|
||||
),
|
||||
'en-US' => (object) array(
|
||||
'language' => 'en-US',
|
||||
),
|
||||
// But 'fr-CA' has priority over 'fr'.
|
||||
'fr-CA' => (object) array(
|
||||
'language' => 'fr-CA',
|
||||
),
|
||||
'fr' => (object) array(
|
||||
'language' => 'fr',
|
||||
),
|
||||
// 'es-MX' is alone.
|
||||
'es-MX' => (object) array(
|
||||
'language' => 'es-MX',
|
||||
),
|
||||
// 'pt' is alone.
|
||||
'pt' => (object) array(
|
||||
'language' => 'pt',
|
||||
),
|
||||
// Language codes with more than one dash are actually valid.
|
||||
// eh-oh-laa-laa is the official language code of the Teletubbies.
|
||||
'eh-oh-laa-laa' => (object) array(
|
||||
'language' => 'eh-oh-laa-laa',
|
||||
),
|
||||
);
|
||||
|
||||
$test_cases = array(
|
||||
// Equal qvalue for each language, choose the site preferred one.
|
||||
'en,en-US,fr-CA,fr,es-MX' => 'en',
|
||||
'en-US,en,fr-CA,fr,es-MX' => 'en',
|
||||
'fr,en' => 'en',
|
||||
'en,fr' => 'en',
|
||||
'en-US,fr' => 'en',
|
||||
'fr,en-US' => 'en',
|
||||
'fr,fr-CA' => 'fr-CA',
|
||||
'fr-CA,fr' => 'fr-CA',
|
||||
'fr' => 'fr-CA',
|
||||
'fr;q=1' => 'fr-CA',
|
||||
'fr,es-MX' => 'fr-CA',
|
||||
'fr,es' => 'fr-CA',
|
||||
'es,fr' => 'fr-CA',
|
||||
'es-MX,de' => 'es-MX',
|
||||
'de,es-MX' => 'es-MX',
|
||||
|
||||
// Different cases and whitespace.
|
||||
'en' => 'en',
|
||||
'En' => 'en',
|
||||
'EN' => 'en',
|
||||
' en' => 'en',
|
||||
'en ' => 'en',
|
||||
|
||||
// A less specific language from the browser matches a more specific one
|
||||
// from the website, and the other way around for compatibility with
|
||||
// some versions of Internet Explorer.
|
||||
'es' => 'es-MX',
|
||||
'es-MX' => 'es-MX',
|
||||
'pt' => 'pt',
|
||||
'pt-PT' => 'pt',
|
||||
'pt-PT;q=0.5,pt-BR;q=1,en;q=0.7' => 'en',
|
||||
'pt-PT;q=1,pt-BR;q=0.5,en;q=0.7' => 'en',
|
||||
'pt-PT;q=0.4,pt-BR;q=0.1,en;q=0.7' => 'en',
|
||||
'pt-PT;q=0.1,pt-BR;q=0.4,en;q=0.7' => 'en',
|
||||
|
||||
// Language codes with several dashes are valid. The less specific language
|
||||
// from the browser matches the more specific one from the website.
|
||||
'eh-oh-laa-laa' => 'eh-oh-laa-laa',
|
||||
'eh-oh-laa' => 'eh-oh-laa-laa',
|
||||
'eh-oh' => 'eh-oh-laa-laa',
|
||||
'eh' => 'eh-oh-laa-laa',
|
||||
|
||||
// Different qvalues.
|
||||
'en-US,en;q=0.5,fr;q=0.25' => 'en-US',
|
||||
'fr,en;q=0.5' => 'fr-CA',
|
||||
'fr,en;q=0.5,fr-CA;q=0.25' => 'fr',
|
||||
|
||||
// Silly wildcards are also valid.
|
||||
'*,fr-CA;q=0.5' => 'en',
|
||||
'*,en;q=0.25' => 'fr-CA',
|
||||
'en,en-US;q=0.5,fr;q=0.25' => 'en',
|
||||
'en-US,en;q=0.5,fr;q=0.25' => 'en-US',
|
||||
|
||||
// Unresolvable cases.
|
||||
'' => FALSE,
|
||||
'de,pl' => FALSE,
|
||||
$this->randomName(10) => FALSE,
|
||||
);
|
||||
|
||||
foreach ($test_cases as $accept_language => $expected_result) {
|
||||
$_SERVER['HTTP_ACCEPT_LANGUAGE'] = $accept_language;
|
||||
$result = locale_language_from_browser($languages);
|
||||
$this->assertIdentical($result, $expected_result, t("Language selection '@accept-language' selects '@result', result = '@actual'", array('@accept-language' => $accept_language, '@result' => $expected_result, '@actual' => isset($result) ? $result : 'none')));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Functional tests for a user's ability to change their default language.
|
||||
*/
|
||||
|
|
Loading…
Reference in New Issue