accept-to-gettext.php 6.2 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186
  1. <?php
  2. /*
  3. * accept-to-gettext.inc -- convert information in 'Accept-*' headers to
  4. * gettext language identifiers.
  5. * Copyright (c) 2003, Wouter Verhelst <wouter@debian.org>
  6. *
  7. * This program is free software; you can redistribute it and/or modify
  8. * it under the terms of the GNU General Public License as published by
  9. * the Free Software Foundation; either version 2 of the License, or
  10. * (at your option) any later version.
  11. *
  12. * This program is distributed in the hope that it will be useful,
  13. * but WITHOUT ANY WARRANTY; without even the implied warranty of
  14. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  15. * GNU General Public License for more details.
  16. *
  17. * You should have received a copy of the GNU General Public License
  18. * along with this program; if not, write to the Free Software
  19. * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
  20. *
  21. * Usage:
  22. *
  23. * $locale=al2gt(<array of supported languages/charsets in gettext syntax>,
  24. * <MIME type of document>);
  25. * setlocale(LC_ALL, $locale); // or LC_MESSAGES, or whatever...
  26. *
  27. * Example:
  28. *
  29. * $langs=array('nl_BE.ISO-8859-15','nl_BE.UTF-8','en_US.UTF-8','en_GB.UTF-8');
  30. * $locale=al2gt($langs, 'text/html');
  31. * setlocale(LC_ALL, $locale);
  32. *
  33. * Note that this will send out header information (to be
  34. * RFC2616-compliant), so it must be called before anything is sent to
  35. * the user.
  36. *
  37. * Assumptions made:
  38. * * Charset encodings are written the same way as the Accept-Charset
  39. * HTTP header specifies them (RFC2616), except that they're parsed
  40. * case-insensitive.
  41. * * Country codes and language codes are the same in both gettext and
  42. * the Accept-Language syntax (except for the case differences, which
  43. * are dealt with easily). If not, some input may be ignored.
  44. * * The provided gettext-strings are fully qualified; i.e., no "en_US";
  45. * always "en_US.ISO-8859-15" or "en_US.UTF-8", or whichever has been
  46. * used. "en.ISO-8859-15" is OK, though.
  47. * * The language is more important than the charset; i.e., if the
  48. * following is given:
  49. *
  50. * Accept-Language: nl-be, nl;q=0.8, en-us;q=0.5, en;q=0.3
  51. * Accept-Charset: ISO-8859-15, utf-8;q=0.5
  52. *
  53. * And the supplied parameter contains (amongst others) nl_BE.UTF-8
  54. * and nl.ISO-8859-15, then nl_BE.UTF-8 will be picked.
  55. *
  56. * $Log: accept-to-gettext.inc,v $
  57. * Revision 1.1.1.1 2003/11/19 19:31:15 wouter
  58. * * moved to new CVS repo after death of the old
  59. * * Fixed code to apply a default to both Accept-Charset and
  60. * Accept-Language if none of those headers are supplied; patch from
  61. * Dominic Chambers <dominic@encasa.com>
  62. *
  63. * Revision 1.2 2003/08/14 10:23:59 wouter
  64. * Removed little error in Content-Type header syntaxis.
  65. *
  66. * 2007-04-01
  67. * add '@' before use of arrays, to avoid PHP warnings.
  68. */
  69. /* not really important, this one; perhaps I could've put it inline with
  70. * the rest. */
  /**
   * Decide whether a candidate locale beats the best one found so far.
   *
   * The language score dominates; the charset score only breaks ties
   * between equal language scores. Comparisons are deliberately loose so
   * that numeric strings taken from the headers ("0.8") compare as numbers.
   *
   * @param mixed       $curlscore Language score of the current best match.
   * @param mixed       $curcscore Charset score of the current best match.
   * @param string|null $curgtlang Gettext identifier of the current best match.
   * @param mixed       $langval   Language score of the candidate.
   * @param mixed       $charval   Charset score of the candidate.
   * @param string      $gtlang    Gettext identifier of the candidate.
   *
   * @return array Three-element list: language score, charset score and
   *               gettext identifier of whichever entry wins.
   */
  function find_match($curlscore, $curcscore, $curgtlang, $langval, $charval,
                      $gtlang)
  {
      // A strictly better language score wins outright.
      if ($curlscore < $langval) {
          return array($langval, $charval, $gtlang);
      }

      // Same language score: the candidate wins only on a better charset.
      // The current language score is kept as-is, not replaced by $langval.
      if ($curlscore == $langval && $curcscore < $charval) {
          return array($curlscore, $charval, $gtlang);
      }

      // Otherwise the current best stays.
      return array($curlscore, $curcscore, $curgtlang);
  }
  /**
   * Parse the value of an Accept-Language / Accept-Charset header into a
   * map of token => quality value.
   *
   * Elements are separated by ","; each element is a token optionally
   * followed by ";"-separated parameters, of which only "q" is honoured.
   * Whitespace around tokens, parameter names and values is ignored, a
   * missing "q" means 1.0, and values are clamped to the range 0..1.
   * Empty elements (e.g. from a trailing comma) are skipped. If a token
   * appears more than once, the last occurrence wins.
   *
   * @param string $header    Raw header value.
   * @param bool   $uppercase TRUE to normalise tokens to upper case
   *                          (charsets), FALSE for lower case (languages).
   *
   * @return array Map of normalised token => float quality.
   */
  function al2gt_parse_accept($header, $uppercase)
  {
      $scores = array();
      foreach (explode(',', $header) as $element) {
          $params = explode(';', $element);
          $token = trim(array_shift($params));
          if ($token === '') {
              continue;
          }
          $token = $uppercase ? strtoupper($token) : strtolower($token);

          $quality = 1.0;
          foreach ($params as $param) {
              $pair = explode('=', $param, 2);
              if (count($pair) == 2 && strtolower(trim($pair[0])) === 'q') {
                  $quality = max(0.0, min(1.0, (float) trim($pair[1])));
              }
          }
          $scores[$token] = $quality;
      }
      return $scores;
  }

  /**
   * Pick the best gettext locale for the current request from the
   * Accept-Language and Accept-Charset request headers, and announce the
   * choice through Content-Language / Content-Type response headers.
   *
   * The language quality is more important than the charset quality; the
   * charset only breaks ties. Among equally good candidates the one listed
   * first in $gettextlangs wins. A quality of 0 means "not acceptable", so
   * such a candidate is never selected.
   *
   * Because this sends headers, it must be called before any output.
   *
   * @param array  $gettextlangs Supported locales in gettext syntax with a
   *                             charset, e.g. 'nl_BE.UTF-8'.
   * @param string $mime         MIME type of the document, e.g. 'text/html'.
   *
   * @return string|null The chosen entry of $gettextlangs, or NULL when
   *                     none is acceptable (no headers are sent then).
   */
  function al2gt($gettextlangs, $mime) {
      /* Default to "everything is acceptable" when a header is missing or
       * empty, as RFC2616 specifies. */
      $acceptLang = '*';
      if (isset($_SERVER['HTTP_ACCEPT_LANGUAGE'])
          && trim($_SERVER['HTTP_ACCEPT_LANGUAGE']) !== '') {
          $acceptLang = $_SERVER['HTTP_ACCEPT_LANGUAGE'];
      }
      $acceptChar = '*';
      if (isset($_SERVER['HTTP_ACCEPT_CHARSET'])
          && trim($_SERVER['HTTP_ACCEPT_CHARSET']) !== '') {
          $acceptChar = $_SERVER['HTTP_ACCEPT_CHARSET'];
      }

      /* Language tags are matched in lower case, charsets in upper case,
       * mirroring how the gettext identifiers are normalised below. */
      $alscores = al2gt_parse_accept($acceptLang, false);
      $acscores = al2gt_parse_accept($acceptChar, true);

      /* RFC2616: if ISO-8859-1 is not explicitly mentioned it takes the
       * quality of "*" when present, and 1 otherwise. */
      if (!isset($acscores['ISO-8859-1'])) {
          $acscores['ISO-8859-1'] = isset($acscores['*']) ? $acscores['*'] : 1.0;
      }

      /* Loop through the available languages/encodings and pick the one
       * with the highest score. */
      $curlscore = 0;
      $curcscore = 0;
      $curgtlang = NULL;
      foreach ($gettextlangs as $gtlang) {
          $parts = explode('.', str_replace('_', '-', $gtlang));
          $allang = strtolower($parts[0]);
          $gtcs = isset($parts[1]) ? strtoupper($parts[1]) : '';
          $subtags = explode('-', $allang);
          $primary = $subtags[0];

          /* Most specific combination first; the first one for which both
           * a language and a charset score exist decides this candidate. */
          $lookups = array(
              array($allang, $gtcs),
              array($primary, $gtcs),
              array($allang, '*'),
              array($primary, '*'),
              array('*', $gtcs),
              array('*', '*'),
          );
          foreach ($lookups as $lookup) {
              list($langKey, $csKey) = $lookup;
              if (!isset($alscores[$langKey]) || !isset($acscores[$csKey])) {
                  continue;
              }
              $langval = $alscores[$langKey];
              $charval = $acscores[$csKey];

              /* q=0 marks the language or charset as unacceptable; a more
               * specific q=0 must not be overridden by a wildcard. */
              if ($langval > 0 && $charval > 0
                  && ($langval > $curlscore
                      || ($langval == $curlscore && $charval > $curcscore))) {
                  $curlscore = $langval;
                  $curcscore = $charval;
                  $curgtlang = $gtlang;
              }
              break;
          }
      }

      /* Nothing acceptable: do not emit empty Content-Language / charset
       * headers, just report the failure to the caller. */
      if ($curgtlang === NULL) {
          return NULL;
      }

      /* Re-parse the chosen gettext string: it may have been found through
       * a "*" qualifier, so the header keys cannot be reused. */
      $gtparts = explode('.', $curgtlang);
      $lang = str_replace('_', '-', strtolower($gtparts[0]));
      header("Content-Language: $lang");
      if (isset($gtparts[1]) && $gtparts[1] !== '') {
          $charset = $gtparts[1];
          header("Content-Type: $mime; charset=$charset");
      } else {
          header("Content-Type: $mime");
      }
      return $curgtlang;
  }
  171. ?>