ISO639.php 9.3 KB


  1. <?php
  2. /**
  3. * Part of Text_LanguageDetect
  4. *
  5. * PHP version 5
  6. *
  7. * @category Text
  8. * @package Text_LanguageDetect
  9. * @author Christian Weiske <cweiske@php.net>
  10. * @copyright 2011 Christian Weiske <cweiske@php.net>
  11. * @license http://www.debian.org/misc/bsd.license BSD
  12. * @version SVN: $Id$
  13. * @link http://pear.php.net/package/Text_LanguageDetect/
  14. */
  /**
   * Provides a mapping between the languages from lang.dat and the
   * ISO 639-1 and ISO 639-2 codes.
   *
   * Note that this class contains only languages that exist in lang.dat.
   *
   * All lookup methods are case-insensitive and return NULL when the
   * requested name or code is unknown.
   *
   * @category  Text
   * @package   Text_LanguageDetect
   * @author    Christian Weiske <cweiske@php.net>
   * @copyright 2011 Christian Weiske <cweiske@php.net>
   * @license   http://www.debian.org/misc/bsd.license BSD
   * @link      http://www.loc.gov/standards/iso639-2/php/code_list.php
   */
  class Text_LanguageDetect_ISO639
  {
      /**
       * Maps all language names from the language database to the
       * ISO 639-1 2-letter language code.
       *
       * NULL indicates that there is no 2-letter code.
       *
       * @var array
       */
      public static $nameToCode2 = array(
          'albanian'   => 'sq',
          'arabic'     => 'ar',
          'azeri'      => 'az',
          'bengali'    => 'bn',
          'bulgarian'  => 'bg',
          'cebuano'    => null,
          'croatian'   => 'hr',
          'czech'      => 'cs',
          'danish'     => 'da',
          'dutch'      => 'nl',
          'english'    => 'en',
          'estonian'   => 'et',
          'farsi'      => 'fa',
          'finnish'    => 'fi',
          'french'     => 'fr',
          'german'     => 'de',
          'hausa'      => 'ha',
          'hawaiian'   => null,
          'hindi'      => 'hi',
          'hungarian'  => 'hu',
          'icelandic'  => 'is',
          'indonesian' => 'id',
          'italian'    => 'it',
          'kazakh'     => 'kk',
          'kyrgyz'     => 'ky',
          'latin'      => 'la',
          'latvian'    => 'lv',
          'lithuanian' => 'lt',
          'macedonian' => 'mk',
          'mongolian'  => 'mn',
          'nepali'     => 'ne',
          'norwegian'  => 'no',
          'pashto'     => 'ps',
          'pidgin'     => null,
          'polish'     => 'pl',
          'portuguese' => 'pt',
          'romanian'   => 'ro',
          'russian'    => 'ru',
          'serbian'    => 'sr',
          'slovak'     => 'sk',
          'slovene'    => 'sl',
          'somali'     => 'so',
          'spanish'    => 'es',
          'swahili'    => 'sw',
          'swedish'    => 'sv',
          'tagalog'    => 'tl',
          'turkish'    => 'tr',
          'ukrainian'  => 'uk',
          'urdu'       => 'ur',
          'uzbek'      => 'uz',
          'vietnamese' => 'vi',
          'welsh'      => 'cy',
      );

      /**
       * Maps all language names from the language database to the
       * ISO 639-2 3-letter language code.
       *
       * @var array
       */
      public static $nameToCode3 = array(
          'albanian'   => 'sqi',
          'arabic'     => 'ara',
          'azeri'      => 'aze',
          'bengali'    => 'ben',
          'bulgarian'  => 'bul',
          'cebuano'    => 'ceb',
          'croatian'   => 'hrv',
          'czech'      => 'ces',
          'danish'     => 'dan',
          'dutch'      => 'nld',
          'english'    => 'eng',
          'estonian'   => 'est',
          'farsi'      => 'fas',
          'finnish'    => 'fin',
          'french'     => 'fra',
          'german'     => 'deu',
          'hausa'      => 'hau',
          'hawaiian'   => 'haw',
          'hindi'      => 'hin',
          'hungarian'  => 'hun',
          'icelandic'  => 'isl',
          'indonesian' => 'ind',
          'italian'    => 'ita',
          'kazakh'     => 'kaz',
          'kyrgyz'     => 'kir',
          'latin'      => 'lat',
          'latvian'    => 'lav',
          'lithuanian' => 'lit',
          'macedonian' => 'mkd',
          'mongolian'  => 'mon',
          'nepali'     => 'nep',
          'norwegian'  => 'nor',
          'pashto'     => 'pus',
          'pidgin'     => 'crp',
          'polish'     => 'pol',
          'portuguese' => 'por',
          'romanian'   => 'ron',
          'russian'    => 'rus',
          'serbian'    => 'srp',
          'slovak'     => 'slk',
          'slovene'    => 'slv',
          'somali'     => 'som',
          'spanish'    => 'spa',
          'swahili'    => 'swa',
          'swedish'    => 'swe',
          'tagalog'    => 'tgl',
          'turkish'    => 'tur',
          'ukrainian'  => 'ukr',
          'urdu'       => 'urd',
          'uzbek'      => 'uzb',
          'vietnamese' => 'vie',
          'welsh'      => 'cym',
      );

      /**
       * Maps ISO 639-1 2-letter language codes to the language names
       * in the language database.
       *
       * Not all languages have a 2-letter code, so some are missing.
       *
       * @var array
       */
      public static $code2ToName = array(
          'ar' => 'arabic',
          'az' => 'azeri',
          'bg' => 'bulgarian',
          'bn' => 'bengali',
          'cs' => 'czech',
          'cy' => 'welsh',
          'da' => 'danish',
          'de' => 'german',
          'en' => 'english',
          'es' => 'spanish',
          'et' => 'estonian',
          'fa' => 'farsi',
          'fi' => 'finnish',
          'fr' => 'french',
          'ha' => 'hausa',
          'hi' => 'hindi',
          'hr' => 'croatian',
          'hu' => 'hungarian',
          'id' => 'indonesian',
          'is' => 'icelandic',
          'it' => 'italian',
          'kk' => 'kazakh',
          'ky' => 'kyrgyz',
          'la' => 'latin',
          'lt' => 'lithuanian',
          'lv' => 'latvian',
          'mk' => 'macedonian',
          'mn' => 'mongolian',
          'ne' => 'nepali',
          'nl' => 'dutch',
          'no' => 'norwegian',
          'pl' => 'polish',
          'ps' => 'pashto',
          'pt' => 'portuguese',
          'ro' => 'romanian',
          'ru' => 'russian',
          'sk' => 'slovak',
          'sl' => 'slovene',
          'so' => 'somali',
          'sq' => 'albanian',
          'sr' => 'serbian',
          'sv' => 'swedish',
          'sw' => 'swahili',
          'tl' => 'tagalog',
          'tr' => 'turkish',
          'uk' => 'ukrainian',
          'ur' => 'urdu',
          'uz' => 'uzbek',
          'vi' => 'vietnamese',
      );

      /**
       * Maps ISO 639-2 3-letter language codes to the language names
       * in the language database.
       *
       * This is the exact inverse of $nameToCode3.
       *
       * @var array
       */
      public static $code3ToName = array(
          'ara' => 'arabic',
          'aze' => 'azeri',
          'ben' => 'bengali',
          'bul' => 'bulgarian',
          'ceb' => 'cebuano',
          'ces' => 'czech',
          'crp' => 'pidgin',
          'cym' => 'welsh',
          'dan' => 'danish',
          'deu' => 'german',
          'eng' => 'english',
          'est' => 'estonian',
          'fas' => 'farsi',
          'fin' => 'finnish',
          'fra' => 'french',
          'hau' => 'hausa',
          'haw' => 'hawaiian',
          'hin' => 'hindi',
          'hrv' => 'croatian',
          'hun' => 'hungarian',
          'ind' => 'indonesian',
          'isl' => 'icelandic',
          'ita' => 'italian',
          'kaz' => 'kazakh',
          'kir' => 'kyrgyz',
          'lat' => 'latin',
          'lav' => 'latvian',
          'lit' => 'lithuanian',
          'mkd' => 'macedonian',
          'mon' => 'mongolian',
          'nep' => 'nepali',
          'nld' => 'dutch',
          'nor' => 'norwegian',
          'pol' => 'polish',
          'por' => 'portuguese',
          'pus' => 'pashto',
          // "ron" is the ISO 639-2/T code for Romanian ("rom" is Romany).
          'ron' => 'romanian',
          'rus' => 'russian',
          'slk' => 'slovak',
          'slv' => 'slovene',
          'som' => 'somali',
          'spa' => 'spanish',
          'sqi' => 'albanian',
          'srp' => 'serbian',
          'swa' => 'swahili',
          'swe' => 'swedish',
          'tgl' => 'tagalog',
          'tur' => 'turkish',
          'ukr' => 'ukrainian',
          'urd' => 'urdu',
          'uzb' => 'uzbek',
          'vie' => 'vietnamese',
      );

      /**
       * Returns the 2-letter ISO 639-1 code for the given language name.
       *
       * The lookup is case-insensitive.
       *
       * @param string $lang English language name like "swedish"
       *
       * @return string Two-letter language code (e.g. "sv"), or NULL if the
       *                language is unknown or has no 2-letter code
       */
      public static function nameToCode2($lang)
      {
          $lang = strtolower($lang);
          // isset() is false for NULL values too, so languages that have
          // no 2-letter code fall through to the NULL return as well.
          if (!isset(self::$nameToCode2[$lang])) {
              return null;
          }
          return self::$nameToCode2[$lang];
      }

      /**
       * Returns the 3-letter ISO 639-2 code for the given language name.
       *
       * The lookup is case-insensitive.
       *
       * @param string $lang English language name like "swedish"
       *
       * @return string Three-letter language code (e.g. "swe") or NULL if not found
       */
      public static function nameToCode3($lang)
      {
          $lang = strtolower($lang);
          if (!isset(self::$nameToCode3[$lang])) {
              return null;
          }
          return self::$nameToCode3[$lang];
      }

      /**
       * Returns the language name for the given 2-letter ISO 639-1 code.
       *
       * The lookup is case-insensitive.
       *
       * @param string $code Two-letter language code (e.g. "sv")
       *
       * @return string English language name like "swedish" or NULL if not found
       */
      public static function code2ToName($code)
      {
          // Normalise the code itself so that "SV" and "sv" both match.
          $code = strtolower($code);
          if (!isset(self::$code2ToName[$code])) {
              return null;
          }
          return self::$code2ToName[$code];
      }

      /**
       * Returns the language name for the given 3-letter ISO 639-2 code.
       *
       * The lookup is case-insensitive.
       *
       * @param string $code Three-letter language code (e.g. "swe")
       *
       * @return string English language name like "swedish" or NULL if not found
       */
      public static function code3ToName($code)
      {
          // Normalise the code itself so that "SWE" and "swe" both match.
          $code = strtolower($code);
          if (!isset(self::$code3ToName[$code])) {
              return null;
          }
          return self::$code3ToName[$code];
      }
  }
  332. ?>