Locales: Difference between revisions
Jump to navigation
Jump to search
m (Reverted edits by 193.145.56.193 (Talk); changed back to last version by Xavi) |
(Link to CLDR page) |
||
Line 9: | Line 9: | ||
Locale names are commonly constructed from abbreviations for countries and languages, sometimes with character set appended. For example, en-us.utf8 specifies conventions appropriate to US English, with Unicode character set in UTF-8 encoding. The en-us locale differs on many but not all points from en-gb (Great Britain) or en-ca (Canada) and so on. All of these are significantly different from hi-in (Hindi in India) or zh-tw (Traditional Chinese in Taiwain). |
Locale names are commonly constructed from abbreviations for countries and languages, sometimes with character set appended. For example, en-us.utf8 specifies conventions appropriate to US English, with Unicode character set in UTF-8 encoding. The en-us locale differs on many but not all points from en-gb (Great Britain) or en-ca (Canada) and so on. All of these are significantly different from hi-in (Hindi in India) or zh-tw (Traditional Chinese in Taiwain). |
||
The best source for locale data is the Unicode [http://www.unicode.org/cldr Common Locale Data Repository] (CLDR) |
|||
Mandriva Linux has the following locale support. |
Mandriva Linux has the following locale support. |
Revision as of 17:20, 3 February 2008
In software, a locale specifies a choice of language-, country-, and culture-specific ways of representing common kinds of information, specifically:
- time: 2:00 PM vs. 14:00
- dates: 3/2/2006 vs. 2006-3-2
- numbers: 1,000,000.1 vs. 1.000.000,1
- currency: $100 vs. USD100
- character set and encoding: extended ASCII vs. Unicode UTF-8
- measurements: US vs. SI (metric)
Locale names are commonly constructed from abbreviations for countries and languages, sometimes with character set appended. For example, en-us.utf8 specifies conventions appropriate to US English, with Unicode character set in UTF-8 encoding. The en-us locale differs on many but not all points from en-gb (Great Britain) or en-ca (Canada) and so on. All of these are significantly different from hi-in (Hindi in India) or zh-tw (Traditional Chinese in Taiwan).
The best source for locale data is the Unicode Common Locale Data Repository (CLDR).
Mandriva Linux has the following locale support.
#- key: lang name (locale name for some (~5) special cases needing #- extra distinctions) #- [0]: lang name in english #- [1]: transliterated locale name in the locale name (used for sorting) #- [2]: default locale name to use for that language if there is not #- an existing locale for the combination language+country choosen #- [3]: geographic groups that this language belongs to (for displaying #- in the menu grouped in smaller lists), 1=Europe, 2=Asia, 3=Africa, #- 4=Oceania&Pacific, 5=America (if you wonder, it's the order #- used in the olympic flag) #- [4]: special value for LANGUAGE variable (if different of the default #- of 'll_CC:ll_DD:ll' (ll_CC: locale (if exist) resulting of the #- combination of chosen lang (ll) and country (CC), ll_DD: the #- default locale shown here (field [2]) and ll: the language (the key)) our %langs = ( 'af' => [ 'Afrikaans', 'Afrikaans', 'af_ZA', ' 3 ', 'iso-8859-1' ], 'am' => [ 'Amharic', 'ZZ emarNa', 'am_ET', ' 3 ', 'utf_ethi' ], 'ar' => [ 'Arabic', 'AA Arabic', 'ar_EG', ' 23 ', 'utf_ar' ], 'as' => [ 'Assamese', 'ZZ Assamese', 'as_IN', ' 2 ', 'utf_beng' ], 'az' => [ 'Azeri (Latin)', 'Azerbaycanca', 'az_AZ', ' 2 ', 'utf_az' ], 'be' => [ 'Belarussian', 'Belaruskaya', 'be_BY', '1 ', 'utf_cyr1' ], 'ber' => [ 'Berber', 'ZZ Tamazight', 'ber_MA', ' 3 ', 'utf_tfng', 'ber_MA:ber:fr' ], 'bg' => [ '[[Bulgarian', 'Blgarski', 'bg_BG', '1 ', 'cp1251' ], 'bn' => [ 'Bengali', 'ZZ Bengali', 'bn_BD', ' 2 ', 'utf_beng' ], #- bo_CN not yet done, using dz_BT locale instead 'bo' => [ 'Tibetan', 'ZZ Bod skad', 'dz_BT', ' 2 ', 'utf_tibt', 'bo' ], 'br' => [ 'Breton', 'Brezhoneg', 'br_FR', '1 ', 'iso-8859-15', 'br:fr_FR:fr' ], 'bs' => [ 'Bosnian', 'Bosanski', 'bs_BA', '1 ', 'iso-8859-2' ], 'ca' => [ 'Catalan', 'Catala', 'ca_ES', '1 ', 'iso-8859-15', 'ca:es_ES:es' ], 'ca@valencian' => [ 'Catalan (Valencian)', 'Catala (Valencia)', 'ca_ES', '1 ', 'iso-8859-15', 'ca_ES@valencian:ca@valencian:ca:es_ES:es' ], 'cs' => [ 'Czech', 'Cestina', 'cs_CZ', '1 ', 
'iso-8859-2' ], 'cy' => [ 'Welsh', 'Cymraeg', 'cy_GB', '1 ', 'utf_lat8', 'cy:en_GB:en' ], 'da' => [ 'Danish', 'Dansk', 'da_DK', '1 ', 'iso-8859-15' ], 'de' => [ 'German', 'Deutsch', 'de_DE', '1 ', 'iso-8859-15' ], 'dz' => [ 'Buthanese', 'ZZ Dzhonka', 'dz_BT', ' 2 ', 'utf_tibt' ], 'el' => [ 'Greek', 'Ellynika', 'el_GR', '1 ', 'iso-8859-7' ], 'en_GB' => [ 'English', 'English', 'en_GB', '12345', 'iso-8859-15' ], 'en_US' => [ 'English (American)', 'English (American)', 'en_US', ' 5', 'C' ], 'en_IE' => [ 'English (Ireland)', 'English (Ireland)', 'en_IE', '1 ', 'iso-8859-15', 'en_IE:en_GB:en' ], 'eo' => [ 'Esperanto', 'Esperanto', 'eo_XX', '12345', 'unicode' ], 'es' => [ 'Spanish', 'Espanol', 'es_ES', '1 3 5', 'iso-8859-15' ], 'et' => [ 'Estonian', 'Eesti', 'et_EE', '1 ', 'iso-8859-15' ], 'eu' => [ 'Euskara (Basque)', 'Euskara', 'eu_ES', '1 ', 'utf_lat1' ], 'fa' => [ 'Farsi (Iranian)', 'AA Farsi', 'fa_IR', ' 2 ', 'utf_ar' ], 'fi' => [ 'Finnish (Suomi)', 'Suomi', 'fi_FI', '1 ', 'iso-8859-15' ], #- 'tl' in priority position for now, as 'fil' is not much used. 
#- Monolingual window managers will not see the menus otherwise 'fil' => [ 'Filipino', 'Filipino', 'fil_PH', ' 2 ', 'utf_lat1', 'tl:fil' ], 'fo' => [ 'Faroese', 'Foroyskt', 'fo_FO', '1 ', 'utf_lat1' ], 'fr' => [ 'French', 'Francais', 'fr_FR', '1 345', 'iso-8859-15' ], 'fur' => [ 'Furlan', 'Furlan', 'fur_IT', '1 ', 'utf_lat1', 'fur:it_IT:it' ], 'fy' => [ 'Frisian', 'Frysk', 'fy_NL', '1 ', 'utf_lat1' ], 'ga' => [ 'Gaelic (Irish)', 'Gaeilge', 'ga_IE', '1 ', 'utf_lat1', 'ga:en_IE:en_GB:en' ], #'gd' => [ 'Gaelic (Scottish)', 'Gaidhlig', 'gd_GB', '1 ', 'utf_lat8', 'gd:en_GB:en' ], 'gl' => [ 'Galician', 'Galego', 'gl_ES', '1 ', 'iso-8859-15', 'gl:es_ES:es:pt:pt_BR' ], #- gn_PY not yet done, using es_PY locale instead 'gn' => [ 'Guarani', 'Avane-e', 'es_PY', ' 5', 'utf_lat1', 'gn:es_PY:es' ], 'gu' => [ 'Gujarati', 'ZZ Gujarati', 'gu_IN', ' 2 ', 'unicode' ], #'gv' => [ 'Gaelic (Manx)', 'Gaelg', 'gv_GB', '1 ', 'utf_lat8', 'gv:en_GB:en' ], 'ha' => [ 'Hausa', 'Hausa', 'ha_NG', ' 3 ', 'utf_yo', 'ha:en_NG' ], 'he' => [ 'Hebrew', 'AA Ivrit', 'he_IL', ' 2 ', 'utf_he' ], 'hi' => [ 'Hindi', 'ZZ Hindi', 'hi_IN', ' 2 ', 'utf_deva' ], 'hr' => [ 'Croatian', 'Hrvatski', 'hr_HR', '1 ', 'iso-8859-2' ], 'hu' => [ 'Hungarian', 'Magyar', 'hu_HU', '1 ', 'iso-8859-2' ], 'hy' => [ 'Armenian', 'ZZ Armenian', 'hy_AM', ' 2 ', 'utf_armn' ], # locale not done yet #'ia' => [ 'Interlingua', 'Interlingua', 'ia_XX', '1 5', 'utf_lat1' ], 'id' => [ 'Indonesian', 'Bahasa Indonesia', 'id_ID', ' 2 ', 'utf_lat1' ], 'ig' => [ 'Igbo', 'Igbo', 'ig_NG', ' 3 ', 'utf_yo', 'ig:en_NG' ], 'is' => [ 'Icelandic', 'Islenska', 'is_IS', '1 ', 'iso-8859-1' ], 'it' => [ 'Italian', 'Italiano', 'it_IT', '1 ', 'iso-8859-15' ], 'iu' => [ 'Inuktitut', 'ZZ Inuktitut', 'iu_CA', ' 5', 'utf_iu' ], 'ja' => [ 'Japanese', 'ZZ Nihongo', 'ja_JP', ' 2 ', 'jisx0208' ], 'ka' => [ 'Georgian', 'ZZ Georgian', 'ka_GE', ' 2 ', 'utf_geor' ], 'kk' => [ 'Kazakh', 'Kazak', 'kk_KZ', ' 2 ', 'utf_cyr2' ], 'kl' => [ 'Greenlandic (inuit)', 'Kalaallisut', 
'kl_GL', ' 5', 'utf_lat1' ], 'km' => [ 'Khmer', 'ZZ Khmer', 'km_KH', ' 2 ', 'utf_khmr' ], 'kn' => [ 'Kannada', 'ZZ Kannada', 'kn_IN', ' 2 ', 'utf_knda' ], 'ko' => [ 'Korean', 'ZZ Korea', 'ko_KR', ' 2 ', 'ksc5601' ], 'ku' => [ 'Kurdish', 'Kurdi', 'ku_TR', ' 2 ', 'utf_lat5' ], #-'kw' => [ 'Cornish', 'Kernewek', 'kw_GB', '1 ', 'utf_lat8', 'kw:en_GB:en' ], 'ky' => [ 'Kyrgyz', 'Kyrgyz', 'ky_KG', ' 2 ', 'utf_cyr2' ], #- lb_LU not yet done, using de_LU locale instead 'lb' => [ '[[Luxembourgish', 'Letzebuergesch', 'de_LU', '1 ', 'utf_lat1', 'lb:de_LU' ], 'li' => [ 'Limbourgish', 'Limburgs', 'li_NL', '1 ', 'utf_lat1' ], 'lo' => [ 'Laotian', 'Laotian', 'lo_LA', ' 2 ', 'utf_laoo' ], 'lt' => [ 'Lithuanian', 'Lietuviskai', 'lt_LT', '1 ', 'iso-8859-13' ], #- ltg_LV locale not done yet, using lv_LV for now #- "ltg" is not a standard lang code, ISO-639 code was refused; #- LTG_LV should be used instead (uppercase is for non-standard #- langcodes, as defined by locale naming standard 'ltg' => [ 'Latgalian', 'Latgalisu', 'lv_LV', '1 ', 'utf_lat7', 'ltg:LTG:lv' ], #'lu' => [ 'Luganda', 'Luganda', 'lg_UG', ' 3 ', 'utf_lat1' ], 'lv' => [ 'Latvian', 'Latviesu', 'lv_LV', '1 ', 'iso-8859-13' ], 'mi' => [ 'Maori', 'Maori', 'mi_NZ', ' 4 ', 'utf_lat7' ], 'mk' => [ 'Macedonian', 'Makedonski', 'mk_MK', '1 ', 'utf_cyr1' ], 'ml' => [ 'Malayalam', 'ZZ Malayalam', 'ml_IN', ' 2 ', 'utf_mlym' ], 'mn' => [ 'Mongolian', 'Mongol', 'mn_MN', ' 2 ', 'utf_cyr2' ], 'mr' => [ 'Marathi', 'ZZ Marathi', 'mr_IN', ' 2 ', 'utf_deva' ], 'ms' => [ 'Malay', 'Bahasa Melayu', 'ms_MY', ' 2 ', 'utf_lat1' ], 'mt' => [ 'Maltese', 'Maltin', 'mt_MT', '1 3 ', 'unicode' ], #- "my_MM" not yet done, using "en_US" for now 'my' => [ 'Burmese', 'ZZ Bamaca', 'en_US', ' 2 ', 'utf_mymr', 'my_MM:my' ], 'nb' => [ 'Norwegian Bokmaal', 'Norsk, Bokmal', 'nb_NO', '1 ', 'iso-8859-1', 'nb:no' ], 'nds' => [ 'Low Saxon', 'Platduutsch', 'nds_DE', '1 ', 'utf_lat1', 'nds_DE:nds' ], 'ne' => [ 'Nepali', 'ZZ Nepali', 'ne_NP', ' 2 ', 'utf_deva' ], 
'nl' => [ 'Dutch', 'Nederlands', 'nl_NL', '1 ', 'iso-8859-15' ], 'nn' => [ 'Norwegian Nynorsk', 'Norsk, Nynorsk', 'nn_NO', '1 ', 'iso-8859-1', 'nn:no@nynorsk:no_NY:no:nb' ], 'nr' => [ 'Ndebele', 'IsiNdebele', 'nr_ZA', ' 3 ', 'utf_lat1', 'nr:en_ZA' ], 'nso' => [ 'Northern Sotho', 'Sesotho sa Leboa', 'nso_ZA', ' 3 ', 'utf_lat1', 'st:nso:en_ZA' ], 'oc' => [ 'Occitan', 'Occitan', 'oc_FR', '1 ', 'utf_lat1', 'oc:fr_FR:fr' ], 'pa_IN' => [ 'Punjabi (gurmukhi)', 'ZZ Punjabi', 'pa_IN', ' 2 ', 'utf_guru' ], 'pl' => [ 'Polish', 'Polski', 'pl_PL', '1 ', 'iso-8859-2' ], 'pt' => [ 'Portuguese', 'Portugues', 'pt_PT', '1 3 ', 'iso-8859-15', 'pt_PT:pt:pt_BR' ], 'pt_BR' => [ 'Portuguese Brazil', 'Portugues do Brasil', 'pt_BR', ' 5', 'iso-8859-1', 'pt_BR:pt_PT:pt' ], #- qu_PE not yet done, using es_PE locale instead 'qu' => [ 'Quichua', 'Runa Simi', 'es_PE', ' 5', 'utf_lat1', 'qu:es_PE:es' ], 'ro' => [ 'Romanian', 'Romana', 'ro_RO', '1 ', 'iso-8859-2' ], 'ru' => [ 'Russian', 'Russkij', 'ru_RU', '12 ', 'koi8-u' ], 'rw' => [ 'Kinyarwanda', 'Kinyarwanda', 'rw_RW', ' 3 ', 'utf_lat1', 'rw' ], 'sc' => [ 'Sardinian', 'Sardu', 'sc_IT', '1 ', 'utf_lat1', 'sc:it_IT:it' ], 'se' => [ 'Saami', 'Samegiella', 'se_NO', '1 ', 'unicode' ], 'sk' => [ 'Slovak', 'Slovencina', 'sk_SK', '1 ', 'iso-8859-2' ], 'sl' => [ 'Slovenian', 'Slovenscina', 'sl_SI', '1 ', 'iso-8859-2' ], 'so' => [ 'Somali', 'Soomaali', 'so_SO', ' 3 ', 'utf_lat1' ], 'sq' => [ 'Albanian', 'Shqip', 'sq_AL', '1 ', 'iso-8859-1' ], 'sr' => [ 'Serbian Cyrillic', 'Srpska', 'sr_CS', '1 ', 'utf_cyr1', 'sp:sr' ], #- "sh" comes first, because otherwise, due to the way glibc does language #- fallback, if "sr@Latn" is not there but a "sr" (whichs uses cyrillic) #- is there, "sh" will never be used. 
'sr@Latn' => [ 'Serbian Latin]]', 'Srpska', 'sr_CS', '1 ', 'unicode', 'sh:sr@Latn' ], 'ss' => [ 'Swati', 'SiSwati', 'ss_ZA', ' 3 ', 'utf_lat1', 'ss:en_ZA' ], 'st' => [ 'Sotho', 'Sesotho', 'st_ZA', ' 3 ', 'utf_lat1', 'st:nso:en_ZA' ], 'sv' => [ 'Swedish', 'Svenska', 'sv_SE', '1 ', 'iso-8859-1' ], 'ta' => [ 'Tamil', 'ZZ Tamil', 'ta_IN', ' 2 ', 'utf_taml' ], 'te' => [ 'Telugu', 'ZZ Telugu', 'te_IN', ' 2 ', 'unicode' ], 'tg' => [ 'Tajik', 'Tojiki', 'tg_TJ', ' 2 ', 'utf_cyr2' ], 'th' => [ 'Thai', 'ZZ Thai', 'th_TH', ' 2 ', 'tis620' ], 'tk' => [ 'Turkmen', 'Turkmence', 'tk_TM', ' 2 ', 'utf_az' ], 'tn' => [ 'Tswana', 'Setswana', 'tn_ZA', ' 3 ', 'utf_lat1', 'tn:en_ZA' ], 'tr' => [ 'Turkish', 'Turkce', 'tr_TR', '12 ', 'iso-8859-9' ], 'ts' => [ 'Tsonga', 'Xitsonga', 'ts_ZA', ' 3 ', 'utf_lat1', 'ts:en_ZA' ], 'tt' => [ 'Tatar', 'Tatarca', 'tt_RU', ' 2 ', 'utf_lat5' ], 'ug' => [ 'Uyghur', 'AA Uyghur', 'ug_CN', ' 2 ', 'utf_ar', 'ug' ], 'uk' => [ 'Ukrainian', 'Ukrayinska', 'uk_UA', '1 ', 'koi8-u' ], 'ur' => [ 'Urdu', 'AA Urdu', 'ur_PK', ' 2 ', 'utf_ar' ], 'uz@Latn' => [ 'Uzbek (latin)', 'Ozbekcha', 'uz_UZ', ' 2 ', 'utf_cyr2', 'uz@Latn:uz' ], 'uz' => [ 'Uzbek (cyrillic)', 'Ozbekcha', 'uz_UZ', ' 2 ', 'utf_cyr2', 'uz@Cyrl:uz' ], 've' => [ 'Venda', 'Tshivenda', 've_ZA', ' 3 ', 'utf_lat1', 've:ven:en_ZA' ], 'vi' => [ 'Vietnamese', 'Tieng Viet', 'vi_VN', ' 2 ', 'utf_vi' ], 'wa' => [ 'Walon', 'Walon', 'wa_BE', '1 ', 'utf_lat1', 'wa:fr_BE:fr' ], #- locale "wen_DE" not done yet, using "de_DE" instead #- wen disabled until we have a perl-install/pixmaps/langs/lang-wen.png for it #'wen' => [ 'Sorbian', 'Sorbian', 'de_DE', '1 ', 'utf_lat1', 'wen' ], 'xh' => [ 'Xhosa', 'IsiXhosa', 'xh_ZA', ' 3 ', 'utf_lat1', 'xh:en_ZA' ], 'yi' => [ 'Yiddish', 'AA Yidish', 'yi_US', '1 ', 'utf_he' ], 'yo' => [ 'Yoruba', 'Yoruba', 'yo_NG', ' 3 ', 'utf_yo', 'yo:en_NG' ], 'zh_CN' => [ 'Chinese Simplified', 'ZZ ZhongWen', 'zh_CN', ' 2 ', 'gb2312', 'zh_CN.GBK:zh_CN.GB2312:zh_CN:zh' ], 'zh_TW' => [ 'Chinese 
Traditional', 'ZZ ZhongWen', 'zh_TW', ' 2 ', 'Big5', 'zh_TW.Big5:zh_TW:zh_HK:zh' ], 'zu' => [ 'Zulu', 'IsiZulu', 'zu_ZA', ' 3 ', 'utf_lat1', 'xh:en_ZA' ],