Files
Brandyn / Techy fcc1b09210 init
2026-04-04 15:40:51 -05:00

279 lines
13 KiB
C++
Raw Permalink Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
// Copyright Epic Games, Inc. All Rights Reserved.
#include "Utilities/ISO639-Map.h"
namespace Electra
{
namespace ISO639
{
struct FCodeEntry
{
const TCHAR* _2B; // ISO-639-2B
const TCHAR* _3; // ISO-639-3 (same as ISO 639-2T)
const TCHAR* _1; // ISO-639-1
};
/*
This map was generated as given by https://en.wikipedia.org/wiki/List_of_ISO_639-2_codes
Elements for which no 2 letter ISO-639-1 exists have been removed.
*/
static TArray<FCodeEntry> CodeMap =
{
{TEXT("aar"), TEXT("aar"), TEXT("aa")}, // Afar
{TEXT("abk"), TEXT("abk"), TEXT("ab")}, // Abkhazian
{TEXT("afr"), TEXT("afr"), TEXT("af")}, // Afrikaans
{TEXT("aka"), TEXT("aka"), TEXT("ak")}, // Akan
{TEXT("amh"), TEXT("amh"), TEXT("am")}, // Amharic
{TEXT("ara"), TEXT("ara"), TEXT("ar")}, // Arabic
{TEXT("arg"), TEXT("arg"), TEXT("an")}, // Aragonese
{TEXT("asm"), TEXT("asm"), TEXT("as")}, // Assamese
{TEXT("ava"), TEXT("ava"), TEXT("av")}, // Avaric
{TEXT("ave"), TEXT("ave"), TEXT("ae")}, // Avestan
{TEXT("aym"), TEXT("aym"), TEXT("ay")}, // Aymara
{TEXT("aze"), TEXT("aze"), TEXT("az")}, // Azerbaijani
{TEXT("bak"), TEXT("bak"), TEXT("ba")}, // Bashkir
{TEXT("bam"), TEXT("bam"), TEXT("bm")}, // Bambara
{TEXT("bel"), TEXT("bel"), TEXT("be")}, // Belarusian
{TEXT("ben"), TEXT("ben"), TEXT("bn")}, // Bengali
{TEXT("bis"), TEXT("bis"), TEXT("bi")}, // Bislama
{TEXT("tib"), TEXT("bod"), TEXT("bo")}, // Tibetan
{TEXT("bos"), TEXT("bos"), TEXT("bs")}, // Bosnian
{TEXT("bre"), TEXT("bre"), TEXT("br")}, // Breton
{TEXT("bul"), TEXT("bul"), TEXT("bg")}, // Bulgarian
{TEXT("cat"), TEXT("cat"), TEXT("ca")}, // Catalan; Valencian
{TEXT("cze"), TEXT("ces"), TEXT("cs")}, // Czech
{TEXT("cha"), TEXT("cha"), TEXT("ch")}, // Chamorro
{TEXT("che"), TEXT("che"), TEXT("ce")}, // Chechen
{TEXT("chu"), TEXT("chu"), TEXT("cu")}, // Church Slavic; Old Slavonic; Church Slavonic; Old Bulgarian; Old Church Slavonic
{TEXT("chv"), TEXT("chv"), TEXT("cv")}, // Chuvash
{TEXT("cor"), TEXT("cor"), TEXT("kw")}, // Cornish
{TEXT("cos"), TEXT("cos"), TEXT("co")}, // Corsican
{TEXT("cre"), TEXT("cre"), TEXT("cr")}, // Cree
{TEXT("wel"), TEXT("cym"), TEXT("cy")}, // Welsh
{TEXT("dan"), TEXT("dan"), TEXT("da")}, // Danish
{TEXT("ger"), TEXT("deu"), TEXT("de")}, // German
{TEXT("div"), TEXT("div"), TEXT("dv")}, // Divehi; Dhivehi; Maldivian
{TEXT("dzo"), TEXT("dzo"), TEXT("dz")}, // Dzongkha
{TEXT("gre"), TEXT("ell"), TEXT("el")}, // Greek, Modern (1453)
{TEXT("eng"), TEXT("eng"), TEXT("en")}, // English
{TEXT("epo"), TEXT("epo"), TEXT("eo")}, // Esperanto
{TEXT("est"), TEXT("est"), TEXT("et")}, // Estonian
{TEXT("baq"), TEXT("eus"), TEXT("eu")}, // Basque
{TEXT("ewe"), TEXT("ewe"), TEXT("ee")}, // Ewe
{TEXT("fao"), TEXT("fao"), TEXT("fo")}, // Faroese
{TEXT("per"), TEXT("fas"), TEXT("fa")}, // Persian
{TEXT("fij"), TEXT("fij"), TEXT("fj")}, // Fijian
{TEXT("fin"), TEXT("fin"), TEXT("fi")}, // Finnish
{TEXT("fre"), TEXT("fra"), TEXT("fr")}, // French
{TEXT("fry"), TEXT("fry"), TEXT("fy")}, // Western Frisian
{TEXT("ful"), TEXT("ful"), TEXT("ff")}, // Fulah
{TEXT("gla"), TEXT("gla"), TEXT("gd")}, // Gaelic; Scottish Gaelic
{TEXT("gle"), TEXT("gle"), TEXT("ga")}, // Irish
{TEXT("glg"), TEXT("glg"), TEXT("gl")}, // Galician
{TEXT("glv"), TEXT("glv"), TEXT("gv")}, // Manx
{TEXT("grn"), TEXT("grn"), TEXT("gn")}, // Guarani
{TEXT("guj"), TEXT("guj"), TEXT("gu")}, // Gujarati
{TEXT("hat"), TEXT("hat"), TEXT("ht")}, // Haitian; Haitian Creole
{TEXT("hau"), TEXT("hau"), TEXT("ha")}, // Hausa
{TEXT("heb"), TEXT("heb"), TEXT("he")}, // Hebrew
{TEXT("her"), TEXT("her"), TEXT("hz")}, // Herero
{TEXT("hin"), TEXT("hin"), TEXT("hi")}, // Hindi
{TEXT("hmo"), TEXT("hmo"), TEXT("ho")}, // Hiri Motu
{TEXT("hrv"), TEXT("hrv"), TEXT("hr")}, // Croatian
{TEXT("hun"), TEXT("hun"), TEXT("hu")}, // Hungarian
{TEXT("arm"), TEXT("hye"), TEXT("hy")}, // Armenian
{TEXT("ibo"), TEXT("ibo"), TEXT("ig")}, // Igbo
{TEXT("ido"), TEXT("ido"), TEXT("io")}, // Ido
{TEXT("iii"), TEXT("iii"), TEXT("ii")}, // Sichuan Yi; Nuosu
{TEXT("iku"), TEXT("iku"), TEXT("iu")}, // Inuktitut
{TEXT("ile"), TEXT("ile"), TEXT("ie")}, // Interlingue; Occidental
{TEXT("ina"), TEXT("ina"), TEXT("ia")}, // Interlingua (International Auxiliary Language Association)
{TEXT("ind"), TEXT("ind"), TEXT("id")}, // Indonesian
{TEXT("ipk"), TEXT("ipk"), TEXT("ik")}, // Inupiaq
{TEXT("ice"), TEXT("isl"), TEXT("is")}, // Icelandic
{TEXT("ita"), TEXT("ita"), TEXT("it")}, // Italian
{TEXT("jav"), TEXT("jav"), TEXT("jv")}, // Javanese
{TEXT("jpn"), TEXT("jpn"), TEXT("ja")}, // Japanese
{TEXT("kal"), TEXT("kal"), TEXT("kl")}, // Kalaallisut; Greenlandic
{TEXT("kan"), TEXT("kan"), TEXT("kn")}, // Kannada
{TEXT("kas"), TEXT("kas"), TEXT("ks")}, // Kashmiri
{TEXT("geo"), TEXT("kat"), TEXT("ka")}, // Georgian
{TEXT("kau"), TEXT("kau"), TEXT("kr")}, // Kanuri
{TEXT("kaz"), TEXT("kaz"), TEXT("kk")}, // Kazakh
{TEXT("khm"), TEXT("khm"), TEXT("km")}, // Central Khmer
{TEXT("kik"), TEXT("kik"), TEXT("ki")}, // Kikuyu; Gikuyu
{TEXT("kin"), TEXT("kin"), TEXT("rw")}, // Kinyarwanda
{TEXT("kir"), TEXT("kir"), TEXT("ky")}, // Kirghiz; Kyrgyz
{TEXT("kom"), TEXT("kom"), TEXT("kv")}, // Komi
{TEXT("kon"), TEXT("kon"), TEXT("kg")}, // Kongo
{TEXT("kor"), TEXT("kor"), TEXT("ko")}, // Korean
{TEXT("kua"), TEXT("kua"), TEXT("kj")}, // Kuanyama; Kwanyama
{TEXT("kur"), TEXT("kur"), TEXT("ku")}, // Kurdish
{TEXT("lao"), TEXT("lao"), TEXT("lo")}, // Lao
{TEXT("lat"), TEXT("lat"), TEXT("la")}, // Latin
{TEXT("lav"), TEXT("lav"), TEXT("lv")}, // Latvian
{TEXT("lim"), TEXT("lim"), TEXT("li")}, // Limburgan; Limburger; Limburgish
{TEXT("lin"), TEXT("lin"), TEXT("ln")}, // Lingala
{TEXT("lit"), TEXT("lit"), TEXT("lt")}, // Lithuanian
{TEXT("ltz"), TEXT("ltz"), TEXT("lb")}, // Luxembourgish; Letzeburgesch
{TEXT("lub"), TEXT("lub"), TEXT("lu")}, // Luba-Katanga
{TEXT("lug"), TEXT("lug"), TEXT("lg")}, // Ganda
{TEXT("mah"), TEXT("mah"), TEXT("mh")}, // Marshallese
{TEXT("mal"), TEXT("mal"), TEXT("ml")}, // Malayalam
{TEXT("mar"), TEXT("mar"), TEXT("mr")}, // Marathi
{TEXT("mac"), TEXT("mkd"), TEXT("mk")}, // Macedonian
{TEXT("mlg"), TEXT("mlg"), TEXT("mg")}, // Malagasy
{TEXT("mlt"), TEXT("mlt"), TEXT("mt")}, // Maltese
{TEXT("mon"), TEXT("mon"), TEXT("mn")}, // Mongolian
{TEXT("mao"), TEXT("mri"), TEXT("mi")}, // Māori
{TEXT("may"), TEXT("msa"), TEXT("ms")}, // Malay
{TEXT("bur"), TEXT("mya"), TEXT("my")}, // Burmese
{TEXT("nau"), TEXT("nau"), TEXT("na")}, // Nauru
{TEXT("nav"), TEXT("nav"), TEXT("nv")}, // Navajo; Navaho
{TEXT("nbl"), TEXT("nbl"), TEXT("nr")}, // Ndebele, South; South Ndebele
{TEXT("nde"), TEXT("nde"), TEXT("nd")}, // Ndebele, North; North Ndebele
{TEXT("ndo"), TEXT("ndo"), TEXT("ng")}, // Ndonga
{TEXT("nep"), TEXT("nep"), TEXT("ne")}, // Nepali
{TEXT("dut"), TEXT("nld"), TEXT("nl")}, // Dutch; Flemish
{TEXT("nno"), TEXT("nno"), TEXT("nn")}, // Norwegian Nynorsk; Nynorsk, Norwegian
{TEXT("nob"), TEXT("nob"), TEXT("nb")}, // Bokmål, Norwegian; Norwegian Bokmål
{TEXT("nor"), TEXT("nor"), TEXT("no")}, // Norwegian
{TEXT("nya"), TEXT("nya"), TEXT("ny")}, // Chichewa; Chewa; Nyanja
{TEXT("oci"), TEXT("oci"), TEXT("oc")}, // Occitan (post 1500)
{TEXT("oji"), TEXT("oji"), TEXT("oj")}, // Ojibwa
{TEXT("ori"), TEXT("ori"), TEXT("or")}, // Oriya
{TEXT("orm"), TEXT("orm"), TEXT("om")}, // Oromo
{TEXT("oss"), TEXT("oss"), TEXT("os")}, // Ossetian; Ossetic
{TEXT("pan"), TEXT("pan"), TEXT("pa")}, // Panjabi; Punjabi
{TEXT("pli"), TEXT("pli"), TEXT("pi")}, // Pali
{TEXT("pol"), TEXT("pol"), TEXT("pl")}, // Polish
{TEXT("por"), TEXT("por"), TEXT("pt")}, // Portuguese
{TEXT("pus"), TEXT("pus"), TEXT("ps")}, // Pushto; Pashto
{TEXT("que"), TEXT("que"), TEXT("qu")}, // Quechua
{TEXT("roh"), TEXT("roh"), TEXT("rm")}, // Romansh
{TEXT("rum"), TEXT("ron"), TEXT("ro")}, // Romanian; Moldavian; Moldovan
{TEXT("run"), TEXT("run"), TEXT("rn")}, // Rundi
{TEXT("rus"), TEXT("rus"), TEXT("ru")}, // Russian
{TEXT("sag"), TEXT("sag"), TEXT("sg")}, // Sango
{TEXT("san"), TEXT("san"), TEXT("sa")}, // Sanskrit
{TEXT("sin"), TEXT("sin"), TEXT("si")}, // Sinhala; Sinhalese
{TEXT("slo"), TEXT("slk"), TEXT("sk")}, // Slovak
{TEXT("slv"), TEXT("slv"), TEXT("sl")}, // Slovenian
{TEXT("sme"), TEXT("sme"), TEXT("se")}, // Northern Sami
{TEXT("smo"), TEXT("smo"), TEXT("sm")}, // Samoan
{TEXT("sna"), TEXT("sna"), TEXT("sn")}, // Shona
{TEXT("snd"), TEXT("snd"), TEXT("sd")}, // Sindhi
{TEXT("som"), TEXT("som"), TEXT("so")}, // Somali
{TEXT("sot"), TEXT("sot"), TEXT("st")}, // Sotho, Southern
{TEXT("spa"), TEXT("spa"), TEXT("es")}, // Spanish; Castilian
{TEXT("alb"), TEXT("sqi"), TEXT("sq")}, // Albanian
{TEXT("srd"), TEXT("srd"), TEXT("sc")}, // Sardinian
{TEXT("srp"), TEXT("srp"), TEXT("sr")}, // Serbian
{TEXT("ssw"), TEXT("ssw"), TEXT("ss")}, // Swati
{TEXT("sun"), TEXT("sun"), TEXT("su")}, // Sundanese
{TEXT("swa"), TEXT("swa"), TEXT("sw")}, // Swahili
{TEXT("swe"), TEXT("swe"), TEXT("sv")}, // Swedish
{TEXT("tah"), TEXT("tah"), TEXT("ty")}, // Tahitian
{TEXT("tam"), TEXT("tam"), TEXT("ta")}, // Tamil
{TEXT("tat"), TEXT("tat"), TEXT("tt")}, // Tatar
{TEXT("tel"), TEXT("tel"), TEXT("te")}, // Telugu
{TEXT("tgk"), TEXT("tgk"), TEXT("tg")}, // Tajik
{TEXT("tgl"), TEXT("tgl"), TEXT("tl")}, // Tagalog
{TEXT("tha"), TEXT("tha"), TEXT("th")}, // Thai
{TEXT("tir"), TEXT("tir"), TEXT("ti")}, // Tigrinya
{TEXT("ton"), TEXT("ton"), TEXT("to")}, // Tonga (Tonga Islands)
{TEXT("tsn"), TEXT("tsn"), TEXT("tn")}, // Tswana
{TEXT("tso"), TEXT("tso"), TEXT("ts")}, // Tsonga
{TEXT("tuk"), TEXT("tuk"), TEXT("tk")}, // Turkmen
{TEXT("tur"), TEXT("tur"), TEXT("tr")}, // Turkish
{TEXT("twi"), TEXT("twi"), TEXT("tw")}, // Twi
{TEXT("uig"), TEXT("uig"), TEXT("ug")}, // Uighur; Uyghur
{TEXT("ukr"), TEXT("ukr"), TEXT("uk")}, // Ukrainian
{TEXT("urd"), TEXT("urd"), TEXT("ur")}, // Urdu
{TEXT("uzb"), TEXT("uzb"), TEXT("uz")}, // Uzbek
{TEXT("ven"), TEXT("ven"), TEXT("ve")}, // Venda
{TEXT("vie"), TEXT("vie"), TEXT("vi")}, // Vietnamese
{TEXT("vol"), TEXT("vol"), TEXT("vo")}, // Volapük
{TEXT("wln"), TEXT("wln"), TEXT("wa")}, // Walloon
{TEXT("wol"), TEXT("wol"), TEXT("wo")}, // Wolof
{TEXT("xho"), TEXT("xho"), TEXT("xh")}, // Xhosa
{TEXT("yid"), TEXT("yid"), TEXT("yi")}, // Yiddish
{TEXT("yor"), TEXT("yor"), TEXT("yo")}, // Yoruba
{TEXT("zha"), TEXT("zha"), TEXT("za")}, // Zhuang; Chuang
{TEXT("chi"), TEXT("zho"), TEXT("zh")}, // Chinese
{TEXT("zul"), TEXT("zul"), TEXT("zu")} // Zulu
};
// Undefined
static const TCHAR* const CodeUnd(TEXT("und"));
const TCHAR* Get639_1(const FString& InFrom639_1_2_3)
{
// Already a 2 letter code?
if (InFrom639_1_2_3.Len() == 2)
{
// Look it up in the table if it exists.
int32 Index = CodeMap.IndexOfByPredicate([&](const FCodeEntry& Entry) { return InFrom639_1_2_3.Equals(Entry._1); });
if (Index != INDEX_NONE)
{
return CodeMap[Index]._1;
}
}
else if (InFrom639_1_2_3.Len() == 3)
{
// Three letter code, either ISO-639-3 or ISO-639-2B. Look it up in either table.
int32 Index = CodeMap.IndexOfByPredicate([&](const FCodeEntry& Entry) { return InFrom639_1_2_3.Equals(Entry._3); });
if (Index == INDEX_NONE)
{
Index = CodeMap.IndexOfByPredicate([&](const FCodeEntry& Entry) { return InFrom639_1_2_3.Equals(Entry._2B); });
}
if (Index != INDEX_NONE)
{
return CodeMap[Index]._1;
}
else
{
/*
The standard includes some codes for special situations:
mis, for "uncoded languages"
mul, for "multiple languages"
qaa-qtz, a range reserved for local use
und, for "undetermined"
zxx, for "no linguistic content; not applicable"
*/
if (InFrom639_1_2_3.Equals(TEXT("mis")) || InFrom639_1_2_3.Equals(TEXT("zxx")) || InFrom639_1_2_3.Equals(TEXT("mul")))
{
// Map those to "und", even though this is not a 639-1 code!
return CodeUnd;
}
}
}
return nullptr;
}
FString MapTo639_1(const FString& InFrom639_1_2_3)
{
const TCHAR* Mapped = Get639_1(InFrom639_1_2_3);
return Mapped ? Mapped : InFrom639_1_2_3;
}
FString RFC5646To639_1(const FString& InFromRFC5646)
{
FString s(InFromRFC5646);
// We only look at the primary language tag before the first '-',
// which by definition happens to be the "shortest ISO 639 code".
int32 DashPos = INDEX_NONE;
if (s.FindChar(TCHAR('-'), DashPos))
{
s.LeftInline(DashPos);
}
return MapTo639_1(s);
}
}
}