lxgui
gui_localizer.cpp
1 #include "lxgui/gui_localizer.hpp"
2 
3 #include "lxgui/gui_exception.hpp"
4 #include "lxgui/gui_out.hpp"
5 #include "lxgui/utils_file_system.hpp"
6 #include "lxgui/utils_range.hpp"
7 #include "lxgui/utils_string.hpp"
8 #include "lxgui/utils_variant.hpp"
9 
10 #include <cstdlib>
11 #include <cstring>
12 #include <fmt/args.h>
13 #include <functional>
14 #include <lxgui/extern_sol2_state.hpp>
15 
16 namespace lxgui::gui {
17 
18 namespace {
19 
// Looks up an environment variable.
// \param name Name of the variable to read
// \return The variable's value, or an empty string when it is not set (or cannot be read)
std::string get_environment_variable(const std::string& name) {
#if defined(LXGUI_PLATFORM_WINDOWS)
    // MSVC rejects std::getenv as unsafe; _dupenv_s returns a heap copy that must be freed.
    char*       raw_value = nullptr;
    std::size_t length    = 0;
    const bool  lookup_ok = _dupenv_s(&raw_value, &length, name.c_str()) == 0;
    if (!lookup_ok || raw_value == nullptr)
        return "";

    std::string value(raw_value);
    free(raw_value);
    return value;
#else
    if (const char* value = std::getenv(name.c_str()))
        return value;

    return "";
#endif
}
36 
37 std::vector<std::string> get_default_languages() {
38  // First try parsing the LANGUAGE environment variable.
39  // This is the best, because it lets the user specify a list of languages
40  // in descending priority, so if a translation is unavailable in their
41  // primary language, they may still get another match which would be
42  // better for them than the default enUS (e.g., a French person could
43  // prefer to fall back on a Spanish translation rather than English).
44  const std::string language_var = get_environment_variable("LANGUAGE");
45  if (!language_var.empty()) {
46  std::vector<std::string> output;
47  for (auto language : utils::cut(language_var, ":")) {
48  std::string language_normalized{language};
49  utils::replace(language_normalized, "_", "");
50  if (language_normalized.size() == 4)
51  output.push_back(language_normalized);
52  }
53 
54  if (!output.empty())
55  return output;
56  }
57 
58 #if defined(LXGUI_PLATFORM_WINDOWS)
59  // If LANGUAGE is not specified, on Windows, try OS-specific function.
60  // TODO: https://github.com/cschreib/lxgui/issues/95
61 #endif
62 
63  // If LANGUAGE is not specified or empty, try LANG.
64  std::string lang = get_environment_variable("LANG");
65  if (!lang.empty()) {
66  auto pos1 = lang.find_first_of(".@");
67  if (pos1 != std::string::npos)
68  lang = lang.substr(0, pos1);
69 
70  utils::replace(lang, "_", "");
71  if (lang.size() == 4)
72  return {lang};
73  }
74 
75  return {"enUS"};
76 }
77 } // namespace
78 
// NOTE(review): the signature line that opens this body is not present in the file as
// reviewed; the comments here describe only the statements that are shown.
// Initialises locale_ and prepares the Lua state used to evaluate translations.
80  try {
81  // Try to set locale to system default.
82  locale_ = std::locale("");
83  } catch (const std::exception& exception) {
84  // Revert to C locale.
85  locale_ = std::locale::classic();
86  gui::out << gui::error << "gui::locale: " << exception.what() << std::endl;
87  gui::out << gui::error << "gui::locale: reverting to default classic locale" << std::endl;
88  }
89 
90  // Find default languages.
93 
94  // Set up Lua sandbox.
// NOTE(review): io, os and debug are opened below, so translation scripts can reach the
// file system and the OS; confirm this is intended for something described as a sandbox.
95  lua_.open_libraries(
96  sol::lib::base, sol::lib::math, sol::lib::table, sol::lib::io, sol::lib::os,
97  sol::lib::string, sol::lib::debug);
98 
99  // Give the translation Lua state the localize_string and format_string function, so
100  // it can call it recursively as needed, but not the other functions
101  // which could load more translation strings.
// NOTE(review): both closures use [&] and call member functions, so they hold `this`;
// presumably lua_ is a member and cannot outlive the object, but a copied or moved
// localizer would keep calling into the original instance. TODO confirm.
102  lua_.set_function("localize_string", [&](const std::string& key, sol::variadic_args args) {
103  return localize(key, args);
104  });
105  lua_.set_function("format_string", [&](const std::string& key, sol::variadic_args args) {
106  return format_string(key, args);
107  });
108 }
109 
110 void localizer::set_locale(const std::locale& locale) {
111  if (locale_ == locale)
112  return;
113 
114  locale_ = locale;
116 }
117 
118 void localizer::set_preferred_languages(const std::vector<std::string>& languages) {
119 #if 0
120  // TODO implement more generic input checks
121  // https://github.com/cschreib/lxgui/issues/98
122  for (const auto& language: languages) {
123  if (language.size() != 4) {
124  throw gui::exception("gui::localizer", "language code must have exactly 4 characters");
125  }
126  }
127 #endif
128 
129  languages_ = languages;
131 }
132 
// NOTE(review): the signature line for this body is not present in the file as reviewed.
// Applies the languages derived from the LANGUAGE / LANG environment variables
// (get_default_languages() yields {"enUS"} when neither provides a usable code).
134  set_preferred_languages(get_default_languages());
135 }
136 
137 const std::locale& localizer::get_locale() const {
138  return locale_;
139 }
140 
141 const std::vector<std::string>& localizer::get_preferred_languages() const {
142  return languages_;
143 }
144 
// NOTE(review): the signature line for this body is not present in the file as reviewed.
// Empties code_points_, the container that get_allowed_code_points() exposes.
146  code_points_.clear();
147 }
148 
// NOTE(review): the signature line for this body is not present in the file as reviewed.
// Inserts `range` into code_points_, first absorbing every stored range that shares at
// least one code point with it, then re-sorting the container by ascending `first`.
// Ranges that only touch (e.g. [0x20,0x7e] and [0x7f,0xff]) share no code point and are
// therefore kept as separate entries. Throws gui::exception when range.last < range.first.
150  if (range.last < range.first)
151  throw gui::exception("gui::localizer", "code point range must have last >= first");
152 
153  code_point_range test_range = range;
154  auto iter = code_points_.begin();
155 
// Each pass merges one overlapping entry into test_range and removes it; the search then
// restarts from the beginning because the grown test_range may overlap further entries.
156  do {
157  // Find next overlapping range
158  iter = std::find_if(code_points_.begin(), code_points_.end(), [&](const auto& other) {
159  return (test_range.first >= other.first && test_range.first <= other.last) ||
160  (test_range.last >= other.first && test_range.last <= other.last) ||
161  (other.first >= test_range.first && other.first <= test_range.last) ||
162  (other.last >= test_range.first && other.last <= test_range.last);
163  });
164 
165  if (iter != code_points_.end()) {
166  // Combine the ranges
167  test_range.first = std::min(test_range.first, iter->first);
168  test_range.last = std::max(test_range.last, iter->last);
169 
170  // Erase the overlap
171  code_points_.erase(iter);
172  }
// NOTE(review): when an entry was erased above, `iter` has been invalidated by erase()
// (code_points_ is exposed as a std::vector by get_allowed_code_points()), yet it is
// compared with end() here. Keeping the iterator returned by erase(), or a separate
// "found" flag, would avoid relying on an invalidated iterator.
173  } while (iter != code_points_.end());
174 
175  // Add the new range
176  code_points_.push_back(test_range);
177 
178  // Sort by ascending code point
179  std::sort(code_points_.begin(), code_points_.end(), [](const auto& left, const auto& right) {
180  return left.first < right.first;
181  });
182 }
183 
184 void localizer::add_allowed_code_points_for_group(const std::string& unicode_group) {
185  // List from http://www.unicode.org/Public/5.2.0/ucdxml/ucd.all.flat.zip
186  // Adjusted "basic latin" and "latin-1 supplement" to remove non-printable chars.
187  static std::unordered_map<std::string, code_point_range> unicode_groups = {
188  {"basic latin", {0x0020, 0x007e}},
189  {"latin-1 supplement", {0x00a0, 0x00ff}},
190  {"latin extended-a", {0x0100, 0x017f}},
191  {"latin extended-b", {0x0180, 0x024f}},
192  {"ipa extensions", {0x0250, 0x02af}},
193  {"spacing modifier letters", {0x02b0, 0x02ff}},
194  {"combining diacritical marks", {0x0300, 0x036f}},
195  {"greek and coptic", {0x0370, 0x03ff}},
196  {"cyrillic", {0x0400, 0x04ff}},
197  {"cyrillic supplement", {0x0500, 0x052f}},
198  {"armenian", {0x0530, 0x058f}},
199  {"hebrew", {0x0590, 0x05ff}},
200  {"arabic", {0x0600, 0x06ff}},
201  {"syriac", {0x0700, 0x074f}},
202  {"arabic supplement", {0x0750, 0x077f}},
203  {"thaana", {0x0780, 0x07bf}},
204  {"nko", {0x07c0, 0x07ff}},
205  {"samaritan", {0x0800, 0x083f}},
206  // added manually! source https://en.wikipedia.org/wiki/Mandaic_script
207  {"mandaic", {0x0840, 0x085f}},
208  // added manually! source https://en.wikipedia.org/wiki/Arabic_alphabet
209  {"arabic extended-a", {0x08a0, 0x08ff}},
210  {"devanagari", {0x0900, 0x097f}},
211  {"bengali", {0x0980, 0x09ff}},
212  {"gurmukhi", {0x0a00, 0x0a7f}},
213  {"gujarati", {0x0a80, 0x0aff}},
214  {"oriya", {0x0b00, 0x0b7f}},
215  {"tamil", {0x0b80, 0x0bff}},
216  {"telugu", {0x0c00, 0x0c7f}},
217  {"kannada", {0x0c80, 0x0cff}},
218  {"malayalam", {0x0d00, 0x0d7f}},
219  {"sinhala", {0x0d80, 0x0dff}},
220  {"thai", {0x0e00, 0x0e7f}},
221  {"lao", {0x0e80, 0x0eff}},
222  {"tibetan", {0x0f00, 0x0fff}},
223  {"myanmar", {0x1000, 0x109f}},
224  {"georgian", {0x10a0, 0x10ff}},
225  {"hangul jamo", {0x1100, 0x11ff}},
226  {"ethiopic", {0x1200, 0x137f}},
227  {"ethiopic supplement", {0x1380, 0x139f}},
228  {"cherokee", {0x13a0, 0x13ff}},
229  {"unified canadian aboriginal syllabics", {0x1400, 0x167f}},
230  {"ogham", {0x1680, 0x169f}},
231  {"runic", {0x16a0, 0x16ff}},
232  {"tagalog", {0x1700, 0x171f}},
233  {"hanunoo", {0x1720, 0x173f}},
234  {"buhid", {0x1740, 0x175f}},
235  {"tagbanwa", {0x1760, 0x177f}},
236  {"khmer", {0x1780, 0x17ff}},
237  {"mongolian", {0x1800, 0x18af}},
238  {"unified canadian aboriginal syllabics extended", {0x18b0, 0x18ff}},
239  {"limbu", {0x1900, 0x194f}},
240  {"tai le", {0x1950, 0x197f}},
241  {"new tai lue", {0x1980, 0x19df}},
242  {"khmer symbols", {0x19e0, 0x19ff}},
243  {"buginese", {0x1a00, 0x1a1f}},
244  {"tai tham", {0x1a20, 0x1aaf}},
245  {"balinese", {0x1b00, 0x1b7f}},
246  {"sundanese", {0x1b80, 0x1bbf}},
247  // added manually! source https://en.wikipedia.org/wiki/Batak_script
248  {"batak", {0x1bc0, 0x1bff}},
249  {"lepcha", {0x1c00, 0x1c4f}},
250  {"ol chiki", {0x1c50, 0x1c7f}},
251  // added manually! source https://en.wikipedia.org/wiki/Cyrillic_script
252  {"cyrillic extended-c", {0x1c80, 0x1c8f}},
253  {"vedic extensions", {0x1cd0, 0x1cff}},
254  {"phonetic extensions", {0x1d00, 0x1d7f}},
255  {"phonetic extensions supplement", {0x1d80, 0x1dbf}},
256  {"combining diacritical marks supplement", {0x1dc0, 0x1dff}},
257  {"latin extended additional", {0x1e00, 0x1eff}},
258  {"greek extended", {0x1f00, 0x1fff}},
259  {"general punctuation", {0x2000, 0x206f}},
260  {"superscripts and subscripts", {0x2070, 0x209f}},
261  {"currency symbols", {0x20a0, 0x20cf}},
262  {"combining diacritical marks for symbols", {0x20d0, 0x20ff}},
263  {"letterlike symbols", {0x2100, 0x214f}},
264  {"number forms", {0x2150, 0x218f}},
265  {"arrows", {0x2190, 0x21ff}},
266  {"mathematical operators", {0x2200, 0x22ff}},
267  {"miscellaneous technical", {0x2300, 0x23ff}},
268  {"control pictures", {0x2400, 0x243f}},
269  {"optical character recognition", {0x2440, 0x245f}},
270  {"enclosed alphanumerics", {0x2460, 0x24ff}},
271  {"box drawing", {0x2500, 0x257f}},
272  {"block elements", {0x2580, 0x259f}},
273  {"geometric shapes", {0x25a0, 0x25ff}},
274  {"miscellaneous symbols", {0x2600, 0x26ff}},
275  {"dingbats", {0x2700, 0x27bf}},
276  {"miscellaneous mathematical symbols-a", {0x27c0, 0x27ef}},
277  {"supplemental arrows-a", {0x27f0, 0x27ff}},
278  {"braille patterns", {0x2800, 0x28ff}},
279  {"supplemental arrows-b", {0x2900, 0x297f}},
280  {"miscellaneous mathematical symbols-b", {0x2980, 0x29ff}},
281  {"supplemental mathematical operators", {0x2a00, 0x2aff}},
282  {"miscellaneous symbols and arrows", {0x2b00, 0x2bff}},
283  {"glagolitic", {0x2c00, 0x2c5f}},
284  {"latin extended-c", {0x2c60, 0x2c7f}},
285  {"coptic", {0x2c80, 0x2cff}},
286  {"georgian supplement", {0x2d00, 0x2d2f}},
287  {"tifinagh", {0x2d30, 0x2d7f}},
288  {"ethiopic extended", {0x2d80, 0x2ddf}},
289  {"cyrillic extended-a", {0x2de0, 0x2dff}},
290  {"supplemental punctuation", {0x2e00, 0x2e7f}},
291  {"cjk radicals supplement", {0x2e80, 0x2eff}},
292  {"kangxi radicals", {0x2f00, 0x2fdf}},
293  {"ideographic description characters", {0x2ff0, 0x2fff}},
294  {"cjk symbols and punctuation", {0x3000, 0x303f}},
295  {"hiragana", {0x3040, 0x309f}},
296  {"katakana", {0x30a0, 0x30ff}},
297  {"bopomofo", {0x3100, 0x312f}},
298  {"hangul compatibility jamo", {0x3130, 0x318f}},
299  {"kanbun", {0x3190, 0x319f}},
300  {"bopomofo extended", {0x31a0, 0x31bf}},
301  {"cjk strokes", {0x31c0, 0x31ef}},
302  {"katakana phonetic extensions", {0x31f0, 0x31ff}},
303  {"enclosed cjk letters and months", {0x3200, 0x32ff}},
304  {"cjk compatibility", {0x3300, 0x33ff}},
305  {"cjk unified ideographs extension a", {0x3400, 0x4dbf}},
306  {"yijing hexagram symbols", {0x4dc0, 0x4dff}},
307  {"cjk unified ideographs", {0x4e00, 0x9fff}},
308  {"yi syllables", {0xa000, 0xa48f}},
309  {"yi radicals", {0xa490, 0xa4cf}},
310  {"lisu", {0xa4d0, 0xa4ff}},
311  {"vai", {0xa500, 0xa63f}},
312  {"cyrillic extended-b", {0xa640, 0xa69f}},
313  {"bamum", {0xa6a0, 0xa6ff}},
314  {"modifier tone letters", {0xa700, 0xa71f}},
315  {"latin extended-d", {0xa720, 0xa7ff}},
316  {"syloti nagri", {0xa800, 0xa82f}},
317  {"common indic number forms", {0xa830, 0xa83f}},
318  {"phags-pa", {0xa840, 0xa87f}},
319  {"saurashtra", {0xa880, 0xa8df}},
320  {"devanagari extended", {0xa8e0, 0xa8ff}},
321  {"kayah li", {0xa900, 0xa92f}},
322  {"rejang", {0xa930, 0xa95f}},
323  {"hangul jamo extended-a", {0xa960, 0xa97f}},
324  {"javanese", {0xa980, 0xa9df}},
325  {"cham", {0xaa00, 0xaa5f}},
326  {"myanmar extended-a", {0xaa60, 0xaa7f}},
327  {"tai viet", {0xaa80, 0xaadf}},
328  // added manually! source https://en.wikipedia.org/wiki/Latin_script_in_Unicode
329  {"latin extended-e", {0xab30, 0xab6f}},
330  {"meetei mayek", {0xabc0, 0xabff}},
331  {"hangul syllables", {0xac00, 0xd7af}},
332  {"hangul jamo extended-b", {0xd7b0, 0xd7ff}},
333  {"high surrogates", {0xd800, 0xdb7f}},
334  {"high private use surrogates", {0xdb80, 0xdbff}},
335  {"low surrogates", {0xdc00, 0xdfff}},
336  {"private use area", {0xe000, 0xf8ff}},
337  {"cjk compatibility ideographs", {0xf900, 0xfaff}},
338  {"alphabetic presentation forms", {0xfb00, 0xfb4f}},
339  {"arabic presentation forms-a", {0xfb50, 0xfdff}},
340  {"variation selectors", {0xfe00, 0xfe0f}},
341  {"vertical forms", {0xfe10, 0xfe1f}},
342  {"combining half marks", {0xfe20, 0xfe2f}},
343  {"cjk compatibility forms", {0xfe30, 0xfe4f}},
344  {"small form variants", {0xfe50, 0xfe6f}},
345  {"arabic presentation forms-b", {0xfe70, 0xfeff}},
346  {"halfwidth and fullwidth forms", {0xff00, 0xffef}},
347  {"specials", {0xfff0, 0xffff}},
348  {"linear b syllabary", {0x10000, 0x1007f}},
349  {"linear b ideograms", {0x10080, 0x100ff}},
350  // added manually! source https://en.wikipedia.org/wiki/Caucasian_Albanian_script
351  {"caucasian albanian", {0x10530, 0x1056f}},
352  // added manually! source https://en.wikipedia.org/wiki/Linear_A
353  {"linear a", {0x10600, 0x1077f}},
354  {"aegean numbers", {0x10100, 0x1013f}},
355  {"ancient greek numbers", {0x10140, 0x1018f}},
356  {"ancient symbols", {0x10190, 0x101cf}},
357  {"phaistos disc", {0x101d0, 0x101ff}},
358  {"lycian", {0x10280, 0x1029f}},
359  {"carian", {0x102a0, 0x102df}},
360  {"old italic", {0x10300, 0x1032f}},
361  {"gothic", {0x10330, 0x1034f}},
362  {"permic", {0x10350, 0x1037f}},
363  {"ugaritic", {0x10380, 0x1039f}},
364  {"old persian", {0x103a0, 0x103df}},
365  {"deseret", {0x10400, 0x1044f}},
366  {"shavian", {0x10450, 0x1047f}},
367  {"osmanya", {0x10480, 0x104af}},
368  // added manually! source https://en.wikipedia.org/wiki/Osage_script
369  {"osage", {0x104b0, 0x104ff}},
370  // added manually! source https://en.wikipedia.org/wiki/Elbasan_script
371  {"elbasan", {0x10500, 0x1052f}},
372  // added manually! source https://en.wikipedia.org/wiki/Latin_script_in_Unicode
373  {"latin extended-f", {0x10780, 0x107bf}},
374  {"cypriot syllabary", {0x10800, 0x1083f}},
375  {"imperial aramaic", {0x10840, 0x1085f}},
376  {"phoenician", {0x10900, 0x1091f}},
377  {"lydian", {0x10920, 0x1093f}},
378  {"meroitic", {0x10980, 0x109ff}},
379  {"kharoshthi", {0x10a00, 0x10a5f}},
380  {"old south arabian", {0x10a60, 0x10a7f}},
381  // added manually! source https://en.wikipedia.org/wiki/Old_North_Arabian_(Unicode_block)
382  {"old north arabian", {0x10a80, 0x10a9f}},
383  // added manually! source https://en.wikipedia.org/wiki/Manichaean_script
384  {"manichaean", {0x10ac0, 0x10aff}},
385  {"avestan", {0x10b00, 0x10b3f}},
386  {"inscriptional parthian", {0x10b40, 0x10b5f}},
387  {"inscriptional pahlavi", {0x10b60, 0x10b7f}},
388  {"old turkic", {0x10c00, 0x10c4f}},
389  {"rumi numeral symbols", {0x10e60, 0x10e7f}},
390  {"kaithi", {0x11080, 0x110cf}},
391  // added manually! source https://en.wikipedia.org/wiki/Sorang_Sompeng_script
392  {"sora sompeng", {0x110d0, 0x110ff}},
393  // added manually! source https://en.wikipedia.org/wiki/Chakma_script
394  {"chakma", {0x11100, 0x1114f}},
395  // added manually! source https://en.wikipedia.org/wiki/Mahajani
396  {"mahajani", {0x11150, 0x1117f}},
397  // added manually! source https://en.wikipedia.org/wiki/Sharada_(Unicode_block)
398  {"sharada", {0x11180, 0x111df}},
399  // added manually! source https://en.wikipedia.org/wiki/Khojki_script
400  {"khojki", {0x11200, 0x1124f}},
401  // added manually! source https://en.wikipedia.org/wiki/Khudabadi_script
402  {"khudawadi", {0x112b0, 0x112ff}},
403  // added manually! source https://en.wikipedia.org/wiki/Grantha_script
404  {"grantha", {0x11300, 0x1137f}},
405  // added manually! source https://en.wikipedia.org/wiki/Tirhuta_script
406  {"tirhuta", {0x11480, 0x114df}},
407  // added manually! source https://en.wikipedia.org/wiki/Siddha%E1%B9%83_script
408  {"siddham", {0x11580, 0x115ff}},
409  // added manually! source https://en.wikipedia.org/wiki/Modi_script
410  {"modi", {0x11600, 0x1165f}},
411  // added manually! source https://en.wikipedia.org/wiki/Takri_script
412  {"takri", {0x11680, 0x116cf}},
413  // added manually! source https://en.wikipedia.org/wiki/Warang_Citi
414  {"varang kshiti", {0x118a0, 0x118ff}},
415  {"cuneiform", {0x12000, 0x123ff}},
416  {"cuneiform numbers and punctuation", {0x12400, 0x1247f}},
417  {"egyptian hieroglyphs", {0x13000, 0x1342f}},
418  {"byzantine musical symbols", {0x1d000, 0x1d0ff}},
419  {"musical symbols", {0x1d100, 0x1d1ff}},
420  {"ancient greek musical notation", {0x1d200, 0x1d24f}},
421  {"tai xuan jing symbols", {0x1d300, 0x1d35f}},
422  {"counting rod numerals", {0x1d360, 0x1d37f}},
423  {"mathematical alphanumeric symbols", {0x1d400, 0x1d7ff}},
424  {"adlam", {0x1e800, 0x1e8df}},
425  // added manually! source https://en.wikipedia.org/wiki/Mende_Kikakui_script
426  {"mende", {0x1e900, 0x1e95f}},
427  // added manually! source https://en.wikipedia.org/wiki/Arabic_alphabet
428  {"arabic mathematical alphabetic symbols", {0x1ee00, 0x1eeff}},
429  {"mahjong tiles", {0x1f000, 0x1f02f}},
430  {"domino tiles", {0x1f030, 0x1f09f}},
431  {"enclosed alphanumeric supplement", {0x1f100, 0x1f1ff}},
432  {"enclosed ideographic supplement", {0x1f200, 0x1f2ff}},
433  // added manually! source https://en.wikipedia.org/wiki/Mro_(Unicode_block)
434  {"mro", {0x16a40, 0x16a6f}},
435  // added manually! source https://en.wikipedia.org/wiki/Pahawh_Hmong
436  {"pahawh hmong", {0x16b00, 0x16bbf}},
437  // added manually! source https://en.wikipedia.org/wiki/Pollard_script
438  {"pollard", {0x16f00, 0x16f9f}},
439  // added manually! source https://en.wikipedia.org/wiki/Latin_script_in_Unicode
440  {"latin extended-g", {0x1df00, 0x1dfff}},
441  {"cjk unified ideographs extension b", {0x20000, 0x2a6df}},
442  {"cjk unified ideographs extension c", {0x2a700, 0x2b73f}},
443  // added manually! source https://en.wikipedia.org/wiki/Han_unification
444  {"cjk unified ideographs extension d", {0x2b740, 0x2b81f}},
445  // added manually! source https://en.wikipedia.org/wiki/Han_unification
446  {"cjk unified ideographs extension e", {0x2b820, 0x2ceaf}},
447  // added manually! source https://en.wikipedia.org/wiki/Han_unification
448  {"cjk unified ideographs extension f", {0x2ceb0, 0x2ebef}},
449  {"cjk compatibility ideographs supplement", {0x2f800, 0x2fa1f}},
450  // added manually! source https://en.wikipedia.org/wiki/Han_unification
451  {"cjk unified ideographs extension g", {0x30000, 0x3134f}},
452  {"tags", {0xe0000, 0xe007f}},
453  {"variation selectors supplement", {0xe0100, 0xe01ef}},
454  {"supplementary private use area-a", {0xf0000, 0xfffff}},
455  {"supplementary private use area-b", {0x100000, 0x10ffff}}};
456 
457  auto iter = unicode_groups.find(utils::to_lower(unicode_group));
458  if (iter == unicode_groups.end())
459  throw gui::exception("gui::localizer", "unknown Unicode group '" + unicode_group + "'");
460 
461  add_allowed_code_points(iter->second);
462 }
463 
464 void localizer::add_allowed_code_points_for_language(const std::string& language_code) {
465  // Lists from http://unicode.org/Public/cldr/39/cldr-common-39.0.zip
466  // Mapped manually to Unicode groups above with the help of
467  // https://unicode-org.github.io/cldr-staging/charts/37/supplemental/scripts_and_languages.html
468  static const std::vector<std::pair<std::vector<std::string>, std::vector<std::string>>>
469  scripts = {
470  {{"basic latin", "latin-1 supplement", "latin extended-a", "latin extended-b",
471  "latin extended-c", "latin extended-d", "latin extended-e", "latin extended-f",
472  "latin extended-g", "latin extended additional"},
473  {"aa", "abr", "ace", "ach", "ada", "af", "agq", "ain", "ak", "akz", "ale", "aln",
474  "amo", "an", "ang", "aoz", "arn", "aro", "arp", "arw", "asa", "ast", "atj", "avk",
475  "ay", "az", "bal", "ban", "bar", "bas", "bbc", "bbj", "bci", "bem", "bew", "bez",
476  "bfd", "bi", "bik", "bin", "bjn", "bkm", "bku", "bla", "bm", "bmq", "bqv", "br",
477  "brh", "bs", "bss", "bto", "buc", "bug", "bum", "bvb", "byv", "bze", "bzx", "ca",
478  "cad", "car", "cay", "cch", "ceb", "cgg", "ch", "chk", "chn", "cho", "chp", "chy",
479  "cic", "co", "cps", "cr", "crj", "crl", "crs", "cs", "csb", "ctd", "cy", "da",
480  "dak", "dav", "de", "del", "den", "dgr", "din", "dje", "dnj", "dsb", "dtm", "dtp",
481  "dua", "dum", "dyo", "dyu", "ebu", "ee", "efi", "egl", "eka", "en", "enm", "eo",
482  "es", "esu", "et", "ett", "eu", "ewo", "ext", "fan", "ff", "ffm", "fi", "fil",
483  "fit", "fj", "fo", "fon", "fr", "frc", "frm", "fro", "frp", "frr", "frs", "fud",
484  "fuq", "fur", "fuv", "fvr", "fy", "ga", "gaa", "gag", "gay", "gba", "gcr", "gd",
485  "gil", "gl", "gmh", "gn", "goh", "gor", "gos", "grb", "gsw", "gub", "guc", "gur",
486  "guz", "gv", "gwi", "ha", "hai", "haw", "hi", "hif", "hil", "hmn", "hnn", "ho",
487  "hop", "hr", "hsb", "ht", "hu", "hup", "hz", "ia", "iba", "ibb", "id", "ife",
488  "ig", "ii", "ik", "ikt", "ilo", "inh", "is", "it", "iu", "izh", "jam", "jgo",
489  "jmc", "jut", "jv", "kab", "kac", "kaj", "kam", "kao", "kcg", "kck", "kde", "kea",
490  "kfo", "kg", "kge", "kgp", "kha", "khq", "ki", "kiu", "kj", "kjg", "kkj", "kl",
491  "kln", "kmb", "kos", "kpe", "kr", "kri", "krj", "krl", "ksb", "ksf", "ksh", "ku",
492  "kut", "kvr", "kw", "ky", "la", "lag", "laj", "lam", "lb", "lbw", "lfn", "lg",
493  "li", "lij", "liv", "ljp", "lkt", "lmo", "ln", "lol", "loz", "lt", "ltg", "lu",
494  "lua", "lui", "lun", "luo", "lut", "luy", "lv", "lzz", "mad", "maf", "mak", "man",
495  "mas", "maz", "mdh", "mdr", "mdt", "men", "mer", "mfe", "mg", "mgh", "mgo", "mgy",
496  "mh", "mi", "mic", "min", "mls", "moe", "moh", "mos", "mro", "ms", "mt", "mua",
497  "mus", "mwk", "mwl", "mwv", "mxc", "myx", "na", "nap", "naq", "nb", "nch", "nd",
498  "ndc", "nds", "ng", "ngl", "nhe", "nhw", "nia", "nij", "niu", "njo", "nl", "nmg",
499  "nn", "nnh", "no", "nov", "nr", "nsk", "nso", "nus", "nv", "nxq", "ny", "nym",
500  "nyn", "nyo", "nzi", "oc", "oj", "om", "osa", "osc", "pag", "pam", "pap", "pau",
501  "pcd", "pcm", "pdc", "pdt", "pfl", "pko", "pl", "pms", "pnt", "pon", "prg", "pro",
502  "pt", "puu", "qu", "quc", "qug", "rap", "rar", "rcf", "rej", "rgn", "ria", "rif",
503  "rm", "rmf", "rmo", "rmu", "rn", "rng", "ro", "rob", "rof", "rom", "rtm", "rug",
504  "rup", "rw", "rwk", "sad", "saf", "saq", "sas", "sat", "sbp", "sc", "scn", "sco",
505  "scs", "sdc", "se", "see", "sef", "seh", "sei", "ses", "sg", "sga", "sgs", "shi",
506  "sid", "sk", "sl", "sli", "sly", "sm", "sma", "smj", "smn", "sms", "sn", "snk",
507  "so", "sq", "sr", "srb", "srn", "srr", "ss", "ssy", "st", "stq", "su", "suk",
508  "sus", "sv", "sw", "swb", "swg", "sxn", "syi", "szl", "tbw", "tem", "teo", "ter",
509  "tet", "tg", "tiv", "tk", "tkl", "tkr", "tli", "tly", "tmh", "tn", "to", "tog",
510  "tpi", "tr", "tru", "trv", "ts", "tsg", "tsi", "ttj", "ttt", "tum", "tvl", "twq",
511  "ty", "tzm", "udm", "ug", "uli", "umb", "uz", "vai", "ve", "vec", "vep", "vi",
512  "vic", "vls", "vmf", "vmw", "vo", "vot", "vro", "vun", "wa", "wae", "war", "was",
513  "wbp", "wls", "wo", "xav", "xh", "xog", "xum", "yao", "yap", "yav", "ybb", "yo",
514  "yrl", "yua", "za", "zag", "zap", "zea", "zmi", "zu", "zun", "zza"}},
515  {{"cyrillic", "cyrillic supplement", "cyrillic extended-a", "cyrillic extended-b",
516  "cyrillic extended-c"},
517  {"ab", "abq", "ady", "aii", "alt", "av", "az", "ba", "be", "bg", "bs", "bua",
518  "ce", "chm", "cjs", "ckt", "crh", "cu", "cv", "dar", "dng", "evn", "gag", "gld",
519  "inh", "kaa", "kbd", "kca", "kjh", "kk", "koi", "kpy", "krc", "ku", "kum", "kv",
520  "ky", "lbe", "lez", "lfn", "mdf", "mk", "mn", "mns", "mrj", "myv", "nog", "os",
521  "pnt", "ro", "rom", "ru", "rue", "sah", "se", "sel", "sr", "tab", "tg", "tk",
522  "tkr", "tly", "tt", "ttt", "tyv", "ude", "udm", "ug", "uk", "uz", "xal", "yrk"}},
523  {{"devanagari", "devanagari extended", "vedic extensions"},
524  {"anp", "awa", "bap", "bfy", "bgc", "bhb", "bhi", "bho", "bjj", "bra", "brx",
525  "btv", "doi", "dty", "gbm", "gom", "gon", "gvr", "hi", "hif", "hne", "hoc",
526  "hoj", "jml", "kfr", "kfy", "khn", "kok", "kru", "ks", "lif", "mag", "mai",
527  "mgp", "mr", "mrd", "mtr", "mwr", "ne", "new", "noe", "pi", "raj", "rjs",
528  "sa", "sat", "sck", "sd", "srx", "swv", "taj", "tdg", "tdh", "thl", "thq",
529  "thr", "tkt", "unr", "unx", "wbr", "wtm", "xnr", "xsr"}},
530  {{"arabic", "arabic supplement", "arabic extended-a", "arabic presentation forms-a",
531  "arabic presentation forms-b", "arabic mathematical alphabetic symbols"},
532  {"aeb", "ar", "arq", "ars", "ary", "arz", "az", "bal", "bej", "bft", "bgn", "bqi",
533  "brh", "cja", "cjm", "ckb", "cop", "dcc", "doi", "dyo", "fa", "fia", "gbz", "gjk",
534  "gju", "glk", "ha", "haz", "hnd", "hno", "id", "inh", "khw", "kk", "ks", "ku",
535  "kvx", "kxp", "ky", "lah", "lki", "lrc", "luz", "mfa", "ms", "mvy", "mzn", "pa",
536  "prd", "ps", "rmt", "sd", "sdh", "shi", "skr", "so", "sus", "swb", "tg", "tk",
537  "tly", "tr", "trw", "ttt", "ug", "ur", "uz", "wni", "wo", "zdj"}},
538  {{"cjk radicals supplement", "cjk strokes", "cjk symbols and punctuation",
539  "cjk unified ideographs", "cjk unified ideographs extension a",
540  "cjk unified ideographs extension b", "cjk unified ideographs extension c",
541  "cjk unified ideographs extension d", "cjk unified ideographs extension e",
542  "cjk unified ideographs extension f", "cjk unified ideographs extension g",
543  "cjk compatibility", "cjk compatibility ideographs", "cjk compatibility forms",
544  "cjk compatibility ideographs supplement", "kangxi radicals"},
545  {"gan", "hak", "hsn", "lzh", "nan", "vi", "wuu", "yue", "za", "zh", "ko", "ja"}},
546  {{"greek and coptic", "greek extended", "coptic"},
547  {"bgx", "cop", "el", "grc", "pnt", "tsd"}},
548  {{"bengali"},
549  {"as", "bn", "bpy", "ccp", "grt", "kha", "lus", "mni", "rkt", "sat", "syl", "unr",
550  "unx"}},
551  {{"thai"}, {"kdt", "kxm", "lcp", "lwl", "pi", "sou", "th", "tts"}},
552  {{"ethiopic", "ethiopic supplement", "ethiopic extended"},
553  {"am", "byn", "gez", "om", "ti", "tig", "wal"}},
554  {{"hebrew"}, {"he", "jpr", "jrb", "lad", "sam", "yi"}},
555  {{"tibetan"}, {"bft", "bo", "dz", "taj", "tdg", "tsj"}},
556  {{"unified canadian aboriginal syllabics"}, {"bft", "bo", "dz", "taj", "tdg", "tsj"}},
557  {{"tifinagh"}, {"rif", "shi", "tzm", "zen", "zgh"}},
558  {{"telugu"}, {"gon", "lmn", "te", "wbq"}},
559  {{"syriac"}, {"aii", "ar", "syr", "tru"}},
560  {{"myanmar"}, {"kht", "mnw", "my", "shn"}},
561  {{"nko"}, {"bm", "man", "nqo"}},
562  {{"buginese"}, {"bug", "mak", "mdr"}},
563  {{"old italic"}, {"ett", "osc", "xum"}},
564  {{"lao"}, {"hnj", "kjg", "lo"}},
565  {{"georgian", "georgian supplement"}, {"ka", "lzz", "xmf"}},
566  {{"sinhala"}, {"pi", "sa", "si"}},
567  {{"tamil"}, {"bfq", "ta"}},
568  {{"katakana", "katakana phonetic extensions"}, {"ain", "ryu"}},
569  {{"cuneiform", "cuneiform numbers and punctuation"}, {"akk", "hit"}},
570  {{"cham"}, {"cja", "cjm"}},
571  {{"runic"}, {"de", "non"}},
572  {{"kayah"}, {"eky", "kyu"}},
573  {{"kannada"}, {"kn", "tcy"}},
574  {{"mongolian"}, {"mn", "mnc"}},
575  {{"phags-pa"}, {"mn", "zh"}},
576  {{"oriya"}, {"or", "sat"}},
577  {{"samaritan"}, {"sam", "smp"}},
578  {{"armenian"}, {"hy"}},
579  {{"javanese"}, {"jv"}},
580  {{"gujarati"}, {"gu"}},
581  {{"malayalam"}, {"ml"}},
582  {{"avestan"}, {"ae"}},
583  {{"aramaic"}, {"arc"}},
584  {{"balinese"}, {"ban"}},
585  {{"bamum"}, {"bax"}},
586  {{"batak"}, {"bbc"}},
587  {{"buhid"}, {"bku"}},
588  {{"tai viet"}, {"blt"}},
589  {{"chakma"}, {"ccp"}},
590  {{"cherokee"}, {"chr"}},
591  {{"takri"}, {"doi"}},
592  {{"thaana"}, {"dv"}},
593  {{"egyptian hieroglyphs"}, {"egy"}},
594  {{"adlam"}, {"ff"}},
595  {{"tagalog"}, {"fil"}},
596  {{"gothic"}, {"got"}},
597  {{"cypriot"}, {"grc"}},
598  {{"linear b syllabary", "linear b ideograms"}, {"grc"}},
599  {{"mahajani"}, {"hi"}},
600  {{"pollard"}, {"hmd"}},
601  {{"pahawh hmong"}, {"hmn"}},
602  {{"hanunoo"}, {"hnn"}},
603  {{"varang kshiti"}, {"hoc"}},
604  {{"yi syllables", "yi radicals"}, {"ii"}},
605  {{"hiragana", "katakana"}, {"ja"}},
606  {{"new tai lue"}, {"khb"}},
607  {{"khmer", "khmer symbols"}, {"km"}},
608  {{"hangul jamo", "hangul compatibility jamo", "hangul jamo extended-a",
609  "hangul jamo extended-b", "hangul syllables"},
610  {"ko"}},
611  {{"permic"}, {"kv"}},
612  {{"linear a"}, {"lab"}},
613  {{"lepcha"}, {"lep"}},
614  {{"caucasian albanian"}, {"lez"}},
615  {{"limbu"}, {"lif"}},
616  {{"lisu"}, {"lis"}},
617  {{"tirhuta"}, {"mai"}},
618  {{"mende"}, {"men"}},
619  {{"meetei mayek"}, {"mni"}},
620  {{"modi"}, {"mr"}},
621  {{"mro"}, {"mro"}},
622  {{"mandaic"}, {"myz"}},
623  {{"tai tham"}, {"nod"}},
624  {{"osage"}, {"osa"}},
625  {{"old turkic"}, {"otk"}},
626  {{"gurmukhi"}, {"pa"}},
627  {{"inscriptional pahlavi"}, {"pal"}},
628  {{"old persian"}, {"peo"}},
629  {{"phoenician"}, {"phn"}},
630  {{"rejang"}, {"rej"}},
631  {{"grantha", "sharada", "siddham"}, {"sa"}},
632  {{"ol chiki"}, {"sat"}},
633  {{"saurashtra"}, {"saz"}},
634  {{"khojki", "khudawadi"}, {"sd"}},
635  {{"ogham"}, {"sga"}},
636  {{"osmanya"}, {"so"}},
637  {{"elbasan"}, {"sq"}},
638  {{"sora sompeng"}, {"srb"}},
639  {{"sundanese"}, {"su"}},
640  {{"syloti nagri"}, {"syl"}},
641  {{"tagbanwa"}, {"tbw"}},
642  {{"tai le"}, {"tdd"}},
643  {{"ugaritic"}, {"uga"}},
644  {{"vai"}, {"vai"}},
645  {{"carian"}, {"xcr"}},
646  {{"lycian"}, {"xlc"}},
647  {{"lydian"}, {"xld"}},
648  {{"manichaean"}, {"xmn"}},
649  {{"meroitic"}, {"xmr"}},
650  {{"old north arabian"}, {"xna"}},
651  {{"inscriptional parthian"}, {"xpr"}},
652  {{"old south arabian"}, {"xsa"}},
653  {{"bopomofo", "bopomofo extended"}, {"zh"}}};
654 
655  // Add basic latin (= ASCII) for all languages (required to display URLs for example).
656  add_allowed_code_points_for_group("basic latin");
657  // Add "geometric shapes" to allow rendering the "missing character" glyph
658  add_allowed_code_points_for_group("geometric shapes");
659 
660  for (const auto& script : scripts) {
661  if (std::find(script.second.begin(), script.second.end(), language_code) ==
662  script.second.end())
663  continue;
664 
665  for (const auto& code_range : script.first)
667  }
668 }
669 
672 
// NOTE(review): the signature line (and any statements directly after it) that open this
// body are not present in the file as reviewed; only the statements shown are described.
// Adds allowed code points for every entry of languages_, or just "basic latin" and
// "geometric shapes" when languages_ is empty.
673  if (languages_.empty()) {
674  // If no language specified, fall back to basic latin (=ASCII)
675  add_allowed_code_points_for_group("basic latin");
676  // Add "geometric shapes" to allow rendering the "missing character" glyph
677  add_allowed_code_points_for_group("geometric shapes");
678  return;
679  }
680 
681  // Add language-specific groups
682  for (const auto& language : languages_) {
683  // Extract the language code from the language string (first set of lower case letters)
// The code is everything before the first upper-case character, e.g. "enUS" gives "en";
// a string with no upper-case character is passed on whole.
// NOTE(review): std::isupper() takes an int that must be representable as unsigned char
// (or EOF); the lambda passes a plain char, so a byte >= 0x80 on a signed-char platform is
// undefined behaviour. Casting to unsigned char first would avoid that.
684  auto pos =
685  std::find_if(language.begin(), language.end(), [](char c) { return std::isupper(c); });
686 
687  add_allowed_code_points_for_language(std::string(language.begin(), pos));
688  }
689 }
690 
691 const std::vector<code_point_range>& localizer::get_allowed_code_points() const {
692  return code_points_;
693 }
694 
695 void localizer::set_fallback_code_point(char32_t code_point) {
696  default_code_point_ = code_point;
697 }
698 
// NOTE(review): the signature line for this body is not present in the file as reviewed.
// Returns default_code_point_, the value that set_fallback_code_point() assigns.
700  return default_code_point_;
701 }
702 
703 void localizer::load_translations(const std::string& folder_path) {
704  // First, look for an exact match
705  for (const std::string& language : languages_) {
706  std::string language_file = folder_path + "/" + language + ".lua";
707  if (utils::file_exists(language_file)) {
708  load_translation_file(language_file);
709  return;
710  }
711  }
712 
713  // If no exact match found, look for an approximate match (ignore region)
714  const auto files = utils::get_file_list(folder_path, false, "lua");
715  for (const std::string& language : languages_) {
716  auto iter = std::find_if(files.begin(), files.end(), [&](const std::string& file) {
717  return file.size() == 8u && file.substr(0, 2) == language.substr(0, 2);
718  });
719 
720  if (iter == files.end())
721  continue;
722 
723  std::string language_file = folder_path + "/" + *iter;
724  load_translation_file(language_file);
725  return;
726  }
727 
728  // If no match found, fall back to US english
729  std::string language_file = folder_path + "/enUS.lua";
730  if (utils::file_exists(language_file)) {
731  load_translation_file(language_file);
732  return;
733  }
734 }
735 
736 void localizer::load_translation_file(const std::string& file_name) try {
737  auto result = lua_.do_file(file_name);
738  if (!result.valid()) {
739  gui::out << gui::error << "gui::locale: " << result.get<sol::error>().what() << std::endl;
740  return;
741  }
742 
743  sol::table table = lua_["localize"];
744  if (table == sol::lua_nil) {
745  gui::out << gui::warning << "gui::locale: no 'localize' table in " << file_name
746  << std::endl;
747  return;
748  }
749 
750  table.for_each([&](const sol::object& key, const sol::object& value) {
751  if (!key.is<std::string>())
752  return;
753  std::string ks = key.as<std::string>();
754 
755  if (value.is<std::string>())
756  map_.insert(std::make_pair(std::hash<std::string>{}(ks), value.as<std::string>()));
757  else if (value.is<sol::protected_function>())
758  map_.insert(
759  std::make_pair(std::hash<std::string>{}(ks), value.as<sol::protected_function>()));
760  });
761 
762  // Keep a copy so variables/functions remain alive
763  lua_["localize_" + std::to_string(std::hash<std::string>{}(file_name))] = table;
764 } catch (const sol::error& err) {
765  gui::out << gui::error << "gui::locale: " << err.what() << std::endl;
766  return;
767 }
768 
770  map_.clear();
771 }
772 
773 bool localizer::is_key_valid_(std::string_view key) const {
774  return !key.empty() && key.front() == '{' && key.back() == '}';
775 }
776 
777 localizer::map_type::const_iterator localizer::find_key_(std::string_view key) const {
778  auto substring = key.substr(1, key.size() - 2);
779  return map_.find(std::hash<std::string_view>{}(substring));
780 }
781 
782 std::string localizer::format_string(std::string_view message, sol::variadic_args args) const {
783  fmt::dynamic_format_arg_store<fmt::format_context> store;
784  for (auto&& arg : args) {
785  lxgui::utils::variant variant;
786  if (!arg.is<sol::lua_nil_t>())
787  variant = arg;
788 
789  std::visit(
790  [&](auto& value) {
791  using inner_type = std::decay_t<decltype(value)>;
792  if constexpr (std::is_same_v<inner_type, lxgui::utils::empty>)
793  store.push_back(static_cast<const char*>(""));
794  else
795  store.push_back(value);
796  },
797  variant);
798  }
799 
800  return fmt::vformat(locale_, message, store);
801 }
802 
803 std::string localizer::localize(std::string_view key, sol::variadic_args args) const {
804  if (!is_key_valid_(key))
805  return std::string{key};
806 
807  auto iter = find_key_(key);
808  if (iter == map_.end())
809  return std::string{key};
810 
811  return std::visit(
812  [&](const auto& item) {
813  using inner_type = std::decay_t<decltype(item)>;
814  if constexpr (std::is_same_v<inner_type, std::string>) {
815  return format_string(item, args);
816  } else {
817  auto result = item(args);
818  if (!result.valid()) {
820  << "gui::locale: " << result.template get<sol::error>().what()
821  << std::endl;
822  return std::string{key};
823  }
824 
825  if (result.begin() != result.end()) {
826  auto&& first = *result.begin();
827  if (first.template is<std::string>())
828  return first.template as<std::string>();
829  }
830 
831  return std::string{key};
832  }
833  },
834  iter->second);
835 }
836 
837 } // namespace lxgui::gui
Exception to be thrown by GUI code.
void add_allowed_code_points_for_group(const std::string &unicode_group)
Adds a new range to the set of allowed code points from a Unicode group.
void clear_translations()
Removes all previously loaded translations.
void clear_allowed_code_points()
Removes all allowed code points.
const std::vector< std::string > & get_preferred_languages() const
Returns the list of code names of the preferred languages (used to translate messages and strings).
void auto_detect_preferred_languages()
Attempts to automatically detect the current language (used to translate messages and strings).
void load_translations(const std::string &folder_path)
Loads new translations from a folder, selecting the language automatically.
void add_allowed_code_points(const code_point_range &range)
Adds a new range to the set of allowed code points.
char32_t get_fallback_code_point() const
Returns the default character to display if a character is missing from a font.
void set_preferred_languages(const std::vector< std::string > &languages)
Changes the current language (used to translate messages and strings).
const std::vector< code_point_range > & get_allowed_code_points() const
Returns the list of allowed code points (Unicode characters), for text rendering.
void add_allowed_code_points_for_language(const std::string &language_code)
Adds a new range to the set of allowed code points for a given language.
std::string format_string(std::string_view message, sol::variadic_args args) const
Translates a string with a certain number of arguments from Lua (zero or many).
localizer()
Default constructor.
void set_locale(const std::locale &locale)
Changes the current locale (used to format numbers).
void auto_detect_allowed_code_points()
Attempts to automatically detect the set of allowed code points based on preferred languages.
void set_fallback_code_point(char32_t code_point)
Sets the default character to display if a character is missing from a font.
const std::locale & get_locale() const
Returns the current locale (used to format numbers).
std::string localize(std::string_view key, sol::variadic_args args) const
Translates a string with a certain number of arguments from Lua (zero or many).
void load_translation_file(const std::string &file_name)
Loads new translations from a file.
const char * what() const noexcept override
Override std::exception::what()
std::ostream out
const std::string warning
Definition: gui_out.cpp:6
const std::string error
Definition: gui_out.cpp:7
std::variant< empty, bool, std::int64_t, std::int32_t, std::int16_t, std::int8_t, std::uint64_t, std::uint32_t, std::uint16_t, std::uint8_t, double, float, std::string > variant
Type-erased value for passing arguments to events.
string_vector get_file_list(const std::string &rel_path, bool with_path)
bool file_exists(const std::string &file)
Represents a contiguous range of unicode code points.