lxgui
Loading...
Searching...
No Matches
gui_localizer.cpp
1#include "lxgui/gui_localizer.hpp"
2
3#include "lxgui/gui_exception.hpp"
4#include "lxgui/gui_out.hpp"
5#include "lxgui/utils_file_system.hpp"
6#include "lxgui/utils_range.hpp"
7#include "lxgui/utils_string.hpp"
8#include "lxgui/utils_variant.hpp"
9
10#include <cstdlib>
11#include <cstring>
12#include <fmt/args.h>
13#include <functional>
14#include <lxgui/extern_sol2_state.hpp>
15
16namespace lxgui::gui {
17
18namespace {
19
/// Reads an environment variable.
/// @param name Name of the variable to look up.
/// @return The value of the variable, or an empty string if it is unset or cannot be read.
std::string get_environment_variable(const std::string& name) {
#if defined(LXGUI_PLATFORM_WINDOWS)
    // Windows has std::getenv, but MSVC offers a safer alternative that it insists on using.
    // _dupenv_s hands back a heap copy that we own; the guard releases it on every exit
    // path, including when building the std::string below throws.
    struct buffer_guard {
        char* data = nullptr;
        ~buffer_guard() {
            std::free(data); // free(nullptr) is a no-op
        }
    } buffer;

    std::size_t size = 0;
    if (_dupenv_s(&buffer.data, &size, name.c_str()) != 0 || buffer.data == nullptr)
        return "";

    return std::string(buffer.data);
#else
    const char* value = std::getenv(name.c_str());
    return value != nullptr ? std::string(value) : std::string();
#endif
}
36
37std::vector<std::string> get_default_languages() {
38 // First try parsing the LANGUAGE environment variable.
39 // This is the best, because it lets the user specify a list of languages
40 // in descending priority, so if a translation is unavailable in their
41 // primary language, they may still get another match which would be
42 // better for them than the default enUS (e.g., a French person could
43 // prefer to fall back on a Spanish translation rather than English).
44 const std::string language_var = get_environment_variable("LANGUAGE");
45 if (!language_var.empty()) {
46 std::vector<std::string> output;
47 for (auto language : utils::cut(language_var, ":")) {
48 std::string language_normalized{language};
49 utils::replace(language_normalized, "_", "");
50 if (language_normalized.size() == 4)
51 output.push_back(language_normalized);
52 }
53
54 if (!output.empty())
55 return output;
56 }
57
58#if defined(LXGUI_PLATFORM_WINDOWS)
59 // If LANGUAGE is not specified, on Windows, try OS-specific function.
60 // TODO: https://github.com/cschreib/lxgui/issues/95
61#endif
62
63 // If LANGUAGE is not specified or empty, try LANG.
64 std::string lang = get_environment_variable("LANG");
65 if (!lang.empty()) {
66 auto pos1 = lang.find_first_of(".@");
67 if (pos1 != std::string::npos)
68 lang = lang.substr(0, pos1);
69
70 utils::replace(lang, "_", "");
71 if (lang.size() == 4)
72 return {lang};
73 }
74
75 return {"enUS"};
76}
77} // namespace
78
80 try {
81 // Try to set locale to system default.
82 locale_ = std::locale("");
83 } catch (const std::exception& exception) {
84 // Revert to C locale.
85 locale_ = std::locale::classic();
86 gui::out << gui::error << "gui::locale: " << exception.what() << std::endl;
87 gui::out << gui::error << "gui::locale: reverting to default classic locale" << std::endl;
88 }
89
90 // Find default languages.
93
94 // Set up Lua sandbox.
95 lua_.open_libraries(
96 sol::lib::base, sol::lib::math, sol::lib::table, sol::lib::io, sol::lib::os,
97 sol::lib::string, sol::lib::debug);
98
99 // Give the translation Lua state the localize_string and format_string function, so
100 // it can call it recursively as needed, but not the other functions
101 // which could load more translation strings.
102 lua_.set_function("localize_string", [&](const std::string& key, sol::variadic_args args) {
103 return localize(key, args);
104 });
105 lua_.set_function("format_string", [&](const std::string& key, sol::variadic_args args) {
106 return format_string(key, args);
107 });
108}
109
110void localizer::set_locale(const std::locale& locale) {
111 if (locale_ == locale)
112 return;
113
114 locale_ = locale;
116}
117
118void localizer::set_preferred_languages(const std::vector<std::string>& languages) {
119#if 0
120 // TODO implement more generic input checks
121 // https://github.com/cschreib/lxgui/issues/98
122 for (const auto& language: languages) {
123 if (language.size() != 4) {
124 throw gui::exception("gui::localizer", "language code must have exactly 4 characters");
125 }
126 }
127#endif
128
129 languages_ = languages;
131}
132
134 set_preferred_languages(get_default_languages());
135}
136
137const std::locale& localizer::get_locale() const {
138 return locale_;
139}
140
141const std::vector<std::string>& localizer::get_preferred_languages() const {
142 return languages_;
143}
144
146 code_points_.clear();
147}
148
150 if (range.last < range.first)
151 throw gui::exception("gui::localizer", "code point range must have last >= first");
152
153 code_point_range test_range = range;
154 auto iter = code_points_.begin();
155
156 do {
157 // Find next overlapping range
158 iter = std::find_if(code_points_.begin(), code_points_.end(), [&](const auto& other) {
159 return (test_range.first >= other.first && test_range.first <= other.last) ||
160 (test_range.last >= other.first && test_range.last <= other.last) ||
161 (other.first >= test_range.first && other.first <= test_range.last) ||
162 (other.last >= test_range.first && other.last <= test_range.last);
163 });
164
165 if (iter != code_points_.end()) {
166 // Combine the ranges
167 test_range.first = std::min(test_range.first, iter->first);
168 test_range.last = std::max(test_range.last, iter->last);
169
170 // Erase the overlap
171 code_points_.erase(iter);
172 }
173 } while (iter != code_points_.end());
174
175 // Add the new range
176 code_points_.push_back(test_range);
177
178 // Sort by ascending code point
179 std::sort(code_points_.begin(), code_points_.end(), [](const auto& left, const auto& right) {
180 return left.first < right.first;
181 });
182}
183
184void localizer::add_allowed_code_points_for_group(const std::string& unicode_group) {
185 // List from http://www.unicode.org/Public/5.2.0/ucdxml/ucd.all.flat.zip
186 // Adjusted "basic latin" and "latin-1 supplement" to remove non-printable chars.
187 static std::unordered_map<std::string, code_point_range> unicode_groups = {
188 {"basic latin", {0x0020, 0x007e}},
189 {"latin-1 supplement", {0x00a0, 0x00ff}},
190 {"latin extended-a", {0x0100, 0x017f}},
191 {"latin extended-b", {0x0180, 0x024f}},
192 {"ipa extensions", {0x0250, 0x02af}},
193 {"spacing modifier letters", {0x02b0, 0x02ff}},
194 {"combining diacritical marks", {0x0300, 0x036f}},
195 {"greek and coptic", {0x0370, 0x03ff}},
196 {"cyrillic", {0x0400, 0x04ff}},
197 {"cyrillic supplement", {0x0500, 0x052f}},
198 {"armenian", {0x0530, 0x058f}},
199 {"hebrew", {0x0590, 0x05ff}},
200 {"arabic", {0x0600, 0x06ff}},
201 {"syriac", {0x0700, 0x074f}},
202 {"arabic supplement", {0x0750, 0x077f}},
203 {"thaana", {0x0780, 0x07bf}},
204 {"nko", {0x07c0, 0x07ff}},
205 {"samaritan", {0x0800, 0x083f}},
206 // added manually! source https://en.wikipedia.org/wiki/Mandaic_script
207 {"mandaic", {0x0840, 0x085f}},
208 // added manually! source https://en.wikipedia.org/wiki/Arabic_alphabet
209 {"arabic extended-a", {0x08a0, 0x08ff}},
210 {"devanagari", {0x0900, 0x097f}},
211 {"bengali", {0x0980, 0x09ff}},
212 {"gurmukhi", {0x0a00, 0x0a7f}},
213 {"gujarati", {0x0a80, 0x0aff}},
214 {"oriya", {0x0b00, 0x0b7f}},
215 {"tamil", {0x0b80, 0x0bff}},
216 {"telugu", {0x0c00, 0x0c7f}},
217 {"kannada", {0x0c80, 0x0cff}},
218 {"malayalam", {0x0d00, 0x0d7f}},
219 {"sinhala", {0x0d80, 0x0dff}},
220 {"thai", {0x0e00, 0x0e7f}},
221 {"lao", {0x0e80, 0x0eff}},
222 {"tibetan", {0x0f00, 0x0fff}},
223 {"myanmar", {0x1000, 0x109f}},
224 {"georgian", {0x10a0, 0x10ff}},
225 {"hangul jamo", {0x1100, 0x11ff}},
226 {"ethiopic", {0x1200, 0x137f}},
227 {"ethiopic supplement", {0x1380, 0x139f}},
228 {"cherokee", {0x13a0, 0x13ff}},
229 {"unified canadian aboriginal syllabics", {0x1400, 0x167f}},
230 {"ogham", {0x1680, 0x169f}},
231 {"runic", {0x16a0, 0x16ff}},
232 {"tagalog", {0x1700, 0x171f}},
233 {"hanunoo", {0x1720, 0x173f}},
234 {"buhid", {0x1740, 0x175f}},
235 {"tagbanwa", {0x1760, 0x177f}},
236 {"khmer", {0x1780, 0x17ff}},
237 {"mongolian", {0x1800, 0x18af}},
238 {"unified canadian aboriginal syllabics extended", {0x18b0, 0x18ff}},
239 {"limbu", {0x1900, 0x194f}},
240 {"tai le", {0x1950, 0x197f}},
241 {"new tai lue", {0x1980, 0x19df}},
242 {"khmer symbols", {0x19e0, 0x19ff}},
243 {"buginese", {0x1a00, 0x1a1f}},
244 {"tai tham", {0x1a20, 0x1aaf}},
245 {"balinese", {0x1b00, 0x1b7f}},
246 {"sundanese", {0x1b80, 0x1bbf}},
247 // added manually! source https://en.wikipedia.org/wiki/Batak_script
248 {"batak", {0x1bc0, 0x1bff}},
249 {"lepcha", {0x1c00, 0x1c4f}},
250 {"ol chiki", {0x1c50, 0x1c7f}},
251 // added manually! source https://en.wikipedia.org/wiki/Cyrillic_script
252 {"cyrillic extended-c", {0x1c80, 0x1c8f}},
253 {"vedic extensions", {0x1cd0, 0x1cff}},
254 {"phonetic extensions", {0x1d00, 0x1d7f}},
255 {"phonetic extensions supplement", {0x1d80, 0x1dbf}},
256 {"combining diacritical marks supplement", {0x1dc0, 0x1dff}},
257 {"latin extended additional", {0x1e00, 0x1eff}},
258 {"greek extended", {0x1f00, 0x1fff}},
259 {"general punctuation", {0x2000, 0x206f}},
260 {"superscripts and subscripts", {0x2070, 0x209f}},
261 {"currency symbols", {0x20a0, 0x20cf}},
262 {"combining diacritical marks for symbols", {0x20d0, 0x20ff}},
263 {"letterlike symbols", {0x2100, 0x214f}},
264 {"number forms", {0x2150, 0x218f}},
265 {"arrows", {0x2190, 0x21ff}},
266 {"mathematical operators", {0x2200, 0x22ff}},
267 {"miscellaneous technical", {0x2300, 0x23ff}},
268 {"control pictures", {0x2400, 0x243f}},
269 {"optical character recognition", {0x2440, 0x245f}},
270 {"enclosed alphanumerics", {0x2460, 0x24ff}},
271 {"box drawing", {0x2500, 0x257f}},
272 {"block elements", {0x2580, 0x259f}},
273 {"geometric shapes", {0x25a0, 0x25ff}},
274 {"miscellaneous symbols", {0x2600, 0x26ff}},
275 {"dingbats", {0x2700, 0x27bf}},
276 {"miscellaneous mathematical symbols-a", {0x27c0, 0x27ef}},
277 {"supplemental arrows-a", {0x27f0, 0x27ff}},
278 {"braille patterns", {0x2800, 0x28ff}},
279 {"supplemental arrows-b", {0x2900, 0x297f}},
280 {"miscellaneous mathematical symbols-b", {0x2980, 0x29ff}},
281 {"supplemental mathematical operators", {0x2a00, 0x2aff}},
282 {"miscellaneous symbols and arrows", {0x2b00, 0x2bff}},
283 {"glagolitic", {0x2c00, 0x2c5f}},
284 {"latin extended-c", {0x2c60, 0x2c7f}},
285 {"coptic", {0x2c80, 0x2cff}},
286 {"georgian supplement", {0x2d00, 0x2d2f}},
287 {"tifinagh", {0x2d30, 0x2d7f}},
288 {"ethiopic extended", {0x2d80, 0x2ddf}},
289 {"cyrillic extended-a", {0x2de0, 0x2dff}},
290 {"supplemental punctuation", {0x2e00, 0x2e7f}},
291 {"cjk radicals supplement", {0x2e80, 0x2eff}},
292 {"kangxi radicals", {0x2f00, 0x2fdf}},
293 {"ideographic description characters", {0x2ff0, 0x2fff}},
294 {"cjk symbols and punctuation", {0x3000, 0x303f}},
295 {"hiragana", {0x3040, 0x309f}},
296 {"katakana", {0x30a0, 0x30ff}},
297 {"bopomofo", {0x3100, 0x312f}},
298 {"hangul compatibility jamo", {0x3130, 0x318f}},
299 {"kanbun", {0x3190, 0x319f}},
300 {"bopomofo extended", {0x31a0, 0x31bf}},
301 {"cjk strokes", {0x31c0, 0x31ef}},
302 {"katakana phonetic extensions", {0x31f0, 0x31ff}},
303 {"enclosed cjk letters and months", {0x3200, 0x32ff}},
304 {"cjk compatibility", {0x3300, 0x33ff}},
305 {"cjk unified ideographs extension a", {0x3400, 0x4dbf}},
306 {"yijing hexagram symbols", {0x4dc0, 0x4dff}},
307 {"cjk unified ideographs", {0x4e00, 0x9fff}},
308 {"yi syllables", {0xa000, 0xa48f}},
309 {"yi radicals", {0xa490, 0xa4cf}},
310 {"lisu", {0xa4d0, 0xa4ff}},
311 {"vai", {0xa500, 0xa63f}},
312 {"cyrillic extended-b", {0xa640, 0xa69f}},
313 {"bamum", {0xa6a0, 0xa6ff}},
314 {"modifier tone letters", {0xa700, 0xa71f}},
315 {"latin extended-d", {0xa720, 0xa7ff}},
316 {"syloti nagri", {0xa800, 0xa82f}},
317 {"common indic number forms", {0xa830, 0xa83f}},
318 {"phags-pa", {0xa840, 0xa87f}},
319 {"saurashtra", {0xa880, 0xa8df}},
320 {"devanagari extended", {0xa8e0, 0xa8ff}},
321 {"kayah li", {0xa900, 0xa92f}},
322 {"rejang", {0xa930, 0xa95f}},
323 {"hangul jamo extended-a", {0xa960, 0xa97f}},
324 {"javanese", {0xa980, 0xa9df}},
325 {"cham", {0xaa00, 0xaa5f}},
326 {"myanmar extended-a", {0xaa60, 0xaa7f}},
327 {"tai viet", {0xaa80, 0xaadf}},
328 // added manually! source https://en.wikipedia.org/wiki/Latin_script_in_Unicode
329 {"latin extended-e", {0xab30, 0xab6f}},
330 {"meetei mayek", {0xabc0, 0xabff}},
331 {"hangul syllables", {0xac00, 0xd7af}},
332 {"hangul jamo extended-b", {0xd7b0, 0xd7ff}},
333 {"high surrogates", {0xd800, 0xdb7f}},
334 {"high private use surrogates", {0xdb80, 0xdbff}},
335 {"low surrogates", {0xdc00, 0xdfff}},
336 {"private use area", {0xe000, 0xf8ff}},
337 {"cjk compatibility ideographs", {0xf900, 0xfaff}},
338 {"alphabetic presentation forms", {0xfb00, 0xfb4f}},
339 {"arabic presentation forms-a", {0xfb50, 0xfdff}},
340 {"variation selectors", {0xfe00, 0xfe0f}},
341 {"vertical forms", {0xfe10, 0xfe1f}},
342 {"combining half marks", {0xfe20, 0xfe2f}},
343 {"cjk compatibility forms", {0xfe30, 0xfe4f}},
344 {"small form variants", {0xfe50, 0xfe6f}},
345 {"arabic presentation forms-b", {0xfe70, 0xfeff}},
346 {"halfwidth and fullwidth forms", {0xff00, 0xffef}},
347 {"specials", {0xfff0, 0xffff}},
348 {"linear b syllabary", {0x10000, 0x1007f}},
349 {"linear b ideograms", {0x10080, 0x100ff}},
350 // added manually! source https://en.wikipedia.org/wiki/Caucasian_Albanian_script
351 {"caucasian albanian", {0x10530, 0x1056f}},
352 // added manually! source https://en.wikipedia.org/wiki/Linear_A
353 {"linear a", {0x10600, 0x1077f}},
354 {"aegean numbers", {0x10100, 0x1013f}},
355 {"ancient greek numbers", {0x10140, 0x1018f}},
356 {"ancient symbols", {0x10190, 0x101cf}},
357 {"phaistos disc", {0x101d0, 0x101ff}},
358 {"lycian", {0x10280, 0x1029f}},
359 {"carian", {0x102a0, 0x102df}},
360 {"old italic", {0x10300, 0x1032f}},
361 {"gothic", {0x10330, 0x1034f}},
362 {"permic", {0x10350, 0x1037f}},
363 {"ugaritic", {0x10380, 0x1039f}},
364 {"old persian", {0x103a0, 0x103df}},
365 {"deseret", {0x10400, 0x1044f}},
366 {"shavian", {0x10450, 0x1047f}},
367 {"osmanya", {0x10480, 0x104af}},
368 // added manually! source https://en.wikipedia.org/wiki/Osage_script
369 {"osage", {0x104b0, 0x104ff}},
370 // added manually! source https://en.wikipedia.org/wiki/Elbasan_script
371 {"elbasan", {0x10500, 0x1052f}},
372 // added manually! source https://en.wikipedia.org/wiki/Latin_script_in_Unicode
373 {"latin extended-f", {0x10780, 0x107bf}},
374 {"cypriot syllabary", {0x10800, 0x1083f}},
375 {"imperial aramaic", {0x10840, 0x1085f}},
376 {"phoenician", {0x10900, 0x1091f}},
377 {"lydian", {0x10920, 0x1093f}},
378 {"meroitic", {0x10980, 0x109ff}},
379 {"kharoshthi", {0x10a00, 0x10a5f}},
380 {"old south arabian", {0x10a60, 0x10a7f}},
381 // added manually! source https://en.wikipedia.org/wiki/Old_North_Arabian_(Unicode_block)
382 {"old north arabian", {0x10a80, 0x10a9f}},
383 // added manually! source https://en.wikipedia.org/wiki/Manichaean_script
384 {"manichaean", {0x10ac0, 0x10aff}},
385 {"avestan", {0x10b00, 0x10b3f}},
386 {"inscriptional parthian", {0x10b40, 0x10b5f}},
387 {"inscriptional pahlavi", {0x10b60, 0x10b7f}},
388 {"old turkic", {0x10c00, 0x10c4f}},
389 {"rumi numeral symbols", {0x10e60, 0x10e7f}},
390 {"kaithi", {0x11080, 0x110cf}},
391 // added manually! source https://en.wikipedia.org/wiki/Sorang_Sompeng_script
392 {"sora sompeng", {0x110d0, 0x110ff}},
393 // added manually! source https://en.wikipedia.org/wiki/Chakma_script
394 {"chakma", {0x11100, 0x1114f}},
395 // added manually! source https://en.wikipedia.org/wiki/Mahajani
396 {"mahajani", {0x11150, 0x1117f}},
397 // added manually! source https://en.wikipedia.org/wiki/Sharada_(Unicode_block)
398 {"sharada", {0x11180, 0x111df}},
399 // added manually! source https://en.wikipedia.org/wiki/Khojki_script
400 {"khojki", {0x11200, 0x1124f}},
401 // added manually! source https://en.wikipedia.org/wiki/Khudabadi_script
402 {"khudawadi", {0x112b0, 0x112ff}},
403 // added manually! source https://en.wikipedia.org/wiki/Grantha_script
404 {"grantha", {0x11300, 0x1137f}},
405 // added manually! source https://en.wikipedia.org/wiki/Tirhuta_script
406 {"tirhuta", {0x11480, 0x114df}},
407 // added manually! source https://en.wikipedia.org/wiki/Siddha%E1%B9%83_script
408 {"siddham", {0x11580, 0x115ff}},
409 // added manually! source https://en.wikipedia.org/wiki/Modi_script
410 {"modi", {0x11600, 0x1165f}},
411 // added manually! source https://en.wikipedia.org/wiki/Takri_script
412 {"takri", {0x11680, 0x116cf}},
413 // added manually! source https://en.wikipedia.org/wiki/Warang_Citi
414 {"varang kshiti", {0x118a0, 0x118ff}},
415 {"cuneiform", {0x12000, 0x123ff}},
416 {"cuneiform numbers and punctuation", {0x12400, 0x1247f}},
417 {"egyptian hieroglyphs", {0x13000, 0x1342f}},
418 {"byzantine musical symbols", {0x1d000, 0x1d0ff}},
419 {"musical symbols", {0x1d100, 0x1d1ff}},
420 {"ancient greek musical notation", {0x1d200, 0x1d24f}},
421 {"tai xuan jing symbols", {0x1d300, 0x1d35f}},
422 {"counting rod numerals", {0x1d360, 0x1d37f}},
423 {"mathematical alphanumeric symbols", {0x1d400, 0x1d7ff}},
424 {"adlam", {0x1e800, 0x1e8df}},
425 // added manually! source https://en.wikipedia.org/wiki/Mende_Kikakui_script
426 {"mende", {0x1e900, 0x1e95f}},
427 // added manually! source https://en.wikipedia.org/wiki/Arabic_alphabet
428 {"arabic mathematical alphabetic symbols", {0x1ee00, 0x1eeff}},
429 {"mahjong tiles", {0x1f000, 0x1f02f}},
430 {"domino tiles", {0x1f030, 0x1f09f}},
431 {"enclosed alphanumeric supplement", {0x1f100, 0x1f1ff}},
432 {"enclosed ideographic supplement", {0x1f200, 0x1f2ff}},
433 // added manually! source https://en.wikipedia.org/wiki/Mro_(Unicode_block)
434 {"mro", {0x16a40, 0x16a6f}},
435 // added manually! source https://en.wikipedia.org/wiki/Pahawh_Hmong
436 {"pahawh hmong", {0x16b00, 0x16bbf}},
437 // added manually! source https://en.wikipedia.org/wiki/Pollard_script
438 {"pollard", {0x16f00, 0x16f9f}},
439 // added manually! source https://en.wikipedia.org/wiki/Latin_script_in_Unicode
440 {"latin extended-g", {0x1df00, 0x1dfff}},
441 {"cjk unified ideographs extension b", {0x20000, 0x2a6df}},
442 {"cjk unified ideographs extension c", {0x2a700, 0x2b73f}},
443 // added manually! source https://en.wikipedia.org/wiki/Han_unification
444 {"cjk unified ideographs extension d", {0x2b740, 0x2b81f}},
445 // added manually! source https://en.wikipedia.org/wiki/Han_unification
446 {"cjk unified ideographs extension e", {0x2b820, 0x2ceaf}},
447 // added manually! source https://en.wikipedia.org/wiki/Han_unification
448 {"cjk unified ideographs extension f", {0x2ceb0, 0x2ebef}},
449 {"cjk compatibility ideographs supplement", {0x2f800, 0x2fa1f}},
450 // added manually! source https://en.wikipedia.org/wiki/Han_unification
451 {"cjk unified ideographs extension g", {0x30000, 0x3134f}},
452 {"tags", {0xe0000, 0xe007f}},
453 {"variation selectors supplement", {0xe0100, 0xe01ef}},
454 {"supplementary private use area-a", {0xf0000, 0xfffff}},
455 {"supplementary private use area-b", {0x100000, 0x10ffff}}};
456
457 auto iter = unicode_groups.find(utils::to_lower(unicode_group));
458 if (iter == unicode_groups.end())
459 throw gui::exception("gui::localizer", "unknown Unicode group '" + unicode_group + "'");
460
461 add_allowed_code_points(iter->second);
462}
463
464void localizer::add_allowed_code_points_for_language(const std::string& language_code) {
465 // Lists from http://unicode.org/Public/cldr/39/cldr-common-39.0.zip
466 // Mapped manually to Unicode groups above with the help of
467 // https://unicode-org.github.io/cldr-staging/charts/37/supplemental/scripts_and_languages.html
468 static const std::vector<std::pair<std::vector<std::string>, std::vector<std::string>>>
469 scripts = {
470 {{"basic latin", "latin-1 supplement", "latin extended-a", "latin extended-b",
471 "latin extended-c", "latin extended-d", "latin extended-e", "latin extended-f",
472 "latin extended-g", "latin extended additional"},
473 {"aa", "abr", "ace", "ach", "ada", "af", "agq", "ain", "ak", "akz", "ale", "aln",
474 "amo", "an", "ang", "aoz", "arn", "aro", "arp", "arw", "asa", "ast", "atj", "avk",
475 "ay", "az", "bal", "ban", "bar", "bas", "bbc", "bbj", "bci", "bem", "bew", "bez",
476 "bfd", "bi", "bik", "bin", "bjn", "bkm", "bku", "bla", "bm", "bmq", "bqv", "br",
477 "brh", "bs", "bss", "bto", "buc", "bug", "bum", "bvb", "byv", "bze", "bzx", "ca",
478 "cad", "car", "cay", "cch", "ceb", "cgg", "ch", "chk", "chn", "cho", "chp", "chy",
479 "cic", "co", "cps", "cr", "crj", "crl", "crs", "cs", "csb", "ctd", "cy", "da",
480 "dak", "dav", "de", "del", "den", "dgr", "din", "dje", "dnj", "dsb", "dtm", "dtp",
481 "dua", "dum", "dyo", "dyu", "ebu", "ee", "efi", "egl", "eka", "en", "enm", "eo",
482 "es", "esu", "et", "ett", "eu", "ewo", "ext", "fan", "ff", "ffm", "fi", "fil",
483 "fit", "fj", "fo", "fon", "fr", "frc", "frm", "fro", "frp", "frr", "frs", "fud",
484 "fuq", "fur", "fuv", "fvr", "fy", "ga", "gaa", "gag", "gay", "gba", "gcr", "gd",
485 "gil", "gl", "gmh", "gn", "goh", "gor", "gos", "grb", "gsw", "gub", "guc", "gur",
486 "guz", "gv", "gwi", "ha", "hai", "haw", "hi", "hif", "hil", "hmn", "hnn", "ho",
487 "hop", "hr", "hsb", "ht", "hu", "hup", "hz", "ia", "iba", "ibb", "id", "ife",
488 "ig", "ii", "ik", "ikt", "ilo", "inh", "is", "it", "iu", "izh", "jam", "jgo",
489 "jmc", "jut", "jv", "kab", "kac", "kaj", "kam", "kao", "kcg", "kck", "kde", "kea",
490 "kfo", "kg", "kge", "kgp", "kha", "khq", "ki", "kiu", "kj", "kjg", "kkj", "kl",
491 "kln", "kmb", "kos", "kpe", "kr", "kri", "krj", "krl", "ksb", "ksf", "ksh", "ku",
492 "kut", "kvr", "kw", "ky", "la", "lag", "laj", "lam", "lb", "lbw", "lfn", "lg",
493 "li", "lij", "liv", "ljp", "lkt", "lmo", "ln", "lol", "loz", "lt", "ltg", "lu",
494 "lua", "lui", "lun", "luo", "lut", "luy", "lv", "lzz", "mad", "maf", "mak", "man",
495 "mas", "maz", "mdh", "mdr", "mdt", "men", "mer", "mfe", "mg", "mgh", "mgo", "mgy",
496 "mh", "mi", "mic", "min", "mls", "moe", "moh", "mos", "mro", "ms", "mt", "mua",
497 "mus", "mwk", "mwl", "mwv", "mxc", "myx", "na", "nap", "naq", "nb", "nch", "nd",
498 "ndc", "nds", "ng", "ngl", "nhe", "nhw", "nia", "nij", "niu", "njo", "nl", "nmg",
499 "nn", "nnh", "no", "nov", "nr", "nsk", "nso", "nus", "nv", "nxq", "ny", "nym",
500 "nyn", "nyo", "nzi", "oc", "oj", "om", "osa", "osc", "pag", "pam", "pap", "pau",
501 "pcd", "pcm", "pdc", "pdt", "pfl", "pko", "pl", "pms", "pnt", "pon", "prg", "pro",
502 "pt", "puu", "qu", "quc", "qug", "rap", "rar", "rcf", "rej", "rgn", "ria", "rif",
503 "rm", "rmf", "rmo", "rmu", "rn", "rng", "ro", "rob", "rof", "rom", "rtm", "rug",
504 "rup", "rw", "rwk", "sad", "saf", "saq", "sas", "sat", "sbp", "sc", "scn", "sco",
505 "scs", "sdc", "se", "see", "sef", "seh", "sei", "ses", "sg", "sga", "sgs", "shi",
506 "sid", "sk", "sl", "sli", "sly", "sm", "sma", "smj", "smn", "sms", "sn", "snk",
507 "so", "sq", "sr", "srb", "srn", "srr", "ss", "ssy", "st", "stq", "su", "suk",
508 "sus", "sv", "sw", "swb", "swg", "sxn", "syi", "szl", "tbw", "tem", "teo", "ter",
509 "tet", "tg", "tiv", "tk", "tkl", "tkr", "tli", "tly", "tmh", "tn", "to", "tog",
510 "tpi", "tr", "tru", "trv", "ts", "tsg", "tsi", "ttj", "ttt", "tum", "tvl", "twq",
511 "ty", "tzm", "udm", "ug", "uli", "umb", "uz", "vai", "ve", "vec", "vep", "vi",
512 "vic", "vls", "vmf", "vmw", "vo", "vot", "vro", "vun", "wa", "wae", "war", "was",
513 "wbp", "wls", "wo", "xav", "xh", "xog", "xum", "yao", "yap", "yav", "ybb", "yo",
514 "yrl", "yua", "za", "zag", "zap", "zea", "zmi", "zu", "zun", "zza"}},
515 {{"cyrillic", "cyrillic supplement", "cyrillic extended-a", "cyrillic extended-b",
516 "cyrillic extended-c"},
517 {"ab", "abq", "ady", "aii", "alt", "av", "az", "ba", "be", "bg", "bs", "bua",
518 "ce", "chm", "cjs", "ckt", "crh", "cu", "cv", "dar", "dng", "evn", "gag", "gld",
519 "inh", "kaa", "kbd", "kca", "kjh", "kk", "koi", "kpy", "krc", "ku", "kum", "kv",
520 "ky", "lbe", "lez", "lfn", "mdf", "mk", "mn", "mns", "mrj", "myv", "nog", "os",
521 "pnt", "ro", "rom", "ru", "rue", "sah", "se", "sel", "sr", "tab", "tg", "tk",
522 "tkr", "tly", "tt", "ttt", "tyv", "ude", "udm", "ug", "uk", "uz", "xal", "yrk"}},
523 {{"devanagari", "devanagari extended", "vedic extensions"},
524 {"anp", "awa", "bap", "bfy", "bgc", "bhb", "bhi", "bho", "bjj", "bra", "brx",
525 "btv", "doi", "dty", "gbm", "gom", "gon", "gvr", "hi", "hif", "hne", "hoc",
526 "hoj", "jml", "kfr", "kfy", "khn", "kok", "kru", "ks", "lif", "mag", "mai",
527 "mgp", "mr", "mrd", "mtr", "mwr", "ne", "new", "noe", "pi", "raj", "rjs",
528 "sa", "sat", "sck", "sd", "srx", "swv", "taj", "tdg", "tdh", "thl", "thq",
529 "thr", "tkt", "unr", "unx", "wbr", "wtm", "xnr", "xsr"}},
530 {{"arabic", "arabic supplement", "arabic extended-a", "arabic presentation forms-a",
531 "arabic presentation forms-b", "arabic mathematical alphabetic symbols"},
532 {"aeb", "ar", "arq", "ars", "ary", "arz", "az", "bal", "bej", "bft", "bgn", "bqi",
533 "brh", "cja", "cjm", "ckb", "cop", "dcc", "doi", "dyo", "fa", "fia", "gbz", "gjk",
534 "gju", "glk", "ha", "haz", "hnd", "hno", "id", "inh", "khw", "kk", "ks", "ku",
535 "kvx", "kxp", "ky", "lah", "lki", "lrc", "luz", "mfa", "ms", "mvy", "mzn", "pa",
536 "prd", "ps", "rmt", "sd", "sdh", "shi", "skr", "so", "sus", "swb", "tg", "tk",
537 "tly", "tr", "trw", "ttt", "ug", "ur", "uz", "wni", "wo", "zdj"}},
538 {{"cjk radicals supplement", "cjk strokes", "cjk symbols and punctuation",
539 "cjk unified ideographs", "cjk unified ideographs extension a",
540 "cjk unified ideographs extension b", "cjk unified ideographs extension c",
541 "cjk unified ideographs extension d", "cjk unified ideographs extension e",
542 "cjk unified ideographs extension f", "cjk unified ideographs extension g",
543 "cjk compatibility", "cjk compatibility ideographs", "cjk compatibility forms",
544 "cjk compatibility ideographs supplement", "kangxi radicals"},
545 {"gan", "hak", "hsn", "lzh", "nan", "vi", "wuu", "yue", "za", "zh", "ko", "ja"}},
546 {{"greek and coptic", "greek extended", "coptic"},
547 {"bgx", "cop", "el", "grc", "pnt", "tsd"}},
548 {{"bengali"},
549 {"as", "bn", "bpy", "ccp", "grt", "kha", "lus", "mni", "rkt", "sat", "syl", "unr",
550 "unx"}},
551 {{"thai"}, {"kdt", "kxm", "lcp", "lwl", "pi", "sou", "th", "tts"}},
552 {{"ethiopic", "ethiopic supplement", "ethiopic extended"},
553 {"am", "byn", "gez", "om", "ti", "tig", "wal"}},
554 {{"hebrew"}, {"he", "jpr", "jrb", "lad", "sam", "yi"}},
555 {{"tibetan"}, {"bft", "bo", "dz", "taj", "tdg", "tsj"}},
556 {{"unified canadian aboriginal syllabics"}, {"bft", "bo", "dz", "taj", "tdg", "tsj"}},
557 {{"tifinagh"}, {"rif", "shi", "tzm", "zen", "zgh"}},
558 {{"telugu"}, {"gon", "lmn", "te", "wbq"}},
559 {{"syriac"}, {"aii", "ar", "syr", "tru"}},
560 {{"myanmar"}, {"kht", "mnw", "my", "shn"}},
561 {{"nko"}, {"bm", "man", "nqo"}},
562 {{"buginese"}, {"bug", "mak", "mdr"}},
563 {{"old italic"}, {"ett", "osc", "xum"}},
564 {{"lao"}, {"hnj", "kjg", "lo"}},
565 {{"georgian", "georgian supplement"}, {"ka", "lzz", "xmf"}},
566 {{"sinhala"}, {"pi", "sa", "si"}},
567 {{"tamil"}, {"bfq", "ta"}},
568 {{"katakana", "katakana phonetic extensions"}, {"ain", "ryu"}},
569 {{"cuneiform", "cuneiform numbers and punctuation"}, {"akk", "hit"}},
570 {{"cham"}, {"cja", "cjm"}},
571 {{"runic"}, {"de", "non"}},
572 {{"kayah"}, {"eky", "kyu"}},
573 {{"kannada"}, {"kn", "tcy"}},
574 {{"mongolian"}, {"mn", "mnc"}},
575 {{"phags-pa"}, {"mn", "zh"}},
576 {{"oriya"}, {"or", "sat"}},
577 {{"samaritan"}, {"sam", "smp"}},
578 {{"armenian"}, {"hy"}},
579 {{"javanese"}, {"jv"}},
580 {{"gujarati"}, {"gu"}},
581 {{"malayalam"}, {"ml"}},
582 {{"avestan"}, {"ae"}},
583 {{"aramaic"}, {"arc"}},
584 {{"balinese"}, {"ban"}},
585 {{"bamum"}, {"bax"}},
586 {{"batak"}, {"bbc"}},
587 {{"buhid"}, {"bku"}},
588 {{"tai viet"}, {"blt"}},
589 {{"chakma"}, {"ccp"}},
590 {{"cherokee"}, {"chr"}},
591 {{"takri"}, {"doi"}},
592 {{"thaana"}, {"dv"}},
593 {{"egyptian hieroglyphs"}, {"egy"}},
594 {{"adlam"}, {"ff"}},
595 {{"tagalog"}, {"fil"}},
596 {{"gothic"}, {"got"}},
597 {{"cypriot"}, {"grc"}},
598 {{"linear b syllabary", "linear b ideograms"}, {"grc"}},
599 {{"mahajani"}, {"hi"}},
600 {{"pollard"}, {"hmd"}},
601 {{"pahawh hmong"}, {"hmn"}},
602 {{"hanunoo"}, {"hnn"}},
603 {{"varang kshiti"}, {"hoc"}},
604 {{"yi syllables", "yi radicals"}, {"ii"}},
605 {{"hiragana", "katakana"}, {"ja"}},
606 {{"new tai lue"}, {"khb"}},
607 {{"khmer", "khmer symbols"}, {"km"}},
608 {{"hangul jamo", "hangul compatibility jamo", "hangul jamo extended-a",
609 "hangul jamo extended-b", "hangul syllables"},
610 {"ko"}},
611 {{"permic"}, {"kv"}},
612 {{"linear a"}, {"lab"}},
613 {{"lepcha"}, {"lep"}},
614 {{"caucasian albanian"}, {"lez"}},
615 {{"limbu"}, {"lif"}},
616 {{"lisu"}, {"lis"}},
617 {{"tirhuta"}, {"mai"}},
618 {{"mende"}, {"men"}},
619 {{"meetei mayek"}, {"mni"}},
620 {{"modi"}, {"mr"}},
621 {{"mro"}, {"mro"}},
622 {{"mandaic"}, {"myz"}},
623 {{"tai tham"}, {"nod"}},
624 {{"osage"}, {"osa"}},
625 {{"old turkic"}, {"otk"}},
626 {{"gurmukhi"}, {"pa"}},
627 {{"inscriptional pahlavi"}, {"pal"}},
628 {{"old persian"}, {"peo"}},
629 {{"phoenician"}, {"phn"}},
630 {{"rejang"}, {"rej"}},
631 {{"grantha", "sharada", "siddham"}, {"sa"}},
632 {{"ol chiki"}, {"sat"}},
633 {{"saurashtra"}, {"saz"}},
634 {{"khojki", "khudawadi"}, {"sd"}},
635 {{"ogham"}, {"sga"}},
636 {{"osmanya"}, {"so"}},
637 {{"elbasan"}, {"sq"}},
638 {{"sora sompeng"}, {"srb"}},
639 {{"sundanese"}, {"su"}},
640 {{"syloti nagri"}, {"syl"}},
641 {{"tagbanwa"}, {"tbw"}},
642 {{"tai le"}, {"tdd"}},
643 {{"ugaritic"}, {"uga"}},
644 {{"vai"}, {"vai"}},
645 {{"carian"}, {"xcr"}},
646 {{"lycian"}, {"xlc"}},
647 {{"lydian"}, {"xld"}},
648 {{"manichaean"}, {"xmn"}},
649 {{"meroitic"}, {"xmr"}},
650 {{"old north arabian"}, {"xna"}},
651 {{"inscriptional parthian"}, {"xpr"}},
652 {{"old south arabian"}, {"xsa"}},
653 {{"bopomofo", "bopomofo extended"}, {"zh"}}};
654
655 // Add basic latin (= ASCII) for all languages (required to display URLs for example).
657 // Add "geometric shapes" to allow rendering the "missing character" glyph
658 add_allowed_code_points_for_group("geometric shapes");
659
660 for (const auto& script : scripts) {
661 if (std::find(script.second.begin(), script.second.end(), language_code) ==
662 script.second.end())
663 continue;
664
665 for (const auto& code_range : script.first)
667 }
668}
669
672
673 if (languages_.empty()) {
674 // If no language specified, fall back to basic latin (=ASCII)
676 // Add "geometric shapes" to allow rendering the "missing character" glyph
677 add_allowed_code_points_for_group("geometric shapes");
678 return;
679 }
680
681 // Add language-specific groups
682 for (const auto& language : languages_) {
683 // Extract the language code from the language string (first set of lower case letters)
684 auto pos =
685 std::find_if(language.begin(), language.end(), [](char c) { return std::isupper(c); });
686
687 add_allowed_code_points_for_language(std::string(language.begin(), pos));
688 }
689}
690
691const std::vector<code_point_range>& localizer::get_allowed_code_points() const {
692 return code_points_;
693}
694
695void localizer::set_fallback_code_point(char32_t code_point) {
696 default_code_point_ = code_point;
697}
698
700 return default_code_point_;
701}
702
703void localizer::load_translations(const std::string& folder_path) {
704 // First, look for an exact match
705 for (const std::string& language : languages_) {
706 std::string language_file = folder_path + "/" + language + ".lua";
707 if (utils::file_exists(language_file)) {
708 load_translation_file(language_file);
709 return;
710 }
711 }
712
713 // If no exact match found, look for an approximate match (ignore region)
714 const auto files = utils::get_file_list(folder_path, false, "lua");
715 for (const std::string& language : languages_) {
716 auto iter = std::find_if(files.begin(), files.end(), [&](const std::string& file) {
717 return file.size() == 8u && file.substr(0, 2) == language.substr(0, 2);
718 });
719
720 if (iter == files.end())
721 continue;
722
723 std::string language_file = folder_path + "/" + *iter;
724 load_translation_file(language_file);
725 return;
726 }
727
728 // If no match found, fall back to US english
729 std::string language_file = folder_path + "/enUS.lua";
730 if (utils::file_exists(language_file)) {
731 load_translation_file(language_file);
732 return;
733 }
734}
735
736void localizer::load_translation_file(const std::string& file_name) try {
737 auto result = lua_.do_file(file_name);
738 if (!result.valid()) {
739 gui::out << gui::error << "gui::locale: " << result.get<sol::error>().what() << std::endl;
740 return;
741 }
742
743 sol::table table = lua_["localize"];
744 if (table == sol::lua_nil) {
745 gui::out << gui::warning << "gui::locale: no 'localize' table in " << file_name
746 << std::endl;
747 return;
748 }
749
750 table.for_each([&](const sol::object& key, const sol::object& value) {
751 if (!key.is<std::string>())
752 return;
753 std::string ks = key.as<std::string>();
754
755 if (value.is<std::string>())
756 map_.insert(std::make_pair(std::hash<std::string>{}(ks), value.as<std::string>()));
757 else if (value.is<sol::protected_function>())
758 map_.insert(
759 std::make_pair(std::hash<std::string>{}(ks), value.as<sol::protected_function>()));
760 });
761
762 // Keep a copy so variables/functions remain alive
763 lua_["localize_" + std::to_string(std::hash<std::string>{}(file_name))] = table;
764} catch (const sol::error& err) {
765 gui::out << gui::error << "gui::locale: " << err.what() << std::endl;
766 return;
767}
768
770 map_.clear();
771}
772
773bool localizer::is_key_valid_(std::string_view key) const {
774 return !key.empty() && key.front() == '{' && key.back() == '}';
775}
776
777localizer::map_type::const_iterator localizer::find_key_(std::string_view key) const {
778 auto substring = key.substr(1, key.size() - 2);
779 return map_.find(std::hash<std::string_view>{}(substring));
780}
781
782std::string localizer::format_string(std::string_view message, sol::variadic_args args) const {
783 fmt::dynamic_format_arg_store<fmt::format_context> store;
784 for (auto&& arg : args) {
785 lxgui::utils::variant variant;
786 if (!arg.is<sol::lua_nil_t>())
787 variant = arg;
788
789 std::visit(
790 [&](auto& value) {
791 using inner_type = std::decay_t<decltype(value)>;
792 if constexpr (std::is_same_v<inner_type, lxgui::utils::empty>)
793 store.push_back(static_cast<const char*>(""));
794 else
795 store.push_back(value);
796 },
797 variant);
798 }
799
800 return fmt::vformat(locale_, message, store);
801}
802
803std::string localizer::localize(std::string_view key, sol::variadic_args args) const {
804 if (!is_key_valid_(key))
805 return std::string{key};
806
807 auto iter = find_key_(key);
808 if (iter == map_.end())
809 return std::string{key};
810
811 return std::visit(
812 [&](const auto& item) {
813 using inner_type = std::decay_t<decltype(item)>;
814 if constexpr (std::is_same_v<inner_type, std::string>) {
815 return format_string(item, args);
816 } else {
817 auto result = item(args);
818 if (!result.valid()) {
820 << "gui::locale: " << result.template get<sol::error>().what()
821 << std::endl;
822 return std::string{key};
823 }
824
825 if (result.begin() != result.end()) {
826 auto&& first = *result.begin();
827 if (first.template is<std::string>())
828 return first.template as<std::string>();
829 }
830
831 return std::string{key};
832 }
833 },
834 iter->second);
835}
836
837} // namespace lxgui::gui
Exception to be thrown by GUI code.
void add_allowed_code_points_for_group(const std::string &unicode_group)
Adds a new range to the set of allowed code points from a Unicode group.
void clear_translations()
Removes all previously loaded translations.
void clear_allowed_code_points()
Removes all allowed code points.
const std::vector< std::string > & get_preferred_languages() const
Returns the list of code names of the preferred languages (used to translate messages and strings).
void auto_detect_preferred_languages()
Attempts to automatically detect the current language (used to translate messages and strings).
void load_translations(const std::string &folder_path)
Loads new translations from a folder, selecting the language automatically.
void add_allowed_code_points(const code_point_range &range)
Adds a new range to the set of allowed code points.
char32_t get_fallback_code_point() const
Returns the default character to display if a character is missing from a font.
void set_preferred_languages(const std::vector< std::string > &languages)
Changes the current language (used to translate messages and strings).
const std::vector< code_point_range > & get_allowed_code_points() const
Returns the list of allowed code points (Unicode characters), for text rendering.
void add_allowed_code_points_for_language(const std::string &language_code)
Adds a new range to the set of allowed code points for a given language.
std::string format_string(std::string_view message, sol::variadic_args args) const
Formats a string with a certain number of arguments from Lua (zero or many).
localizer()
Default constructor.
void set_locale(const std::locale &locale)
Changes the current locale (used to format numbers).
void auto_detect_allowed_code_points()
Attempts to automatically detect the set of allowed code points based on preferred languages.
void set_fallback_code_point(char32_t code_point)
Sets the default character to display if a character is missing from a font.
const std::locale & get_locale() const
Returns the current locale (used to format numbers).
std::string localize(std::string_view key, sol::variadic_args args) const
Translates a string with a certain number of arguments from Lua (zero or many).
void load_translation_file(const std::string &file_name)
Loads new translations from a file.
const char * what() const noexcept override
Override std::exception::what()
std::ostream out
const std::string warning
Definition gui_out.cpp:6
const std::string error
Definition gui_out.cpp:7
std::variant< empty, bool, std::int64_t, std::int32_t, std::int16_t, std::int8_t, std::uint64_t, std::uint32_t, std::uint16_t, std::uint8_t, double, float, std::string > variant
Type-erased value for passing arguments to events.
string_vector get_file_list(const std::string &rel_path, bool with_path)
bool file_exists(const std::string &file)
Represents a contiguous range of unicode code points.