diff --git a/arm9/source/common/ui.c b/arm9/source/common/ui.c
index a72f1cf..47a7726 100644
--- a/arm9/source/common/ui.c
+++ b/arm9/source/common/ui.c
@@ -242,12 +242,14 @@ void DrawCharacter(u8* screen, int character, int x, int y, int color, int bgcol
     }
 }
 
-void DrawString(u8* screen, const char *str, int x, int y, int color, int bgcolor)
+void DrawString(u8* screen, const char *str, int x, int y, int color, int bgcolor, bool fix_utf8)
 {
     size_t max_len = (((screen == TOP_SCREEN) ? SCREEN_WIDTH_TOP : SCREEN_WIDTH_BOT) - x) / font_width;
     size_t len = (strlen(str) > max_len) ? max_len : strlen(str);
-    for (size_t i = 0; i < len; i++)
-        DrawCharacter(screen, str[i], x + i * font_width, y, color, bgcolor);
+    for (size_t i = 0; i < len; i++) { // with fix_utf8 set, every byte >= 0x80 is drawn as '?'
+        char c = (fix_utf8 && ((unsigned char) str[i] >= 0x80)) ? '?' : str[i]; // compare as unsigned char: plain char may be signed
+        DrawCharacter(screen, c, x + i * font_width, y, color, bgcolor);
+    }
 }
 
 void DrawStringF(u8* screen, int x, int y, int color, int bgcolor, const char *format, ...)
@@ -259,7 +261,7 @@ void DrawStringF(u8* screen, int x, int y, int color, int bgcolor, const char *f
     va_end(va);
 
     for (char* text = strtok(str, "\n"); text != NULL; text = strtok(NULL, "\n"), y += line_height)
-        DrawString(screen, text, x, y, color, bgcolor);
+        DrawString(screen, text, x, y, color, bgcolor, true);
 }
 
 void DrawStringCenter(u8* screen, int color, int bgcolor, const char *format, ...)
@@ -921,14 +923,14 @@ bool ShowProgress(u64 current, u64 total, const char* opstr) TruncateString(progstr, opstr, (bar_width / FONT_WIDTH_EXT) - 7, 8); snprintf(tempstr, 64, "%s (%lu%%)", progstr, prog_percent); ResizeString(progstr, tempstr, bar_width / FONT_WIDTH_EXT, 8, false); - DrawString(MAIN_SCREEN, progstr, bar_pos_x, text_pos_y, COLOR_STD_FONT, COLOR_STD_BG); + DrawString(MAIN_SCREEN, progstr, bar_pos_x, text_pos_y, COLOR_STD_FONT, COLOR_STD_BG, true); if (sec_elapsed >= 1) { snprintf(tempstr, 16, "ETA %02llum%02llus", sec_remain / 60, sec_remain % 60); ResizeString(progstr, tempstr, 16, 8, true); DrawString(MAIN_SCREEN, progstr, bar_pos_x + bar_width - 1 - (FONT_WIDTH_EXT * 16), - bar_pos_y - line_height - 1, COLOR_STD_FONT, COLOR_STD_BG); + bar_pos_y - line_height - 1, COLOR_STD_FONT, COLOR_STD_BG, true); } - DrawString(MAIN_SCREEN, "(hold B to cancel)", bar_pos_x + 2, text_pos_y + 14, COLOR_STD_FONT, COLOR_STD_BG); + DrawString(MAIN_SCREEN, "(hold B to cancel)", bar_pos_x + 2, text_pos_y + 14, COLOR_STD_FONT, COLOR_STD_BG, false); last_prog_width = prog_width; diff --git a/arm9/source/common/ui.h b/arm9/source/common/ui.h index dcded0d..e1d8794 100644 --- a/arm9/source/common/ui.h +++ b/arm9/source/common/ui.h @@ -54,7 +54,7 @@ void DrawBitmap(u8* screen, int x, int y, int w, int h, u8* bitmap); void DrawQrCode(u8* screen, u8* qrcode); void DrawCharacter(unsigned char *screen, int character, int x, int y, int color, int bgcolor); -void DrawString(unsigned char *screen, const char *str, int x, int y, int color, int bgcolor); +void DrawString(unsigned char *screen, const char *str, int x, int y, int color, int bgcolor, bool fix_utf8); void DrawStringF(unsigned char *screen, int x, int y, int color, int bgcolor, const char *format, ...); void DrawStringCenter(u8* screen, int color, int bgcolor, const char *format, ...); diff --git a/arm9/source/common/utf.c b/arm9/source/common/utf.c new file mode 100644 index 0000000..12d8a66 --- /dev/null +++ 
b/arm9/source/common/utf.c
@@ -0,0 +1,298 @@
+#include "utf.h"
+
+#define UTF_MAX_UNITS 256
+#define ASCII_UNKNOWN ((u8) '?')
+
+// most of the code here shamelessly stolen from:
+// https://github.com/smealum/ctrulib/tree/bd34fd59dbf0691e2dba76be65f260303d8ccec7/libctru/source/util/utf
+
+
+// Decodes the UTF-8 sequence starting at `in` into one code point in `*out`.
+// Returns the number of bytes consumed (1-4), or -1 for a stray continuation
+// byte, an overlong form, a bad continuation byte or a value above U+10FFFF.
+// `*out` is left untouched on failure; encoded surrogates are not rejected.
+int decode_utf8(u32 *out, const u8 *in)
+{
+    u8 code1, code2, code3, code4;
+
+    code1 = *in++;
+    if(code1 < 0x80)
+    {
+        /* 1-byte sequence */
+        *out = code1;
+        return 1;
+    }
+    else if(code1 < 0xC2)
+    {
+        return -1;
+    }
+    else if(code1 < 0xE0)
+    {
+        /* 2-byte sequence */
+        code2 = *in++;
+        if((code2 & 0xC0) != 0x80)
+        {
+            return -1;
+        }
+
+        *out = (code1 << 6) + code2 - 0x3080;
+        return 2;
+    }
+    else if(code1 < 0xF0)
+    {
+        /* 3-byte sequence */
+        code2 = *in++;
+        if((code2 & 0xC0) != 0x80)
+        {
+            return -1;
+        }
+        if(code1 == 0xE0 && code2 < 0xA0)
+        {
+            return -1;
+        }
+
+        code3 = *in++;
+        if((code3 & 0xC0) != 0x80)
+        {
+            return -1;
+        }
+
+        *out = (code1 << 12) + (code2 << 6) + code3 - 0xE2080;
+        return 3;
+    }
+    else if(code1 < 0xF5)
+    {
+        /* 4-byte sequence */
+        code2 = *in++;
+        if((code2 & 0xC0) != 0x80)
+        {
+            return -1;
+        }
+        if(code1 == 0xF0 && code2 < 0x90)
+        {
+            return -1;
+        }
+        if(code1 == 0xF4 && code2 >= 0x90)
+        {
+            return -1;
+        }
+
+        code3 = *in++;
+        if((code3 & 0xC0) != 0x80)
+        {
+            return -1;
+        }
+
+        code4 = *in++;
+        if((code4 & 0xC0) != 0x80)
+        {
+            return -1;
+        }
+
+        *out = (code1 << 18) + (code2 << 12) + (code3 << 6) + code4 - 0x3C82080;
+        return 4;
+    }
+
+    return -1;
+}
+
+// Decodes one code point from the UTF-16 units at `in` into `*out`.
+// Returns the number of units consumed (1, or 2 for a surrogate pair), or -1
+// if a high surrogate is not followed by a low surrogate. A lone low
+// surrogate is passed through unchanged as a single unit.
+int decode_utf16(u32 *out, const u16 *in)
+{
+    u16 code1, code2;
+
+    code1 = *in++;
+    if(code1 >= 0xD800 && code1 < 0xDC00)
+    {
+        /* surrogate pair */
+        code2 = *in++;
+        if(code2 >= 0xDC00 && code2 < 0xE000)
+        {
+            *out = (code1 << 10) + code2 - 0x35FDC00;
+            return 2;
+        }
+
+        return -1;
+    }
+
+    *out = code1;
+    return 1;
+}
+
+// Encodes code point `in` as UTF-8 into `out` (up to 4 bytes, not terminated).
+// Returns the number of bytes required (1-4), or -1 if `in` is above U+10FFFF.
+// `out` may be NULL to only query the length.
+int encode_utf8(u8 *out, u32 in)
+{
+    if(in < 0x80)
+    {
+        if(out != NULL)
+            *out++ = in;
+        return 1;
+    }
+    else if(in < 0x800)
+    {
+        if(out != NULL)
+        {
+            *out++ = (in >> 6) + 0xC0;
+            *out++ = (in & 0x3F) + 0x80;
+        }
+        return 2;
+    }
+    else if(in < 0x10000)
+    {
+        if(out != NULL)
+        {
+            *out++ = (in >> 12) + 0xE0;
+            *out++ = ((in >> 6) & 0x3F) + 0x80;
+            *out++ = (in & 0x3F) + 0x80;
+        }
+        return 3;
+    }
+    else if(in < 0x110000)
+    {
+        if(out != NULL)
+        {
+            *out++ = (in >> 18) + 0xF0;
+            *out++ = ((in >> 12) & 0x3F) + 0x80;
+            *out++ = ((in >> 6) & 0x3F) + 0x80;
+            *out++ = (in & 0x3F) + 0x80;
+        }
+        return 4;
+    }
+
+    return -1;
+}
+
+// Encodes code point `in` as UTF-16 into `out` (up to 2 units, not terminated).
+// Returns the number of units required (1, or 2 for a surrogate pair), or -1
+// if `in` is above U+10FFFF. `out` may be NULL to only query the length.
+int encode_utf16(u16 *out, u32 in)
+{
+    if(in < 0x10000)
+    {
+        if(out != NULL)
+            *out++ = in;
+        return 1;
+    }
+    else if(in < 0x110000)
+    {
+        if(out != NULL)
+        {
+            *out++ = (in >> 10) + 0xD7C0;
+            *out++ = (in & 0x3FF) + 0xDC00;
+        }
+        return 2;
+    }
+
+    return -1;
+}
+
+// Converts up to `len_in` UTF-16 units from `in` to UTF-8, stopping early at a
+// NUL unit. At most `len_out` bytes are stored in `out` (may be NULL to only
+// count); no terminator is written. Returns the number of bytes the complete
+// conversion needs (which can exceed `len_out`), 0 for empty input, or -1 on
+// malformed/truncated input or when more than UTF_MAX_UNITS bytes are needed.
+int utf16_to_utf8(u8 *out, const u16 *in, int len_out, int len_in)
+{
+    int rc = 0;
+    int units;
+    u32 code;
+    u8 encoded[4];
+
+    // test len_in before decoding so no unit beyond the input is ever read
+    while(len_in > 0)
+    {
+        // a high surrogate needs its partner inside the remaining input
+        if(*in >= 0xD800 && *in < 0xDC00 && len_in < 2)
+            return -1;
+
+        units = decode_utf16(&code, in);
+        if(units == -1)
+            return -1;
+        len_in -= units;
+
+        if(code == 0)
+            break;
+        in += units;
+
+        units = encode_utf8(encoded, code);
+        if(units == -1)
+            return -1;
+
+        if(out != NULL && rc + units <= len_out)
+        {
+            for(int i = 0; i < units; i++)
+                *out++ = encoded[i];
+        }
+
+        if(UTF_MAX_UNITS - units >= rc)
+            rc += units;
+        else
+            return -1;
+    }
+
+    return rc;
+}
+
+// Returns how many bytes the UTF-8 sequence introduced by `lead` spans. Bytes
+// that cannot start a sequence report 1, they are rejected by decode_utf8().
+static int utf8_sequence_length(u8 lead)
+{
+    if(lead < 0xC2) return 1;
+    if(lead < 0xE0) return 2;
+    if(lead < 0xF0) return 3;
+    if(lead < 0xF5) return 4;
+    return 1;
+}
+
+// Converts up to `len_in` UTF-8 bytes from `in` to UTF-16, stopping early at a
+// NUL byte. At most `len_out` units are stored in `out` (may be NULL to only
+// count); no terminator is written. Returns the number of units the complete
+// conversion needs (which can exceed `len_out`), 0 for empty input, or -1 on
+// malformed/truncated input or when more than UTF_MAX_UNITS units are needed.
+int utf8_to_utf16(u16 *out, const u8 *in, int len_out, int len_in)
+{
+    int rc = 0;
+    int units;
+    u32 code;
+    u16 encoded[2];
+
+    // test len_in before decoding so no byte beyond the input is ever read
+    while(len_in > 0)
+    {
+        // the whole sequence has to lie inside the remaining input
+        if(utf8_sequence_length(*in) > len_in)
+            return -1;
+
+        units = decode_utf8(&code, in);
+        if(units == -1)
+            return -1;
+        len_in -= units;
+
+        if(code == 0)
+            break;
+        in += units;
+
+        units = encode_utf16(encoded, code);
+        if(units == -1)
+            return -1;
+
+        if(out != NULL && rc + units <= len_out)
+        {
+            *out++ = encoded[0];
+            if(units > 1)
+                *out++ = encoded[1];
+        }
+
+        if(UTF_MAX_UNITS - units >= rc)
+            rc += units;
+        else
+            return -1;
+    }
+
+    return rc;
+}
diff --git a/arm9/source/common/utf.h b/arm9/source/common/utf.h
new file mode 100644
index 0000000..32aea1c
--- /dev/null
+++ b/arm9/source/common/utf.h
@@ -0,0 +1,8 @@
+#pragma once
+
+#include "common.h"
+
+// most of the code here shamelessly stolen from:
+// https://github.com/smealum/ctrulib/tree/bd34fd59dbf0691e2dba76be65f260303d8ccec7/libctru/source/util/utf
+int utf16_to_utf8(u8 *out, const u16 *in, int len_out, int len_in);
+int utf8_to_utf16(u16 *out, const u8 *in, int len_out, int len_in);
diff --git a/arm9/source/fatfs/ffconf.h b/arm9/source/fatfs/ffconf.h
index d2149a3..2102ba3 100644
--- a/arm9/source/fatfs/ffconf.h
+++ b/arm9/source/fatfs/ffconf.h
@@ -117,7 +117,7 @@
 / ff_memfree() in ffsystem.c, need to be added to the project. */
 
 
-#define FF_LFN_UNICODE 0
+#define FF_LFN_UNICODE 2
 /* This option switches the character encoding on the API when LFN is enabled.
 /
 / 0: ANSI/OEM in current CP (TCHAR = char)
diff --git a/arm9/source/filesys/sddata.c b/arm9/source/filesys/sddata.c
index 645f36e..8d59a37 100644
--- a/arm9/source/filesys/sddata.c
+++ b/arm9/source/filesys/sddata.c
@@ -155,7 +155,7 @@ FRESULT fx_open (FIL* fp, const TCHAR* path, BYTE mode) {
             u8 hashstr[256];
             u8 sha256sum[32];
             u32 plen = 0;
-            // poor man's UTF-8 -> UTF-16 / uppercase -> lowercase
+            // poor man's ASCII -> UTF-16 / uppercase -> lowercase
             for (plen = 0; plen < 128; plen++) {
                 u8 symbol = path[2 + plen];
                 if ((symbol >= 'A') && (symbol <= 'Z')) symbol += ('a' - 'A');
diff --git a/arm9/source/game/romfs.c b/arm9/source/game/romfs.c
index d7091cb..d51cd58 100644
--- a/arm9/source/game/romfs.c
+++ b/arm9/source/game/romfs.c
@@ -1,4 +1,5 @@
 #include "romfs.h"
+#include "utf.h"
 
 // validate header by checking offsets and sizes
 u32 ValidateLv3Header(RomFsLv3Header* lv3, u32 max_size) {
@@ -40,11 +41,11 @@ u32 HashLv3Path(u16* wname, u32 name_len, u32 offset_parent) {
 RomFsLv3DirMeta* GetLv3DirMeta(const char* name, u32 
offset_parent, RomFsLv3Index* index) { RomFsLv3DirMeta* meta; - // wide name + // wide (UTF-16) name u16 wname[256]; - u32 name_len = strnlen(name, 256); - for (name_len = 0; name[name_len]; name_len++) - wname[name_len] = name[name_len]; // poor mans UTF-8 -> UTF-16 + int name_len = utf8_to_utf16(wname, (u8*) name, 255, 255); + if (name_len <= 0) return NULL; + wname[name_len] = 0; // hashing, first offset u32 hash = HashLv3Path(wname, name_len, offset_parent); @@ -54,7 +55,7 @@ RomFsLv3DirMeta* GetLv3DirMeta(const char* name, u32 offset_parent, RomFsLv3Inde for (; offset < index->size_dirmeta; offset = meta->offset_samehash) { meta = (RomFsLv3DirMeta*) (index->dirmeta + offset); if ((offset_parent == meta->offset_parent) && - (name_len == meta->name_len / 2) && + ((u32) name_len == meta->name_len / 2) && (memcmp(wname, meta->wname, name_len * 2) == 0)) break; } @@ -65,11 +66,11 @@ RomFsLv3DirMeta* GetLv3DirMeta(const char* name, u32 offset_parent, RomFsLv3Inde RomFsLv3FileMeta* GetLv3FileMeta(const char* name, u32 offset_parent, RomFsLv3Index* index) { RomFsLv3FileMeta* meta; - // wide name + // wide (UTF-16) name u16 wname[256]; - u32 name_len = strnlen(name, 256); - for (name_len = 0; name[name_len]; name_len++) - wname[name_len] = name[name_len]; // poor mans UTF-8 -> UTF-16 + int name_len = utf8_to_utf16(wname, (u8*) name, 255, 255); + if (name_len <= 0) return NULL; + wname[name_len] = 0; // hashing, first offset u32 hash = HashLv3Path(wname, name_len, offset_parent); @@ -79,7 +80,7 @@ RomFsLv3FileMeta* GetLv3FileMeta(const char* name, u32 offset_parent, RomFsLv3In for (; offset < index->size_filemeta; offset = meta->offset_samehash) { meta = (RomFsLv3FileMeta*) (index->filemeta + offset); if ((offset_parent == meta->offset_parent) && - (name_len == meta->name_len / 2) && + ((u32) name_len == meta->name_len / 2) && (memcmp(wname, meta->wname, name_len * 2) == 0)) break; } diff --git a/arm9/source/godmode.c b/arm9/source/godmode.c index 52b7671..ace1cdb 
100644 --- a/arm9/source/godmode.c +++ b/arm9/source/godmode.c @@ -619,7 +619,7 @@ u32 FileHexViewer(const char* path) { if (x_off >= 0) DrawStringF(screen, x_off - x0, y, cutoff ? COLOR_HVOFFS : COLOR_HVOFFSI, COLOR_STD_BG, "%08X", (unsigned int) offset + curr_pos); if (x_ascii >= 0) { - DrawString(screen, ascii, x_ascii - x0, y, COLOR_HVASCII, COLOR_STD_BG); + DrawString(screen, ascii, x_ascii - x0, y, COLOR_HVASCII, COLOR_STD_BG, false); for (u32 i = marked0; i < marked1; i++) DrawCharacter(screen, ascii[i % cols], x_ascii - x0 + (FONT_WIDTH_EXT * i), y, COLOR_MARKED, COLOR_STD_BG); if (edit_mode && ((u32) cursor / cols == row)) DrawCharacter(screen, ascii[cursor % cols], diff --git a/arm9/source/utils/scripting.c b/arm9/source/utils/scripting.c index 033487b..6c14a87 100644 --- a/arm9/source/utils/scripting.c +++ b/arm9/source/utils/scripting.c @@ -1394,7 +1394,7 @@ void MemTextView(const char* text, u32 len, char* line0, int off_disp, int lno, if (ar) memcpy(txtstr + p_ar, ar_str, strnlen(ar_str, 16)); // draw line number & text - DrawString(TOP_SCREEN, txtstr, x_txt, y, color_text, COLOR_STD_BG); + DrawString(TOP_SCREEN, txtstr, x_txt, y, color_text, COLOR_STD_BG, false); if (TV_LNOS > 0) { // line number if (ptr != ptr_next) DrawStringF(TOP_SCREEN, x_lno, y, ((ptr == text) || (*(ptr-1) == '\n')) ? 
COLOR_TVOFFS : COLOR_TVOFFSL, COLOR_STD_BG, "%0*lu", TV_LNOS, nln); @@ -1404,7 +1404,7 @@ void MemTextView(const char* text, u32 len, char* line0, int off_disp, int lno, // colorize comment if is_script if ((cmt_start > 0) && ((u32) cmt_start < TV_LLEN_DISP)) { memset(txtstr, ' ', cmt_start); - DrawString(TOP_SCREEN, txtstr, x_txt, y, script_color_comment, COLOR_TRANSPARENT); + DrawString(TOP_SCREEN, txtstr, x_txt, y, script_color_comment, COLOR_TRANSPARENT, false); } // colorize arrows diff --git a/arm9/source/virtual/vgame.c b/arm9/source/virtual/vgame.c index 2c7cad6..572d71c 100644 --- a/arm9/source/virtual/vgame.c +++ b/arm9/source/virtual/vgame.c @@ -1,6 +1,7 @@ #include "vgame.h" #include "image.h" #include "game.h" +#include "utf.h" #include "aes.h" #define VGAME_BUFFER_SIZE 0x200000 // at least 2MB, multiple of 0x200 @@ -1139,8 +1140,7 @@ bool GetVGameLv3Filename(char* name, const VirtualFile* vfile, u32 n_chars) { name_len = filemeta->name_len / 2; } memset(name, 0, n_chars); - for (u32 i = 0; (i < (n_chars-1)) && (i < name_len); i++) - name[i] = wname[i]; // poor mans UTF-16 -> UTF-8 (doesn't work proper for special chars) + utf16_to_utf8((u8*) name, wname, n_chars-1, name_len); return true; }