Properly handle UTF-8 filenames

This commit is contained in:
d0k3 2018-03-16 00:25:00 +01:00
parent 02f5abb877
commit ac514669a1
10 changed files with 308 additions and 25 deletions

View File

@ -242,12 +242,14 @@ void DrawCharacter(u8* screen, int character, int x, int y, int color, int bgcol
} }
} }
void DrawString(u8* screen, const char *str, int x, int y, int color, int bgcolor) void DrawString(u8* screen, const char *str, int x, int y, int color, int bgcolor, bool fix_utf8)
{ {
size_t max_len = (((screen == TOP_SCREEN) ? SCREEN_WIDTH_TOP : SCREEN_WIDTH_BOT) - x) / font_width; size_t max_len = (((screen == TOP_SCREEN) ? SCREEN_WIDTH_TOP : SCREEN_WIDTH_BOT) - x) / font_width;
size_t len = (strlen(str) > max_len) ? max_len : strlen(str); size_t len = (strlen(str) > max_len) ? max_len : strlen(str);
for (size_t i = 0; i < len; i++) for (size_t i = 0; i < len; i++) {
DrawCharacter(screen, str[i], x + i * font_width, y, color, bgcolor); char c = (char) (fix_utf8 && str[i] >= 0x80) ? '?' : str[i];
DrawCharacter(screen, c, x + i * font_width, y, color, bgcolor);
}
} }
void DrawStringF(u8* screen, int x, int y, int color, int bgcolor, const char *format, ...) void DrawStringF(u8* screen, int x, int y, int color, int bgcolor, const char *format, ...)
@ -259,7 +261,7 @@ void DrawStringF(u8* screen, int x, int y, int color, int bgcolor, const char *f
va_end(va); va_end(va);
for (char* text = strtok(str, "\n"); text != NULL; text = strtok(NULL, "\n"), y += line_height) for (char* text = strtok(str, "\n"); text != NULL; text = strtok(NULL, "\n"), y += line_height)
DrawString(screen, text, x, y, color, bgcolor); DrawString(screen, text, x, y, color, bgcolor, true);
} }
void DrawStringCenter(u8* screen, int color, int bgcolor, const char *format, ...) void DrawStringCenter(u8* screen, int color, int bgcolor, const char *format, ...)
@ -921,14 +923,14 @@ bool ShowProgress(u64 current, u64 total, const char* opstr)
TruncateString(progstr, opstr, (bar_width / FONT_WIDTH_EXT) - 7, 8); TruncateString(progstr, opstr, (bar_width / FONT_WIDTH_EXT) - 7, 8);
snprintf(tempstr, 64, "%s (%lu%%)", progstr, prog_percent); snprintf(tempstr, 64, "%s (%lu%%)", progstr, prog_percent);
ResizeString(progstr, tempstr, bar_width / FONT_WIDTH_EXT, 8, false); ResizeString(progstr, tempstr, bar_width / FONT_WIDTH_EXT, 8, false);
DrawString(MAIN_SCREEN, progstr, bar_pos_x, text_pos_y, COLOR_STD_FONT, COLOR_STD_BG); DrawString(MAIN_SCREEN, progstr, bar_pos_x, text_pos_y, COLOR_STD_FONT, COLOR_STD_BG, true);
if (sec_elapsed >= 1) { if (sec_elapsed >= 1) {
snprintf(tempstr, 16, "ETA %02llum%02llus", sec_remain / 60, sec_remain % 60); snprintf(tempstr, 16, "ETA %02llum%02llus", sec_remain / 60, sec_remain % 60);
ResizeString(progstr, tempstr, 16, 8, true); ResizeString(progstr, tempstr, 16, 8, true);
DrawString(MAIN_SCREEN, progstr, bar_pos_x + bar_width - 1 - (FONT_WIDTH_EXT * 16), DrawString(MAIN_SCREEN, progstr, bar_pos_x + bar_width - 1 - (FONT_WIDTH_EXT * 16),
bar_pos_y - line_height - 1, COLOR_STD_FONT, COLOR_STD_BG); bar_pos_y - line_height - 1, COLOR_STD_FONT, COLOR_STD_BG, true);
} }
DrawString(MAIN_SCREEN, "(hold B to cancel)", bar_pos_x + 2, text_pos_y + 14, COLOR_STD_FONT, COLOR_STD_BG); DrawString(MAIN_SCREEN, "(hold B to cancel)", bar_pos_x + 2, text_pos_y + 14, COLOR_STD_FONT, COLOR_STD_BG, false);
last_prog_width = prog_width; last_prog_width = prog_width;

View File

@ -54,7 +54,7 @@ void DrawBitmap(u8* screen, int x, int y, int w, int h, u8* bitmap);
void DrawQrCode(u8* screen, u8* qrcode); void DrawQrCode(u8* screen, u8* qrcode);
void DrawCharacter(unsigned char *screen, int character, int x, int y, int color, int bgcolor); void DrawCharacter(unsigned char *screen, int character, int x, int y, int color, int bgcolor);
void DrawString(unsigned char *screen, const char *str, int x, int y, int color, int bgcolor); void DrawString(unsigned char *screen, const char *str, int x, int y, int color, int bgcolor, bool fix_utf8);
void DrawStringF(unsigned char *screen, int x, int y, int color, int bgcolor, const char *format, ...); void DrawStringF(unsigned char *screen, int x, int y, int color, int bgcolor, const char *format, ...);
void DrawStringCenter(u8* screen, int color, int bgcolor, const char *format, ...); void DrawStringCenter(u8* screen, int color, int bgcolor, const char *format, ...);

272
arm9/source/common/utf.c Normal file
View File

@ -0,0 +1,272 @@
#include "utf.h"
#define UTF_MAX_UNITS 256
#define ASCII_UNKNOWN ((u8) '?')
// most of the code here shamelessly stolen from:
// https://github.com/smealum/ctrulib/tree/bd34fd59dbf0691e2dba76be65f260303d8ccec7/libctru/source/util/utf
/*
 * Decode one UTF-8 sequence starting at `in`.
 *
 * out: receives the decoded code point (written only on success)
 * in:  pointer to the lead byte; continuation bytes are read one at a time
 *      and reading stops at the first byte that fails validation
 *
 * Returns the number of bytes consumed (1-4), or -1 for a malformed
 * sequence: stray continuation byte, overlong form, lead byte above 0xF4,
 * or a code point beyond U+10FFFF.
 */
int decode_utf8(u32 *out, const u8 *in)
{
    const u8 lead = in[0];
    u8 c1, c2, c3;

    // 1-byte sequence (plain ASCII)
    if(lead < 0x80)
    {
        *out = lead;
        return 1;
    }

    // 0x80..0xBF are continuation bytes, 0xC0/0xC1 would be overlong,
    // 0xF5 and above can never start a valid sequence
    if(lead < 0xC2 || lead >= 0xF5)
        return -1;

    // every remaining form has at least one continuation byte
    c1 = in[1];
    if((c1 & 0xC0) != 0x80)
        return -1;

    // 2-byte sequence
    if(lead < 0xE0)
    {
        *out = ((u32) (lead & 0x1F) << 6) | (u32) (c1 & 0x3F);
        return 2;
    }

    // 3-byte sequence
    if(lead < 0xF0)
    {
        // 0xE0 followed by less than 0xA0 is an overlong encoding
        if(lead == 0xE0 && c1 < 0xA0)
            return -1;

        c2 = in[2];
        if((c2 & 0xC0) != 0x80)
            return -1;

        *out = ((u32) (lead & 0x0F) << 12) | ((u32) (c1 & 0x3F) << 6) | (u32) (c2 & 0x3F);
        return 3;
    }

    // 4-byte sequence
    // 0xF0 followed by less than 0x90 is overlong,
    // 0xF4 followed by 0x90 or more exceeds U+10FFFF
    if(lead == 0xF0 && c1 < 0x90)
        return -1;
    if(lead == 0xF4 && c1 >= 0x90)
        return -1;

    c2 = in[2];
    if((c2 & 0xC0) != 0x80)
        return -1;

    c3 = in[3];
    if((c3 & 0xC0) != 0x80)
        return -1;

    *out = ((u32) (lead & 0x07) << 18) | ((u32) (c1 & 0x3F) << 12) |
           ((u32) (c2 & 0x3F) << 6) | (u32) (c3 & 0x3F);
    return 4;
}
/*
 * Decode one UTF-16 sequence starting at `in`.
 *
 * out: receives the decoded code point (written only on success)
 * in:  pointer to the first code unit; a second unit is read only when the
 *      first one is a high surrogate (0xD800..0xDBFF)
 *
 * Returns the number of code units consumed (1 or 2), or -1 when a high
 * surrogate is not followed by a low surrogate (0xDC00..0xDFFF).
 * A low surrogate in first position is not rejected; it is handed back
 * unchanged as a single unit.
 */
int decode_utf16(u32 *out, const u16 *in)
{
    const u16 first = in[0];

    // anything that is not a high surrogate stands for itself
    if(first < 0xD800 || first >= 0xDC00)
    {
        *out = first;
        return 1;
    }

    // surrogate pair: the partner has to be a low surrogate
    const u16 second = in[1];
    if(second < 0xDC00 || second >= 0xE000)
        return -1;

    *out = 0x10000 + ((u32) (first - 0xD800) << 10) + (u32) (second - 0xDC00);
    return 2;
}
/*
 * Encode a single code point as UTF-8.
 *
 * out: destination for up to 4 bytes, or NULL to only query the length
 * in:  code point to encode
 *
 * Returns the number of bytes in the encoding (1-4), or -1 when `in` is
 * 0x110000 or larger. No terminator is written.
 */
int encode_utf8(u8 *out, u32 in)
{
    u8 seq[4];
    int units;

    if(in < 0x80)
    {
        seq[0] = in;
        units = 1;
    }
    else if(in < 0x800)
    {
        seq[0] = 0xC0 | (in >> 6);
        seq[1] = 0x80 | (in & 0x3F);
        units = 2;
    }
    else if(in < 0x10000)
    {
        seq[0] = 0xE0 | (in >> 12);
        seq[1] = 0x80 | ((in >> 6) & 0x3F);
        seq[2] = 0x80 | (in & 0x3F);
        units = 3;
    }
    else if(in < 0x110000)
    {
        seq[0] = 0xF0 | (in >> 18);
        seq[1] = 0x80 | ((in >> 12) & 0x3F);
        seq[2] = 0x80 | ((in >> 6) & 0x3F);
        seq[3] = 0x80 | (in & 0x3F);
        units = 4;
    }
    else
    {
        return -1;
    }

    // with a NULL destination only the length is reported
    if(out != NULL)
    {
        for(int i = 0; i < units; i++)
            out[i] = seq[i];
    }

    return units;
}
/*
 * Encode a single code point as UTF-16.
 *
 * out: destination for up to 2 code units, or NULL to only query the length
 * in:  code point to encode
 *
 * Returns the number of code units in the encoding (1 or 2), or -1 when
 * `in` is 0x110000 or larger. No terminator is written.
 */
int encode_utf16(u16 *out, u32 in)
{
    u16 seq[2];
    int units;

    if(in >= 0x110000)
        return -1;

    if(in < 0x10000)
    {
        // fits into a single code unit
        seq[0] = in;
        seq[1] = 0;
        units = 1;
    }
    else
    {
        // split the 20 bits above 0x10000 into a surrogate pair
        seq[0] = 0xD800 + ((in - 0x10000) >> 10);
        seq[1] = 0xDC00 + (in & 0x3FF);
        units = 2;
    }

    // with a NULL destination only the length is reported
    if(out != NULL)
    {
        for(int i = 0; i < units; i++)
            out[i] = seq[i];
    }

    return units;
}
/*
 * Convert a UTF-16 string to UTF-8.
 *
 * out:     destination buffer, or NULL to only compute the length
 * in:      UTF-16 input; conversion stops at a 0 code unit or after
 *          len_in units, whichever comes first
 * len_out: capacity of `out` in bytes; a sequence is stored only while the
 *          running total still fits, later sequences are counted but dropped
 * len_in:  number of readable code units at `in`
 *
 * Returns the number of UTF-8 bytes the converted input encodes to (this
 * may be larger than len_out), or -1 when len_in is not positive, the input
 * holds a broken or truncated surrogate pair, or the total would exceed
 * UTF_MAX_UNITS. No terminator is written to `out`.
 */
int utf16_to_utf8(u8 *out, const u16 *in, int len_out, int len_in)
{
    int rc = 0;

    // nothing readable: report an error without touching `in`
    if(len_in <= 0)
        return -1;

    while(len_in > 0)
    {
        u32 code;
        u8 encoded[4];
        int units;

        // a high surrogate needs its partner inside the input window;
        // check this before decode_utf16() reads in[1]
        if(in[0] >= 0xD800 && in[0] < 0xDC00 && len_in < 2)
            return -1;

        units = decode_utf16(&code, in);
        if(units == -1)
            return -1;
        len_in -= units; // cannot go negative, see the checks above

        // a 0 code unit terminates the string
        if(code == 0)
            break;
        in += units;

        units = encode_utf8(encoded, code);
        if(units == -1)
            return -1;

        // store the sequence only if all of it fits
        if(out != NULL && rc + units <= len_out)
        {
            for(int i = 0; i < units; i++)
                *out++ = encoded[i];
        }

        // hard limit on the length of the result
        if(UTF_MAX_UNITS - units >= rc)
            rc += units;
        else
            return -1;
    }

    return rc;
}
/*
 * Convert a UTF-8 string to UTF-16.
 *
 * out:     destination buffer, or NULL to only compute the length
 * in:      UTF-8 input; conversion stops at a 0 byte or after len_in
 *          bytes, whichever comes first
 * len_out: capacity of `out` in code units; a sequence is stored only while
 *          the running total still fits, later sequences are counted but
 *          dropped
 * len_in:  number of readable bytes at `in`
 *
 * Returns the number of UTF-16 code units the converted input encodes to
 * (this may be larger than len_out), or -1 when len_in is not positive, the
 * input holds a malformed or truncated sequence, or the total would exceed
 * UTF_MAX_UNITS. No terminator is written to `out`.
 */
int utf8_to_utf16(u16 *out, const u8 *in, int len_out, int len_in)
{
    int rc = 0;

    // nothing readable: report an error without touching `in`
    if(len_in <= 0)
        return -1;

    while(len_in > 0)
    {
        u32 code;
        u16 encoded[2];
        int units;
        int need;
        const u8 lead = in[0];

        // the lead byte fixes the sequence length; make sure the whole
        // sequence lies inside the input window before decode_utf8()
        // reads the continuation bytes
        if(lead < 0x80)
            need = 1;
        else if(lead < 0xC2)
            return -1; // stray continuation byte or overlong lead
        else if(lead < 0xE0)
            need = 2;
        else if(lead < 0xF0)
            need = 3;
        else if(lead < 0xF5)
            need = 4;
        else
            return -1; // can never start a valid sequence
        if(len_in < need)
            return -1;

        units = decode_utf8(&code, in);
        if(units == -1)
            return -1;
        len_in -= units; // cannot go negative, see the check above

        // a 0 byte terminates the string
        if(code == 0)
            break;
        in += units;

        units = encode_utf16(encoded, code);
        if(units == -1)
            return -1;

        // store the sequence only if all of it fits
        if(out != NULL && rc + units <= len_out)
        {
            for(int i = 0; i < units; i++)
                *out++ = encoded[i];
        }

        // hard limit on the length of the result
        if(UTF_MAX_UNITS - units >= rc)
            rc += units;
        else
            return -1;
    }

    return rc;
}

8
arm9/source/common/utf.h Normal file
View File

@ -0,0 +1,8 @@
#pragma once
#include "common.h"
// most of the code here shamelessly stolen from:
// https://github.com/smealum/ctrulib/tree/bd34fd59dbf0691e2dba76be65f260303d8ccec7/libctru/source/util/utf
// Convert UTF-16 to UTF-8. Reads at most len_in code units from `in` and
// stops early at a 0 code unit. Bytes are stored to `out` (may be NULL) only
// while the running total fits into len_out; no terminator is written.
// Returns the number of UTF-8 bytes the input encodes to (can be larger than
// len_out), or -1 on invalid input or when the result exceeds 256 units.
int utf16_to_utf8(u8 *out, const u16 *in, int len_out, int len_in);
// Convert UTF-8 to UTF-16. Reads at most len_in bytes from `in` and stops
// early at a 0 byte. Code units are stored to `out` (may be NULL) only while
// the running total fits into len_out; no terminator is written.
// Returns the number of UTF-16 code units the input encodes to (can be larger
// than len_out), or -1 on invalid input or when the result exceeds 256 units.
int utf8_to_utf16(u16 *out, const u8 *in, int len_out, int len_in);

View File

@ -117,7 +117,7 @@
/ ff_memfree() in ffsystem.c, need to be added to the project. */ / ff_memfree() in ffsystem.c, need to be added to the project. */
#define FF_LFN_UNICODE 0 #define FF_LFN_UNICODE 2
/* This option switches the character encoding on the API when LFN is enabled. /* This option switches the character encoding on the API when LFN is enabled.
/ /
/ 0: ANSI/OEM in current CP (TCHAR = char) / 0: ANSI/OEM in current CP (TCHAR = char)

View File

@ -155,7 +155,7 @@ FRESULT fx_open (FIL* fp, const TCHAR* path, BYTE mode) {
u8 hashstr[256]; u8 hashstr[256];
u8 sha256sum[32]; u8 sha256sum[32];
u32 plen = 0; u32 plen = 0;
// poor man's UTF-8 -> UTF-16 / uppercase -> lowercase // poor man's ASCII -> UTF-16 / uppercase -> lowercase
for (plen = 0; plen < 128; plen++) { for (plen = 0; plen < 128; plen++) {
u8 symbol = path[2 + plen]; u8 symbol = path[2 + plen];
if ((symbol >= 'A') && (symbol <= 'Z')) symbol += ('a' - 'A'); if ((symbol >= 'A') && (symbol <= 'Z')) symbol += ('a' - 'A');

View File

@ -1,4 +1,5 @@
#include "romfs.h" #include "romfs.h"
#include "utf.h"
// validate header by checking offsets and sizes // validate header by checking offsets and sizes
u32 ValidateLv3Header(RomFsLv3Header* lv3, u32 max_size) { u32 ValidateLv3Header(RomFsLv3Header* lv3, u32 max_size) {
@ -40,11 +41,11 @@ u32 HashLv3Path(u16* wname, u32 name_len, u32 offset_parent) {
RomFsLv3DirMeta* GetLv3DirMeta(const char* name, u32 offset_parent, RomFsLv3Index* index) { RomFsLv3DirMeta* GetLv3DirMeta(const char* name, u32 offset_parent, RomFsLv3Index* index) {
RomFsLv3DirMeta* meta; RomFsLv3DirMeta* meta;
// wide name // wide (UTF-16) name
u16 wname[256]; u16 wname[256];
u32 name_len = strnlen(name, 256); int name_len = utf8_to_utf16(wname, (u8*) name, 255, 255);
for (name_len = 0; name[name_len]; name_len++) if (name_len <= 0) return NULL;
wname[name_len] = name[name_len]; // poor mans UTF-8 -> UTF-16 wname[name_len] = 0;
// hashing, first offset // hashing, first offset
u32 hash = HashLv3Path(wname, name_len, offset_parent); u32 hash = HashLv3Path(wname, name_len, offset_parent);
@ -54,7 +55,7 @@ RomFsLv3DirMeta* GetLv3DirMeta(const char* name, u32 offset_parent, RomFsLv3Inde
for (; offset < index->size_dirmeta; offset = meta->offset_samehash) { for (; offset < index->size_dirmeta; offset = meta->offset_samehash) {
meta = (RomFsLv3DirMeta*) (index->dirmeta + offset); meta = (RomFsLv3DirMeta*) (index->dirmeta + offset);
if ((offset_parent == meta->offset_parent) && if ((offset_parent == meta->offset_parent) &&
(name_len == meta->name_len / 2) && ((u32) name_len == meta->name_len / 2) &&
(memcmp(wname, meta->wname, name_len * 2) == 0)) (memcmp(wname, meta->wname, name_len * 2) == 0))
break; break;
} }
@ -65,11 +66,11 @@ RomFsLv3DirMeta* GetLv3DirMeta(const char* name, u32 offset_parent, RomFsLv3Inde
RomFsLv3FileMeta* GetLv3FileMeta(const char* name, u32 offset_parent, RomFsLv3Index* index) { RomFsLv3FileMeta* GetLv3FileMeta(const char* name, u32 offset_parent, RomFsLv3Index* index) {
RomFsLv3FileMeta* meta; RomFsLv3FileMeta* meta;
// wide name // wide (UTF-16) name
u16 wname[256]; u16 wname[256];
u32 name_len = strnlen(name, 256); int name_len = utf8_to_utf16(wname, (u8*) name, 255, 255);
for (name_len = 0; name[name_len]; name_len++) if (name_len <= 0) return NULL;
wname[name_len] = name[name_len]; // poor mans UTF-8 -> UTF-16 wname[name_len] = 0;
// hashing, first offset // hashing, first offset
u32 hash = HashLv3Path(wname, name_len, offset_parent); u32 hash = HashLv3Path(wname, name_len, offset_parent);
@ -79,7 +80,7 @@ RomFsLv3FileMeta* GetLv3FileMeta(const char* name, u32 offset_parent, RomFsLv3In
for (; offset < index->size_filemeta; offset = meta->offset_samehash) { for (; offset < index->size_filemeta; offset = meta->offset_samehash) {
meta = (RomFsLv3FileMeta*) (index->filemeta + offset); meta = (RomFsLv3FileMeta*) (index->filemeta + offset);
if ((offset_parent == meta->offset_parent) && if ((offset_parent == meta->offset_parent) &&
(name_len == meta->name_len / 2) && ((u32) name_len == meta->name_len / 2) &&
(memcmp(wname, meta->wname, name_len * 2) == 0)) (memcmp(wname, meta->wname, name_len * 2) == 0))
break; break;
} }

View File

@ -619,7 +619,7 @@ u32 FileHexViewer(const char* path) {
if (x_off >= 0) DrawStringF(screen, x_off - x0, y, cutoff ? COLOR_HVOFFS : COLOR_HVOFFSI, if (x_off >= 0) DrawStringF(screen, x_off - x0, y, cutoff ? COLOR_HVOFFS : COLOR_HVOFFSI,
COLOR_STD_BG, "%08X", (unsigned int) offset + curr_pos); COLOR_STD_BG, "%08X", (unsigned int) offset + curr_pos);
if (x_ascii >= 0) { if (x_ascii >= 0) {
DrawString(screen, ascii, x_ascii - x0, y, COLOR_HVASCII, COLOR_STD_BG); DrawString(screen, ascii, x_ascii - x0, y, COLOR_HVASCII, COLOR_STD_BG, false);
for (u32 i = marked0; i < marked1; i++) for (u32 i = marked0; i < marked1; i++)
DrawCharacter(screen, ascii[i % cols], x_ascii - x0 + (FONT_WIDTH_EXT * i), y, COLOR_MARKED, COLOR_STD_BG); DrawCharacter(screen, ascii[i % cols], x_ascii - x0 + (FONT_WIDTH_EXT * i), y, COLOR_MARKED, COLOR_STD_BG);
if (edit_mode && ((u32) cursor / cols == row)) DrawCharacter(screen, ascii[cursor % cols], if (edit_mode && ((u32) cursor / cols == row)) DrawCharacter(screen, ascii[cursor % cols],

View File

@ -1394,7 +1394,7 @@ void MemTextView(const char* text, u32 len, char* line0, int off_disp, int lno,
if (ar) memcpy(txtstr + p_ar, ar_str, strnlen(ar_str, 16)); if (ar) memcpy(txtstr + p_ar, ar_str, strnlen(ar_str, 16));
// draw line number & text // draw line number & text
DrawString(TOP_SCREEN, txtstr, x_txt, y, color_text, COLOR_STD_BG); DrawString(TOP_SCREEN, txtstr, x_txt, y, color_text, COLOR_STD_BG, false);
if (TV_LNOS > 0) { // line number if (TV_LNOS > 0) { // line number
if (ptr != ptr_next) if (ptr != ptr_next)
DrawStringF(TOP_SCREEN, x_lno, y, ((ptr == text) || (*(ptr-1) == '\n')) ? COLOR_TVOFFS : COLOR_TVOFFSL, COLOR_STD_BG, "%0*lu", TV_LNOS, nln); DrawStringF(TOP_SCREEN, x_lno, y, ((ptr == text) || (*(ptr-1) == '\n')) ? COLOR_TVOFFS : COLOR_TVOFFSL, COLOR_STD_BG, "%0*lu", TV_LNOS, nln);
@ -1404,7 +1404,7 @@ void MemTextView(const char* text, u32 len, char* line0, int off_disp, int lno,
// colorize comment if is_script // colorize comment if is_script
if ((cmt_start > 0) && ((u32) cmt_start < TV_LLEN_DISP)) { if ((cmt_start > 0) && ((u32) cmt_start < TV_LLEN_DISP)) {
memset(txtstr, ' ', cmt_start); memset(txtstr, ' ', cmt_start);
DrawString(TOP_SCREEN, txtstr, x_txt, y, script_color_comment, COLOR_TRANSPARENT); DrawString(TOP_SCREEN, txtstr, x_txt, y, script_color_comment, COLOR_TRANSPARENT, false);
} }
// colorize arrows // colorize arrows

View File

@ -1,6 +1,7 @@
#include "vgame.h" #include "vgame.h"
#include "image.h" #include "image.h"
#include "game.h" #include "game.h"
#include "utf.h"
#include "aes.h" #include "aes.h"
#define VGAME_BUFFER_SIZE 0x200000 // at least 2MB, multiple of 0x200 #define VGAME_BUFFER_SIZE 0x200000 // at least 2MB, multiple of 0x200
@ -1139,8 +1140,7 @@ bool GetVGameLv3Filename(char* name, const VirtualFile* vfile, u32 n_chars) {
name_len = filemeta->name_len / 2; name_len = filemeta->name_len / 2;
} }
memset(name, 0, n_chars); memset(name, 0, n_chars);
for (u32 i = 0; (i < (n_chars-1)) && (i < name_len); i++) utf16_to_utf8((u8*) name, wname, n_chars-1, name_len);
name[i] = wname[i]; // poor mans UTF-16 -> UTF-8 (doesn't work proper for special chars)
return true; return true;
} }