From a6acdfff6a48dccf7df4c3d3fab94ea548fa1ff2 Mon Sep 17 00:00:00 2001 Message-Id: From: Philipp Hahn Date: Sun, 8 Mar 2015 10:55:38 +0100 Subject: [PATCH] VHD: Fix locale aware character encoding handling MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Organization: Univention GmbH, Bremen, Germany To: address@hidden ASCII is 7 bit only, which does not work in UTF-8 environments: > failed to read parent name Set up the locale in vhd-util to parse LC_CTYPE and use the right codeset when doing file name encoding and decoding. Increase the allocation for the UTF-8 buffer, as one UTF-16 character might use twice as much space in UTF-8 (or more). Don't check outbytesleft==0, as a single UTF-8 character gets encoded into 1..8 bytes, so it's perfectly fine (and expected) for the output to have remaining bytes left. Test-case: $ ./vhd-util create -n ä.vhd -s 1 $ ./vhd-util snapshot -n snap.vhd -p ä.vhd ; echo $? See for more information about the details of handling the encoding right. 
Signed-off-by: Philipp Hahn --- tools/blktap2/vhd/lib/libvhd.c | 27 +++++++++++++++++++-------- tools/blktap2/vhd/vhd-util.c | 3 +++ 2 files changed, 22 insertions(+), 8 deletions(-) diff --git a/tools/blktap2/vhd/lib/libvhd.c b/tools/blktap2/vhd/lib/libvhd.c index 95eb5d6..1fd5b4e 100644 --- a/tools/blktap2/vhd/lib/libvhd.c +++ b/tools/blktap2/vhd/lib/libvhd.c @@ -37,6 +37,7 @@ #include #include #include +#include #include "libvhd.h" #include "relative-path.h" @@ -1296,6 +1297,7 @@ vhd_macx_encode_location(char *name, char **out, int *outlen) size_t ibl, obl; char *uri, *uri_utf8, *uri_utf8p, *ret; const char *urip; + char *codeset; err = 0; ret = NULL; @@ -1304,7 +1306,7 @@ vhd_macx_encode_location(char *name, char **out, int *outlen) len = strlen(name) + strlen("file://"); ibl = len; - obl = len; + obl = len * 2; urip = uri = malloc(ibl + 1); uri_utf8 = uri_utf8p = malloc(obl); @@ -1312,7 +1314,8 @@ vhd_macx_encode_location(char *name, char **out, int *outlen) if (!uri || !uri_utf8) return -ENOMEM; - cd = iconv_open("UTF-8", "ASCII"); + codeset = nl_langinfo(CODESET); + cd = iconv_open("UTF-8", codeset); if (cd == (iconv_t)-1) { err = -errno; goto out; @@ -1325,7 +1328,7 @@ vhd_macx_encode_location(char *name, char **out, int *outlen) (char **) #endif &urip, &ibl, &uri_utf8p, &obl) == (size_t)-1 || - ibl || obl) { + ibl) { err = (errno ? 
-errno : -EIO); goto out; } @@ -1357,6 +1360,7 @@ vhd_w2u_encode_location(char *name, char **out, int *outlen) size_t ibl, obl; char *uri, *uri_utf16, *uri_utf16p, *tmp, *ret; const char *urip; + char *codeset; err = 0; ret = NULL; @@ -1404,7 +1408,8 @@ vhd_w2u_encode_location(char *name, char **out, int *outlen) * MICROSOFT_COMPAT * little endian unicode here */ - cd = iconv_open("UTF-16LE", "ASCII"); + codeset = nl_langinfo(CODESET); + cd = iconv_open("UTF-16LE", codeset); if (cd == (iconv_t)-1) { err = -errno; goto out; @@ -1415,7 +1420,7 @@ vhd_w2u_encode_location(char *name, char **out, int *outlen) (char **) #endif &urip, &ibl, &uri_utf16p, &obl) == (size_t)-1 || - ibl || obl) { + ibl) { err = (errno ? -errno : -EIO); goto out; } @@ -1447,11 +1452,13 @@ vhd_macx_decode_location(const char *in, char *out, int len) iconv_t cd; char *name; size_t ibl, obl; + char *codeset; name = out; ibl = obl = len; - cd = iconv_open("ASCII", "UTF-8"); + codeset = nl_langinfo(CODESET); + cd = iconv_open(codeset, "UTF-8"); if (cd == (iconv_t)-1) return NULL; @@ -1479,11 +1486,13 @@ vhd_w2u_decode_location(const char *in, char *out, int len, char *utf_type) iconv_t cd; char *name, *tmp; size_t ibl, obl; + char *codeset; tmp = name = out; ibl = obl = len; - cd = iconv_open("ASCII", utf_type); + codeset = nl_langinfo(CODESET); + cd = iconv_open(codeset, utf_type); if (cd == (iconv_t)-1) return NULL; @@ -2450,6 +2459,7 @@ vhd_initialize_header_parent_name(vhd_context_t *ctx, const char *parent_path) size_t ibl, obl; char *ppath, *dst; const char *pname; + char *codeset; err = 0; pname = NULL; @@ -2459,7 +2469,8 @@ vhd_initialize_header_parent_name(vhd_context_t *ctx, const char *parent_path) * MICROSOFT_COMPAT * big endian unicode here */ - cd = iconv_open(UTF_16BE, "ASCII"); + codeset = nl_langinfo(CODESET); + cd = iconv_open(UTF_16BE, codeset); if (cd == (iconv_t)-1) { err = -errno; goto out; diff --git a/tools/blktap2/vhd/vhd-util.c b/tools/blktap2/vhd/vhd-util.c index 
944a59e..13f1835 100644 --- a/tools/blktap2/vhd/vhd-util.c +++ b/tools/blktap2/vhd/vhd-util.c @@ -28,6 +28,8 @@ #include #include #include +#include +#include #include "libvhd.h" #include "vhd-util.h" @@ -114,6 +116,7 @@ main(int argc, char *argv[]) if (setrlimit(RLIMIT_CORE, &rlim) < 0) fprintf(stderr, "setrlimit failed: %d\n", errno); #endif + setlocale(LC_CTYPE, ""); ret = 0; -- 1.9.1