[Top][All Lists]
[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
Re: [PATCH] fix locale string reading
From: |
Ludovic Courtès |
Subject: |
Re: [PATCH] fix locale string reading |
Date: |
Wed, 16 Nov 2011 00:51:26 +0100 |
User-agent: |
Gnus/5.110018 (No Gnus v0.18) Emacs/24.0.90 (gnu/linux) |
Hi Mark!
Mark H Weaver <address@hidden> skribis:
> address@hidden (Ludovic Courtès) writes:
>>> I think we should consider decoding the command-line arguments using the
>>> locale specified by the environment variables, at least in cases like
>>> this where there's no way for the user to call setlocale before the
>>> conversion happens.
>>
>> Below is a patch that does roughly that (we should get ‘locale_encoding’
>> reviewed and perhaps added to Gnulib.)
>>
>> It solves the problem:
>>
>> # With the patch.
>> $ ./meta/guile -c '(setlocale LC_ALL "en_US.UTF8")(display (command-line))'
>> -- λ
>> (/home/ludo/src/guile/libguile/.libs/guile -- λ)
>>
>> # Previously.
>> $ guile -c '(setlocale LC_ALL "en_US.UTF8")(display (command-line))' -- λ
>> (guile -- ??)
>
> Looks great, thanks! :)
>
> I have one question though. You fixed scm_compile_shell_switches, but I
> see another place where command-line arguments are converted to Scheme
> strings before the user is able to call setlocale: guile.c and init.c.
>
> main (guile.c) calls scm_boot_guile (init.c), which uses
> invoke_main_func (init.c), which calls scm_set_program_arguments
> (feature.c). Does this code need to be fixed also?
Yes, good catch!
An updated patch is attached. It seems to fulfill its mission:
--8<---------------cut here---------------start------------->8---
# Now:
$ ./meta/guile -c '(setlocale LC_ALL "en_US.UTF8")(display (list (command-line)
(program-arguments)))' -- λ
((/home/ludo/src/guile/libguile/.libs/guile -- λ)
(/home/ludo/src/guile/libguile/.libs/guile -- λ))
# Before:
$ guile -c '(setlocale LC_ALL "en_US.UTF8")(display (list (command-line)
(program-arguments)))' -- λ
((guile -- ??) (guile -- ??))
--8<---------------cut here---------------end--------------->8---
Note that the code uses SCM_FAILED_CONVERSION_ESCAPE_SEQUENCE, but I
wonder if we couldn’t do better. For instance, upon conversion failure,
we could pass the argument as a bytevector instead of a string and let
the application cope with it. OTOH, that would be an API change.
Thoughts?
Thanks,
Ludo’.
diff --git a/libguile/feature.c b/libguile/feature.c
index 7007403..f3bddc7 100644
--- a/libguile/feature.c
+++ b/libguile/feature.c
@@ -1,5 +1,6 @@
-/* Copyright (C) 1995,1996,1998,1999,2000,2001,2002, 2003, 2004, 2006, 2007, 2009 Free Software Foundation, Inc.
- *
+/* Copyright (C) 1995, 1996, 1998, 1999, 2000, 2001, 2002, 2003, 2004,
+ * 2006, 2007, 2009, 2011 Free Software Foundation, Inc.
+ *
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public License
* as published by the Free Software Foundation; either version 3 of
@@ -36,7 +37,8 @@
-static SCM progargs_fluid;
+SCM scm_program_arguments_fluid;
+
static SCM features_var;
void
@@ -58,7 +60,7 @@ SCM_DEFINE (scm_program_arguments, "program-arguments", 0, 0, 0,
"options like @code{-e} and @code{-l}.")
#define FUNC_NAME s_scm_program_arguments
{
- return scm_fluid_ref (progargs_fluid);
+ return scm_fluid_ref (scm_program_arguments_fluid);
}
#undef FUNC_NAME
@@ -74,7 +76,7 @@ scm_set_program_arguments (int argc, char **argv, char *first)
SCM args = scm_makfromstrs (argc, argv);
if (first)
args = scm_cons (scm_from_locale_string (first), args);
- scm_fluid_set_x (progargs_fluid, args);
+ scm_fluid_set_x (scm_program_arguments_fluid, args);
}
SCM_DEFINE (scm_set_program_arguments_scm, "set-program-arguments", 1, 0, 0,
@@ -89,7 +91,7 @@ SCM_DEFINE (scm_set_program_arguments_scm, "set-program-arguments", 1, 0, 0,
"strings within it are copied, so should not be modified later.")
#define FUNC_NAME s_scm_set_program_arguments_scm
{
- return scm_fluid_set_x (progargs_fluid, lst);
+ return scm_fluid_set_x (scm_program_arguments_fluid, lst);
}
#undef FUNC_NAME
@@ -99,7 +101,7 @@ SCM_DEFINE (scm_set_program_arguments_scm, "set-program-arguments", 1, 0, 0,
void
scm_init_feature()
{
- progargs_fluid = scm_make_fluid ();
+ scm_program_arguments_fluid = scm_make_fluid ();
features_var = scm_c_define ("*features*", SCM_EOL);
#ifndef _Windows
diff --git a/libguile/feature.h b/libguile/feature.h
index d373bc7..467f9ed 100644
--- a/libguile/feature.h
+++ b/libguile/feature.h
@@ -3,7 +3,8 @@
#ifndef SCM_FEATURE_H
#define SCM_FEATURE_H
-/* Copyright (C) 1995,1996,1999,2000,2001, 2006, 2007, 2008 Free Software Foundation, Inc.
+/* Copyright (C) 1995, 1996, 1999, 2000, 2001, 2006, 2007, 2008,
+ * 2011 Free Software Foundation, Inc.
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public License
@@ -29,6 +30,8 @@ SCM_API void scm_add_feature (const char* str);
SCM_API SCM scm_program_arguments (void);
SCM_API void scm_set_program_arguments (int argc, char **argv, char *first);
SCM_API SCM scm_set_program_arguments_scm (SCM lst);
+
+SCM_INTERNAL SCM scm_program_arguments_fluid;
SCM_INTERNAL void scm_init_feature (void);
#endif /* SCM_FEATURE_H */
diff --git a/libguile/init.c b/libguile/init.c
index 8e3888d..633f8c6 100644
--- a/libguile/init.c
+++ b/libguile/init.c
@@ -332,7 +332,7 @@ invoke_main_func (void *body_data)
{
struct main_func_closure *closure = (struct main_func_closure *) body_data;
- scm_set_program_arguments (closure->argc, closure->argv, 0);
+ scm_i_set_boot_program_arguments (closure->argc, closure->argv);
(*closure->main_func) (closure->closure, closure->argc, closure->argv);
scm_restore_signals ();
diff --git a/libguile/script.c b/libguile/script.c
index 5e0685a..b1d3327 100644
--- a/libguile/script.c
+++ b/libguile/script.c
@@ -26,6 +26,7 @@
#include <stdio.h>
#include <errno.h>
#include <ctype.h>
+#include <uniconv.h>
#include "libguile/_scm.h"
#include "libguile/eval.h"
@@ -368,6 +369,87 @@ scm_shell_usage (int fatal, char *message)
: SCM_BOOL_F));
}
+/* Return the name of the locale encoding suggested by the environment
+ variables LC_ALL, LC_CTYPE and LANG, even if it's not current, or NULL
+ if none of them has a non-empty value. Based on Gnulib's `localcharset.c'. */
+static const char *
+locale_encoding (void)
+{
+ static char buf[2 + 10 + 1];
+ const char *locale, *codeset = NULL;
+
+ /* Use the first of LC_ALL, LC_CTYPE and LANG (in that order of
+ precedence) that is set to a non-empty value. */
+ locale = getenv ("LC_ALL");
+ if (locale == NULL || locale[0] == '\0')
+ {
+ locale = getenv ("LC_CTYPE");
+ if (locale == NULL || locale[0] == '\0')
+ locale = getenv ("LANG");
+ }
+ if (locale != NULL && locale[0] != '\0')
+ {
+ /* If the locale name contains an encoding after the dot, return it. */
+ const char *dot = strchr (locale, '.');
+
+ if (dot != NULL)
+ {
+ const char *modifier;
+
+ dot++;
+ /* Strip a possible @... trailer by copying the encoding into BUF. */
+ modifier = strchr (dot, '@');
+ if (modifier == NULL)
+ return dot;
+ if (modifier - dot < sizeof (buf))
+ {
+ memcpy (buf, dot, modifier - dot);
+ buf [modifier - dot] = '\0';
+ return buf;
+ }
+ }
+ else if (strcmp (locale, "C") == 0)
+ {
+ strcpy (buf, "ASCII");
+ return buf;
+ }
+
+ /* Fall back to the locale name itself; no charset.alias lookup is done. */
+ codeset = locale;
+ }
+
+ return codeset;
+}
+
+/* Return a list of strings from ARGV, which contains ARGC strings
+ assumed to be encoded in the locale named by the environment. Use
+ `locale_encoding' instead of relying on `scm_from_locale_string'
+ because the user hasn't had a chance to call (setlocale LC_ALL "") yet. */
+static SCM
+locale_arguments_to_string_list (int argc, char **const argv)
+{
+ int i;
+ SCM lst;
+ const char *encoding;
+
+ encoding = locale_encoding ();
+ for (i = argc - 1, lst = SCM_EOL;
+ i >= 0;
+ i--)
+ lst = scm_cons (scm_from_stringn (argv[i], (size_t) -1, encoding,
+ SCM_FAILED_CONVERSION_ESCAPE_SEQUENCE),
+ lst);
+
+ return lst;
+}
+
+/* Set the value returned by `program-arguments' to the list of strings decoded from the ARGC elements of ARGV. */
+void
+scm_i_set_boot_program_arguments (int argc, char *argv[])
+{
+ scm_fluid_set_x (scm_program_arguments_fluid,
+ locale_arguments_to_string_list (argc, argv));
+}
/* Given an array of command-line switches, return a Scheme expression
to carry out the actions specified by the switches.
@@ -378,7 +460,7 @@ scm_compile_shell_switches (int argc, char **argv)
{
return scm_call_2 (scm_c_public_ref ("ice-9 command-line",
"compile-shell-switches"),
- scm_makfromstrs (argc, argv),
+ locale_arguments_to_string_list (argc, argv),
(scm_usage_name
? scm_from_locale_string (scm_usage_name)
: scm_from_latin1_string ("guile")));
diff --git a/libguile/script.h b/libguile/script.h
index 7e3828a..cf0162a 100644
--- a/libguile/script.h
+++ b/libguile/script.h
@@ -3,7 +3,7 @@
#ifndef SCM_SCRIPT_H
#define SCM_SCRIPT_H
-/* Copyright (C) 1997,1998,2000, 2006, 2008 Free Software Foundation, Inc.
+/* Copyright (C) 1997,1998,2000, 2006, 2008, 2011 Free Software Foundation, Inc.
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public License
@@ -37,6 +37,7 @@ SCM_API void scm_shell_usage (int fatal, char *message);
SCM_API SCM scm_compile_shell_switches (int argc, char **argv);
SCM_API void scm_shell (int argc, char **argv);
SCM_API char *scm_usage_name;
+SCM_INTERNAL void scm_i_set_boot_program_arguments (int argc, char *argv[]);
SCM_INTERNAL void scm_init_script (void);
#endif /* SCM_SCRIPT_H */
- Re: [PATCH] fix locale string reading, (continued)
- Re: [PATCH] fix locale string reading, Nala Ginrut, 2011/11/08
- Re: [PATCH] fix locale string reading, Nala Ginrut, 2011/11/08
- Re: [PATCH] fix locale string reading, Peter Brett, 2011/11/09
- Re: [PATCH] fix locale string reading, Nala Ginrut, 2011/11/09
- Re: [PATCH] fix locale string reading, Peter Brett, 2011/11/09
- Re: [PATCH] fix locale string reading, Nala Ginrut, 2011/11/09
Re: [PATCH] fix locale string reading, Ludovic Courtès, 2011/11/11