# # # patch "Makefile.am" # from [f6878c0fbaba989eaf1f2e9608144338747905fe] # to [2bbc5c3f98f24c81c68d2947f27e42cf012520a6] # # patch "tester-plaf.hh" # from [1fb912adce77493fcd98c6eb94eb3a5a2deab2b6] # to [b6b7ad97e517261fdb37d05064e5f1365197f529] # # patch "tester.cc" # from [e6693333f1bbeeee26599a2a608fa5df3ea01150] # to [6c60d23669f0bb1987e9162caf82c8f644530195] # # patch "testlib.lua" # from [a688fbcf17fd30160c55237e1dfdb98b540ecd02] # to [aafdb7ef96398c885f9e89313999027a91dffe66] # # patch "unix/tester-plaf.cc" # from [9002cee809acea4b97baddb839acb021d2fa29c6] # to [2ca5dcb66ac86f2e1632055c5973089d1b9d78ae] # # patch "win32/tester-plaf.cc" # from [4baec6a135f718d97454be05fba7a7bc29cdd4b8] # to [4d713dfc459b94a813eccddd6a3295bb0dd007ed] # ============================================================ --- Makefile.am f6878c0fbaba989eaf1f2e9608144338747905fe +++ Makefile.am 2bbc5c3f98f24c81c68d2947f27e42cf012520a6 @@ -338,7 +338,7 @@ tester_LDADD = $(addprefix mtn-, $(patsu tester_SOURCES = tester.cc nodist_tester_SOURCES = testlib.c tester_LDADD = $(addprefix mtn-, $(patsubst %.cc, %.$(OBJEXT), \ - $(filter %.cc, $(SANITY_CORE_SOURCES) $(LUAEXT_SOURCES) option.cc))) + $(filter %.cc, $(SANITY_CORE_SOURCES) $(LUAEXT_SOURCES)))) txt2c_SOURCES = txt2c.cc @@ -597,8 +597,10 @@ run_%_tests: Makefile echo 'exit 0'; } > $@ chmod 755 $@ +# The leading + causes Make to treat this as a recursive invocation, +# allowing it to participate in the jobserver protocol. %_tests.status: run_%_tests %-testsuite.lua tester FORCE - ./run_$*_tests + +./run_$*_tests unit_tests.status : unit_tester lua_tests.status : mtn ============================================================ --- tester-plaf.hh 1fb912adce77493fcd98c6eb94eb3a5a2deab2b6 +++ tester-plaf.hh b6b7ad97e517261fdb37d05064e5f1365197f529 @@ -37,9 +37,9 @@ bool running_as_root(); // process, *or* the program named in 'runner' is spawned with argument // vector [runner, '-r', testfile, firstdir, test-name]. Either way, the // child process is running in a just-created, empty directory which is -// exclusive to that test; its standard input is the null device; and its -// standard output and error are a logfile. If invoke() is called, it is -// expected not to return. +// exclusive to that test. Standard I/O is not touched; the child is +// expected not to use stdin, stdout, or stderr. If invoke() is called, +// its return value is the process exit code. // // After each per-test child process completes, cleanup() is called for that // test. If cleanup() returns true, the per-test directory is deleted. @@ -67,7 +67,7 @@ struct test_invoker { lua_State * st; test_invoker(lua_State *st) : st(st) {} - void operator()(std::string const & testname) const; + int operator()(std::string const & testname) const; }; struct test_cleaner @@ -85,6 +85,7 @@ void run_tests_in_children(test_enumerat std::string const & runner, std::string const & testfile, std::string const & firstdir); +void prepare_for_parallel_testcases(int, int, int); // These functions are actually in tester.cc but are used by tester-plaf.cc. void do_remove_recursive(std::string const & dir); ============================================================ --- tester.cc e6693333f1bbeeee26599a2a608fa5df3ea01150 +++ tester.cc 6c60d23669f0bb1987e9162caf82c8f644530195 @@ -1,13 +1,15 @@ #include "base.hh" #include "lua.hh" #include "platform.hh" #include "tester-plaf.hh" #include "sanity.hh" -#include "option.hh" +#include using std::string; using std::map; using std::vector; +using boost::lexical_cast; +using boost::bad_lexical_cast; // defined in testlib.c, generated from testlib.lua extern char const testlib_constant[]; @@ -58,7 +60,7 @@ static int panic_thrower(lua_State * st) static int panic_thrower(lua_State * st) { - throw oops("lua error"); + throw oops((FL("lua error: %s\n") % luaL_checkstring(st, -1)).str().c_str()); } // N.B. some of this code is copied from file_io.cc @@ -172,7 +174,7 @@ void do_copy_recursive(string const & fr void do_copy_recursive(string const & from, string to) { path::status fromstat = get_path_status(from); - + E(fromstat != path::nonexistent, F("Source '%s' for copy does not exist") % from); @@ -521,7 +523,7 @@ LUAEXT(require_not_root, ) "Please try again with a normal user account.\n")); exit(1); } - return 0; + return 0; } // run_tests_in_children (to_run, reporter) @@ -565,8 +567,10 @@ bool test_enumerator::operator()(test_to // Invoke one test case in the child. This may be called by // run_tests_in_children, or by main, because Windows doesn't have fork. +// It is not allowed to write to standard output or standard error under +// any circumstances whatsoever. Not calling lua_close is deliberate. -void test_invoker::operator()(std::string const & testname) const +int test_invoker::operator()(std::string const & testname) const { int retcode; try @@ -581,31 +585,11 @@ void test_invoker::operator()(std::strin retcode = luaL_checkinteger(st, -1); lua_remove(st, -1); } - catch (informative_failure & e) - { - P(F("%s\n") % e.what()); - retcode = 1; - } - catch (std::logic_error & e) - { - P(F("Invariant failure: %s\n") % e.what()); - retcode = 3; - } - catch (std::exception & e) - { - P(F("Uncaught exception: %s") % e.what()); - retcode = 3; - } catch (...) { - P(F("Uncaught exception of unknown type")); - retcode = 3; + retcode = 124; } - - // This does not properly clean up global state, but none of it is - // process-external, so it's ok to let the OS obliterate it; and - // leaving this function any other way is not safe. - exit(retcode); + return retcode; } @@ -652,60 +636,367 @@ LUAEXT(run_tests_in_children, ) return 0; } +// Write all arguments to standard output. This is not a normal LUAEXT +// because it is only made available to run_tests as an argument, not +// established as globally visible. (Only a very limited number of places +// at the Lua level are allowed to talk to standard output.) +int run_tests_progress(lua_State *st) +{ + int n = lua_gettop(st); + for (int i = 1; i <= n; i++) + fputs(luaL_checkstring(st, i), stdout); + return 0; +} -int main(int argc, char **argv) +// RAII wrapper around a Lua state structure; also takes care of doing the +// initialization as we want it. Of note is that we do not want any +// Lua-level code getting its grubby fingers on stdin/out/err, so we have to +// take just about everything out of the io table, and we do not trust +// testlib.lua to do this for us. + +namespace { + struct lua_lib + { + lua_lib(string const & initial_dir, string const & suite); + ~lua_lib() { lua_close(st); } + lua_State * operator()() { return st; } + private: + lua_State * st; + }; + lua_lib::lua_lib(string const & initial_dir, string const & suite) + : st(luaL_newstate()) + { + static char const * const allowed_io_funcs[] = { + "open", "lines", "type", "tmpfile" + }; + + lua_atpanic (st, &panic_thrower); + luaL_openlibs(st); + add_functions(st); + + lua_getglobal(st, "io"); + lua_newtable(st); + + for (unsigned int i = 0; + i < sizeof allowed_io_funcs / sizeof allowed_io_funcs[0]; i++) + { + // this looks like it's a no-op, but the trick is that stack element + // -2 is the original "io" table in the getfield operation, but the + // new table we are constructing in the setfield operation (because + // getfield leaves its value at top of stack, and setfield pops it). + lua_getfield(st, -2, allowed_io_funcs[i]); + lua_setfield(st, -2, allowed_io_funcs[i]); + } + + lua_remove(st, -2); // oldtable newtable -- newtable + + // establish our new table as the value of + // package.loaded["io"]. + lua_getglobal(st, "package"); // -- newtable package + lua_getfield(st, -1, "loaded"); // -- newtable package loaded + lua_remove(st, -2); // -- newtable loaded + lua_pushvalue(st, -2); // -- newtable loaded newtable + lua_setfield(st, -2, "io"); // -- newtable loaded + lua_remove(st, -1); // -- newtable + + // also establish it as the value of the global "io" variable. + lua_setglobal(st, "io"); // -- + + // we can now load testlib.lua. + run_string(st, testlib_constant, "testlib.lua"); + + // the suite definition may know the initial working directory. + lua_pushstring(st, initial_dir.c_str()); + lua_setglobal(st, "initial_dir"); + + run_file(st, suite.c_str()); + } +} + +// This function is cloned from simplestring_xform.cc, which we cannot use +// here. It does not cover several possibilities handled by the real +// version but of no interest here. + +static vector split_into_words(string const & in) { - int retcode = 2; - lua_State *st = 0; + vector out; - try + string::size_type begin = 0; + string::size_type end = in.find_first_of(" ", begin); + + while (end != string::npos && end >= begin) { - vector tests_to_run; - bool want_help = false; - bool need_help = false; - bool debugging = false; - bool list_only = false; - bool run_one = false; + out.push_back(in.substr(begin, end-begin)); + begin = end + 1; + if (begin >= in.size()) + break; + end = in.find_first_of(" ", begin); + } + if (begin < in.size()) + out.push_back(in.substr(begin, in.size() - begin)); - option::concrete_option_set os; - os("help,h", "display help message", option::setter(want_help)) - ("debug,d", "don't erase per-test directories for successful tests", - option::setter(debugging)) - ("list,l", "list tests that would be run, but don't run them", - option::setter(list_only)) - ("run-one,r", "", // internal use only! - option::setter(run_one)) - ("--", "", option::setter(tests_to_run)); + return out; +} - try + +// Parse a boolean command line option: if ARG is either SHORTOPT or +// LONGOPT, return true, else false. +static bool +bool_option(char const * arg, char const * shortopt, char const * longopt) +{ + return ((shortopt && !strcmp(arg, shortopt)) + || (longopt && !strcmp(arg, longopt))); +} + +// Parse an integer-valued command line option: if ARG is either SHORTOPT +// or LONGOPT and a decimal integer follows, write that integer to VAL and +// return true, else leave VAL untouched and return false. +static bool +int_option(char const * arg, char const * shortopt, char const * longopt, + int & val) +{ + if (shortopt && !strncmp(arg, shortopt, strlen(shortopt))) + { + char *end; + int v = strtoul(arg + strlen(shortopt), &end, 10); + if (end != arg + strlen(shortopt) && *end == '\0') { - os.from_command_line(argc, argv); + val = v; + return true; } - catch (option::option_error & e) + } + + if (longopt && !strncmp(arg, longopt, strlen(longopt))) + { + char *end; + int v = strtoul(arg + strlen(longopt), &end, 10); + if (end != arg + strlen(longopt) && *end == '\0') { - P(F("%s: %s\n") % argv[0] % e.what()); - need_help = true; + val = v; + return true; } + } - if (tests_to_run.size() == 0) + return false; +} + +// Parse a two-integer-valued command line option: if ARG begins with OPT +// and continues with a pair of decimal integers separated by a comma, write +// the integers to VAL1 and VAL2 and return true; else leave VAL1 and VAL2 +// untouched and return false. +static bool +int_int_option(char const * arg, char const * opt, int & val1, int & val2) +{ + if (strncmp(arg, opt, strlen(opt))) + return false; + + char *end1, *end2, *p; + int v1, v2; + + p = const_cast(arg + strlen(opt)); + + v1 = strtoul(p, &end1, 10); + + if (end1 == p || *end1 != ',') + return false; + + v2 = strtoul(end1 + 1, &end2, 10); + + if (end1 == end2 || *end2 != '\0') + return false; + + val1 = v1; + val2 = v2; + return true; +} + +// Extract parallelization-related options from MFLAGS. We can rely on +// Make to pass these arguments in a particular form: +// -j [N] no more than N parallel jobs (absent = no limit) +// -l [N] no more jobs if the system load average rises above N +// (absent = no limit) (not supported except with no N) +// --jobserver-fds=M,N talk to a job server on fds M and N to limit +// concurrency +// Anything else in MFLAGS is ignored. +// The first word in MFLAGS should have a dash prepended to it unless it +// already has one. + +static void +parse_makeflags(char const * mflags, + int & jobs, + int & jread, + int & jwrite) +{ + if (mflags == 0) + return; + + while (*mflags == ' ') mflags++; + + vector mf(split_into_words(mflags)); + + if (mf.size() == 0 || (mf.size() == 1 && mf[0] == "")) + return; + + if (mf[0][0] != '-') + mf[0] = string("-") + mf[0]; + + int jxx = 0; + for (vector::const_iterator i = mf.begin(); i != mf.end(); i++) + { + if (*i == "-j") { - P(F("%s: no test suite specified\n")); - need_help = true; + jxx = -1; + i++; + if (i == mf.end()) + break; + try + { + jxx = lexical_cast(*i); + if (jxx <= 0) + { + W(F("-j %d makes no sense, option ignored") % jxx); + jxx = 0; + } + } + catch (bad_lexical_cast &) + { + i--; + } } + else if (*i == "-l") + { + i++; + if (i == mf.end()) + break; + try + { + double dummy = lexical_cast(*i); + W(F("no support for -l %f: forcing -j1") % dummy); + jxx = 1; + } + catch (bad_lexical_cast &) + { + i--; + } + } + else if (int_int_option(i->c_str(), "--jobserver-fds=", jread, jwrite)) + ; + } - if (run_one && (want_help || debugging || list_only - || tests_to_run.size() != 3)) + // do not permit -j in MAKEFLAGS to override -j on the command line. + if (jxx != 0 && jobs == 0) + jobs = jxx; +} + +static void +parse_command_line(int argc, char const * const * argv, + bool & want_help, bool & need_help, + bool & debugging, bool & list_only, + bool & run_one, int & jobs, + vector & tests_to_run) +{ + int i; + int jxx = 0; + + for (i = 1; i < argc; i++) + { + if (string(argv[i]) == "--") + break; + + if (bool_option(argv[i], "-h", "--help")) + want_help = true; + else if (bool_option(argv[i], "-d", "--debug")) + debugging = true; + else if (bool_option(argv[i], "-l", "--list-only")) + list_only = true; + else if (bool_option(argv[i], "-r", 0)) + run_one = true; + else if (bool_option(argv[i], "-j", "--jobs")) { - P(F("%s: incorrect self-invocation\n") % argv[0]); + // if there turns out not to be a number, this is -j infinity. + jxx = -1; + + if (i+1 < argc) + try + { + jxx = lexical_cast(argv[i]); + if (jxx <= 0) + { + W(F("-j %d makes no sense, option ignored") % jxx); + jxx = 0; + } + i++; + } + catch (bad_lexical_cast &) + { + // it wasn't a number. + } + } + else if (int_option(argv[i], "-j", "--jobs=", jobs)) + /* no action required */; + else if (argv[i][1] == '-') + { + P(F("unrecognized option '%s'") % argv[i]); need_help = true; } + else + tests_to_run.push_back(argv[i]); + } + // all argv elements from i+1 to argc go into tests_to_run without further + // interpretation. + if (i < argc) + for (i++; i < argc; i++) + tests_to_run.push_back(argv[i]); + + if (jxx != 0) + jobs = jxx; + + E(!run_one || (!want_help && !debugging && !list_only + && tests_to_run.size() == 3 && jobs == 0), + F("incorrect self-invocation")); + + if (tests_to_run.size() == 0) + { + P(F("%s: no test suite specified\n")); + need_help = true; + } +} + +int main(int argc, char **argv) +{ + int retcode = 2; + + vector tests_to_run; + bool want_help = false; + bool need_help = false; + bool debugging = false; + bool list_only = false; + bool run_one = false; + int jobs = 0; + int jread = -1; + int jwrite = -1; + + try + { + parse_command_line(argc, argv, + want_help, need_help, debugging, list_only, + run_one, jobs, tests_to_run); + + parse_makeflags(getenv("MAKEFLAGS"), jobs, jread, jwrite); + if (want_help || need_help) { P(F("Usage: %s test-file testsuite [options] [tests]\n") % argv[0]); - P(F("Testsuite: a Lua script defining the test suite to run.\n")); - P(F("Options:\n%s\n") % os.get_usage_str()); - P(F("Tests may be specified as:\n" + P(F("Testsuite: a Lua script defining the test suite to run.\n" + "Options:\n" + " -l, --list just list tests that would be run" + " -d, --debug don't erase working dirs of successful tests" + " -j N, --jobs=N run N test cases in parallel" + " (note: unlike make, the N is not optional)" + " -h, --help display this help message\n" + // -r is deliberately not mentioned. + "Tests may be specified as:\n" " nothing - run all tests.\n" " numbers - run the tests with those numbers\n" " negative numbers count back from the end\n" @@ -715,10 +1006,8 @@ int main(int argc, char **argv) return want_help ? 0 : 2; } - st = luaL_newstate(); - lua_atpanic (st, &panic_thrower); - luaL_openlibs(st); - add_functions(st); + if (jobs == 0) // no setting from command line or MAKEFLAGS + jobs = 1; if (run_one) { @@ -731,23 +1020,11 @@ int main(int argc, char **argv) // We have been invoked inside the directory where we should run // the test. Stdout and stderr have been redirected to a per-test // logfile. - - lua_pushstring(st, tests_to_run[1].c_str()); - lua_setglobal(st, "initial_dir"); - source_dir = dirname(tests_to_run[0]); - - run_string(st, testlib_constant, "testlib.lua"); - run_file(st, tests_to_run[0].c_str()); - - Lua ll(st); - ll.func("run_one_test"); - ll.push_str(tests_to_run[2]); - ll.call(1,1) - .extract_int(retcode); + lua_lib st(tests_to_run[1], tests_to_run[0]); + return test_invoker(st)(tests_to_run[2]); #else - P(F("%s: self-invocation should not be used on Unix\n") % argv[0]); - retcode = 2; + E(false, F("self-invocation should not be used on Unix\n")); #endif } else @@ -781,15 +1058,12 @@ int main(int argc, char **argv) change_current_working_dir(run_dir); - run_string(st, testlib_constant, "testlib.lua"); - lua_pushstring(st, firstdir.c_str()); - lua_setglobal(st, "initial_dir"); - run_file(st, testfile.c_str()); + lua_lib st(firstdir, testfile); // arrange for isolation between different test suites running in // the same build directory. - lua_getglobal(st, "testdir"); - const char *testdir = lua_tostring(st, 1); + lua_getglobal(st(), "testdir"); + const char *testdir = lua_tostring(st(), 1); I(testdir); string testdir_base = basename(testdir); run_dir = run_dir + "/" + testdir_base; @@ -804,20 +1078,29 @@ int main(int argc, char **argv) do_mkdir(run_dir); } - Lua ll(st); + prepare_for_parallel_testcases(jobs, jread, jwrite); + + Lua ll(st()); ll.func("run_tests"); ll.push_bool(debugging); ll.push_bool(list_only); ll.push_str(run_dir); ll.push_str(logfile); ll.push_table(); - for (int i = 2; i < argc; ++i) + // i = 1 skips the first element of tests_to_run, which is the + // testsuite definition. + for (unsigned int i = 1; i < tests_to_run.size(); i++) { - ll.push_int(i-1); - ll.push_str(argv[i]); + ll.push_int(i); + ll.push_str(tests_to_run.at(i).c_str()); ll.set_table(); } - ll.call(5,1) + + // the Lua object doesn't wrap this + if (ll.ok()) + lua_pushcfunction(st(), run_tests_progress); + + ll.call(6,1) .extract_int(retcode); } } @@ -842,142 +1125,9 @@ int main(int argc, char **argv) retcode = 3; } - if (st) - lua_close(st); return retcode; } -// The functions below are used by option.cc, and cloned from ui.cc and -// simplestring_xform.cc, which we cannot use here. They do not cover -// several possibilities handled by the real versions. - -unsigned int -guess_terminal_width() -{ - unsigned int w = terminal_width(); - if (!w) - w = 80; // can't use constants:: here - return w; -} - -static void -split_into_lines(string const & in, vector & out) -{ - out.clear(); - string::size_type begin = 0; - string::size_type end = in.find_first_of("\r\n", begin); - while (end != string::npos && end >= begin) - { - out.push_back(in.substr(begin, end-begin)); - if (in.at(end) == '\r' - && in.size() > end+1 - && in.at(end+1) == '\n') - begin = end + 2; - else - begin = end + 1; - if (begin >= in.size()) - break; - end = in.find_first_of("\r\n", begin); - } - if (begin < in.size()) - out.push_back(in.substr(begin, in.size() - begin)); -} - -static vector split_into_words(string const & in) -{ - vector out; - - string::size_type begin = 0; - string::size_type end = in.find_first_of(" ", begin); - - while (end != string::npos && end >= begin) - { - out.push_back(in.substr(begin, end-begin)); - begin = end + 1; - if (begin >= in.size()) - break; - end = in.find_first_of(" ", begin); - } - if (begin < in.size()) - out.push_back(in.substr(begin, in.size() - begin)); - - return out; -} - -// See description for format_text below for more details. -static string -format_paragraph(string const & text, size_t const col, size_t curcol) -{ - I(text.find('\n') == string::npos); - - string formatted; - if (curcol < col) - { - formatted = string(col - curcol, ' '); - curcol = col; - } - - const size_t maxcol = guess_terminal_width(); - - vector< string > words = split_into_words(text); - for (vector< string >::const_iterator iter = words.begin(); - iter != words.end(); iter++) - { - string const & word = *iter; - - if (iter != words.begin() && - curcol + word.size() + 1 > maxcol) - { - formatted += '\n' + string(col, ' '); - curcol = col; - } - else if (iter != words.begin()) - { - formatted += ' '; - curcol++; - } - - formatted += word; - curcol += word.size(); - } - - return formatted; -} - -// Reformats the given text so that it fits in the current screen with no -// wrapping. -// -// The input text is a series of words and sentences. Paragraphs may be -// separated with a '\n' character, which is taken into account to do the -// proper formatting. The text should not finish in '\n'. -// -// 'col' specifies the column where the text will start and 'curcol' -// specifies the current position of the cursor. -string -format_text(string const & text, size_t const col, size_t curcol) -{ - I(curcol <= col); - - string formatted; - - vector< string > lines; - split_into_lines(text, lines); - for (vector< string >::const_iterator iter = lines.begin(); - iter != lines.end(); iter++) - { - string const & line = *iter; - - formatted += format_paragraph(line, col, curcol); - if (iter + 1 != lines.end()) - formatted += "\n\n"; - curcol = 0; - } - - return formatted; -} - - - // Local Variables: // mode: C++ // fill-column: 76 ============================================================ --- testlib.lua a688fbcf17fd30160c55237e1dfdb98b540ecd02 +++ testlib.lua aafdb7ef96398c885f9e89313999027a91dffe66 @@ -405,23 +405,16 @@ function samefile(left, right) end function samefile(left, right) - local ldat = nil - local rdat = nil - if left == "-" then - ldat = io.input:read("*a") - rdat = readfile(right) - elseif right == "-" then - rdat = io.input:read("*a") - ldat = readfile(left) + if left == "-" or right == "-" then + err("tests may not rely on standard input") + end + if fsize(left) ~= fsize(right) then + return false else - if fsize(left) ~= fsize(right) then - return false - else - ldat = readfile(left) - rdat = readfile(right) - end - end - return ldat == rdat + local ldat = readfile(left) + local rdat = readfile(right) + return ldat == rdat + end end function samelines(f, t) @@ -808,13 +801,12 @@ end end end -function run_tests(debugging, list_only, run_dir, logname, args) +function run_tests(debugging, list_only, run_dir, logname, args, progress) local torun = {} local run_all = true local function P(...) - io.write(unpack(arg)) - io.flush() + progress(unpack(arg)) logfile:write(unpack(arg)) end @@ -934,10 +926,7 @@ function run_tests(debugging, list_only, [121] = "error creating test directory", [122] = "error spawning test process", [123] = "error entering test directory", - [124] = "error redirecting stdin", - [125] = "error redirecting stdout", - [126] = "error redirecting stderr", - [127] = "test did not exit as expected" + [124] = "unhandled exception in child process" } -- callback closure passed to run_tests_in_children @@ -959,7 +948,7 @@ function run_tests(debugging, list_only, else local wfile, err = io.open(tdir .. "/STATUS", "r") if wfile ~= nil then - what = wfile:read() + what = string.gsub(wfile:read("*a"), "\n$", "") wfile:close() else what = string.format("FAIL (status file: %s)", err) @@ -994,12 +983,7 @@ function run_tests(debugging, list_only, counts.total = counts.total + 1 P(string.format("%s%s\n", test_header, what)) - if debugging then - return false - else - unlogged_remove(tdir .. "/STATUS") - return can_delete - end + return (can_delete and not debugging) end run_tests_in_children(torun, report_one_test) @@ -1050,7 +1034,7 @@ function run_one_test(tname) test.errfile = "" test.errline = -1 test.bglist = {} - test.log = io.output() + test.log = io.open("tester.log", "w") L("Test ", test.name, "\n") ============================================================ --- unix/tester-plaf.cc 9002cee809acea4b97baddb839acb021d2fa29c6 +++ unix/tester-plaf.cc 2ca5dcb66ac86f2e1632055c5973089d1b9d78ae @@ -6,11 +6,16 @@ #include "tester-plaf.hh" #include +#include #include #include #include +#include +#include using std::string; +using std::map; +using std::make_pair; void make_accessible(string const & name) { @@ -223,10 +228,196 @@ bool running_as_root() return !geteuid(); } -// General note: the magic numbers in this function are meaningful to -// testlib.lua. They indicate a number of failure scenarios in which -// more detailed diagnostics are not possible. +// Parallel test case support. +// +// GNU Make's job server algorithm is described in detail at +// . This program +// implements only part of the general algorithm: specifically, if +// this program is invoked as if it were a recursive make, it will +// participate in the job server algorithm when parallelizing its own +// subcomponents. None of those subcomponents are themselves +// recursive make operations. Therefore, what we do is: +// +// 1. The invoking make has created a pipe, and written N tokens into it. +// We are entitled to run one job at any time, plus as many of the N +// as we can get tokens for. +// +// * A token is just a one-byte character. (Empirically, GNU make +// uses plus signs (ASCII 0x2B) for this.) +// * All tokens are identical. +// +// 2. We know that this is the case because we observe, in the MAKEFLAGS +// environment variable, a construct similar to: +// +// --jobserver-fds=R,W -j +// +// where R and W are integers specifying the read and write ends of +// the jobserver communication pipe. If we do not observe any such +// construct, we run in serial mode (this is actually implemented +// by creating a pipe ourselves, not writing anything to it, and +// proceeding as described below). +// +// 2a. If the file descriptors specified in the above construct are not +// open, this means the invoking Makefile did not properly mark the +// command running this program as a recursive make. We print a +// diagnostic and run in serial mode. +// +// 3. We have a queue of jobs to be run, and a set of currently +// running jobs (initially none). Before beginning the main loop, +// we install a handler for SIGCHLD. The only thing this handler +// does is close the duplicate jobserver read end (see below). +// +// The main loop proceeds as follows: +// +// a. Remove the next job to be run from the queue. +// +// b. Create a duplicate of the read side of the jobserver pipe, if +// we don't already have one. +// +// c. Call wait() in nonblocking mode until it doesn't report any +// more dead children. For each reported child, write a token +// back to the jobserver pipe, unless it is the last running +// child. +// +// d. If the set of currently running jobs is nonempty, read one +// byte in blocking mode from the duplicate fd. If this returns +// 1, proceed to step e. If it returns -1 and errno is either +// EINTR or EBADF, go back to step b. Abort on any other return +// and/or errno value. +// +// e. We have a token! Fork. In the child, close both sides of the +// jobserver pipe, and the duplicate, and then invoke the job. +// +// 4. Once the queue of jobs is exhausted, the main loop terminates. +// Subsequent code repeats step 3c until there are no more children, +// doing so in *blocking* mode. +// +// The jiggery-pokery with duplicate read fds in 3b-3d is necessary to +// close a race window. If we didn't do that and a SIGCHLD arrived +// betwen steps c and d, a token could get lost and we could end up +// hanging forever in the read(). See the above webpage for further +// discussion. +// Sadly, there is no getting around global variables for these. However, +// the information in question really is process-global (file descriptors, +// signal handlers) so it's not like we could be reentrant here anyway. + +static int jobsvr_read = -1; +static int jobsvr_write = -1; +static volatile int jobsvr_read_dup = -1; +static int tokens_held = 0; + +static void sigchld(int) +{ + if (jobsvr_read_dup != -1) + { + close(jobsvr_read_dup); + jobsvr_read_dup = -1; + } +} + +// Encapsulation of token acquisition and release. We get one token for free. +// Note: returns true if we need to go reap children again, not if we have +// successfully acquired a token. +static bool acquire_token() +{ + if (tokens_held == 0) + { + tokens_held++; + return false; + } + + char dummy; + int n = read(jobsvr_read_dup, &dummy, 1); + if (n == 1) + { + tokens_held++; + return false; + } + else + { + I(n == -1 && (errno == EINTR || errno == EBADF)); + return true; + } +} + +static void release_token() +{ + if (tokens_held > 1) + write(jobsvr_write, "+", 1); + I(tokens_held > 0); + tokens_held--; +} + +// Set up the above static variables appropriately, given the arguments +// to -j and/or --jobserver-fd on the command line and MAKEFLAGS. +// Diagnostics generated here are exactly the same as GNU Make's. +void prepare_for_parallel_testcases(int jobs, int jread, int jwrite) +{ + if ((jread != -1 || jwrite != -1) + && (fcntl(jread, F_GETFD) == -1 || fcntl(jwrite, F_GETFD) == -1)) + { + W(F("jobserver unavailable: using -j1. Add `+' to parent make rule.")); + close(jread); + close(jwrite); + jread = jwrite = -1; + } + + if (jread != -1 && jwrite != -1 && jobs >= 2) + { + W(F("-jN forced in submake: disabling jobserver mode.")); + close(jread); + close(jwrite); + jread = jwrite = -1; + } + + if (jread == -1 && jwrite == -1) + { + int jp[2]; + E(pipe(jp) == 0, + F("creating jobs pipe: %s") % os_strerror(errno)); + jread = jp[0]; + jwrite = jp[1]; + + if (jobs == -1) + jobs = 11; // infinity goes to 11, but no higher. + + // can ignore errors; the worst case is we don't parallelize as much + // as was requested. + for (int i = 0; i < jobs-1; i++) + write(jwrite, "+", 1); + } + + I(jread != -1 && jwrite != -1); + jobsvr_read = jread; + jobsvr_write = jwrite; +} + +// Child side of the fork. The magic numbers in this function and +// run_tests_in_children are meaningful to testlib.lua. They indicate a +// number of failure scenarios in which more detailed diagnostics are not +// possible. + +// gcc 4.1 doesn't like attributes on the function definition +static NORETURN(void child(test_invoker const &, + string const &, string const &)); + +// Note: to avoid horrible headaches, we do not touch fds 0-2 nor the stdio +// streams. Child operations are expected to be coded not to do *anything* +// with those streams. The use of _exit is intentional. +static void child(test_invoker const & invoke, string const & tdir, + string const & tname) +{ + close(jobsvr_read); + close(jobsvr_write); + close(jobsvr_read_dup); + + if (chdir(tdir.c_str()) != 0) + _exit(123); + + _exit(invoke(tname)); +} + void run_tests_in_children(test_enumerator const & next_test, test_invoker const & invoke, test_cleaner const & cleanup, @@ -237,8 +428,55 @@ void run_tests_in_children(test_enumerat { test_to_run t; string testdir; + map children; + + if (jobsvr_read_dup != -1) + { + close(jobsvr_read_dup); + jobsvr_read_dup = -1; + } + + struct sigaction sa, osa; + sigemptyset(&sa.sa_mask); + sa.sa_handler = sigchld; + sa.sa_flags = SA_NOCLDSTOP; // deliberate non-use of SA_RESTART + + E(sigaction(SIGCHLD, &sa, &osa) == 0, + F("setting SIGCHLD handler: %s") % os_strerror(errno)); + while (next_test(t)) { + do + { + if (jobsvr_read_dup == -1) + jobsvr_read_dup = dup(jobsvr_read); + + for (;;) + { + int status; + pid_t pid = waitpid(-1, &status, WNOHANG); + if (pid == 0) + break; + if (pid == -1) + { + if (errno == ECHILD) + break; + if (errno == EINTR) + continue; + E(false, F("waitpid failed: %s") % os_strerror(errno)); + } + + map::iterator tfin = children.find(pid); + I(tfin != children.end()); + if (cleanup(tfin->second, status)) + do_remove_recursive(run_dir + "/" + tfin->second.name); + children.erase(tfin); + release_token(); + } + } + while (acquire_token()); + + // This must be done before we try to redirect stdout/err to a file // within testdir. If we did it in the child, we would have to do it // before it was safe to issue diagnostics. @@ -251,50 +489,55 @@ void run_tests_in_children(test_enumerat catch (...) { cleanup(t, 121); + release_token(); continue; } // Make sure there is no pending buffered output before forking, or it // may be doubled. fflush(0); - pid_t child = fork(); - - if (child != 0) // parent + pid_t pid = fork(); + if (pid == 0) + child(invoke, testdir, t.name); + else if (pid == -1) { - int status; - if (child == -1) - status = 122; // spawn failure - else - process_wait(child, &status); - - if (cleanup(t, status)) + if (cleanup(t, 122)) do_remove_recursive(testdir); + release_token(); } - else // child + else + children.insert(make_pair(pid, t)); + } + + // Now wait for any unfinished children. + for (;;) + { + int status; + pid_t pid = waitpid(-1, &status, 0); + if (pid == 0) + break; + if (pid == -1) { - // From this point on we are in the child process. Until we have - // entered the test directory and re-opened fds 0-2 it is not safe - // to throw exceptions or call any of the diagnostic routines. - // Hence we use bare OS primitives and call _exit(), if any of them - // fail. It is safe to assume that close() will not fail. - if (chdir(testdir.c_str()) != 0) - _exit(123); + if (errno == ECHILD) + break; + if (errno == EINTR) + continue; + E(false, F("waitpid failed: %s") % os_strerror(errno)); + } - close(0); - if (open("/dev/null", O_RDONLY) != 0) - _exit(124); + map::iterator tfin = children.find(pid); + I(tfin != children.end()); + if (cleanup(tfin->second, status)) + do_remove_recursive(run_dir + "/" + tfin->second.name); + children.erase(tfin); + release_token(); + } - close(1); - if (open("tester.log", O_WRONLY|O_CREAT|O_EXCL, 0666) != 1) - _exit(125); - if (dup2(1, 2) == -1) - _exit(126); - - invoke(t.name); - // If invoke() returns something has gone terribly wrong. - _exit(127); - } - } + I(tokens_held == 0); + I(children.size() == 0); + close(jobsvr_read_dup); + jobsvr_read_dup = -1; + sigaction(SIGCHLD, &osa, 0); } // Local Variables: ============================================================ --- win32/tester-plaf.cc 4baec6a135f718d97454be05fba7a7bc29cdd4b8 +++ win32/tester-plaf.cc 4d713dfc459b94a813eccddd6a3295bb0dd007ed @@ -103,6 +103,23 @@ bool running_as_root() return false; } +// FIXME: I don't know any intrinsic reason why parallel test cases and the +// jobserver protocol could not be supported on Windows (see the lengthy +// explanation of the protocol in unix/tester-plaf.cc) but someone with a +// deep understanding of Win32 would have to implement it to ensure its +// race-free-ness. (Before bothering, confirm that GNU Make supports the +// jobserver protocol on Windows.) +// +// NOTE TO POTENTIAL FIXERS: If you code this with the fake POSIX layer in +// the C runtime instead of with WaitForMultipleObjects and other kernel +// primitives, you will suffer the curse of the vengeful ghost of Dave Cutler. + +void prepare_for_parallel_testcases(int jobs, int, int) +{ + if (jobs != 1) + W(F("parallel execution of test cases is not supported on Windows.")); +} + // General note: the magic numbers in this function are meaningful to // testlib.lua. They indicate a number of failure scenarios in which // more detailed diagnostics are not possible. @@ -144,10 +161,7 @@ void run_tests_in_children(test_enumerat change_current_working_dir(testdir); argv[4] = t.name.c_str(); - pid_t child = process_spawn_redirected("NUL:", - "tester.log", - "tester.log", - argv); + pid_t child = process_spawn(argv); change_current_working_dir(run_dir); int status; @@ -161,7 +175,6 @@ void run_tests_in_children(test_enumerat } } - // Local Variables: // mode: C++ // fill-column: 76