From 74b9a044be378a424fd895c6eb85149b4bc691a3 Mon Sep 17 00:00:00 2001 From: Joey Degges Date: Mon, 1 Mar 2010 10:26:22 +0000 Subject: [PATCH] sort: inform the system about our input access pattern Tell the system that we'll access input sequentially, so that we more efficiently process uncached files in a few cases: Reading from faster flash devices. E.g. 21 MB/s key: NORMAL 31.6s (26.8 user) SEQUENTIAL 27.7s WILLNEED 27.7s Processing in parallel with readahead when using a small 1M buffer: NORMAL 24.7s (21.1 user) SEQUENTIAL 22.7s WILLNEED 25.6s A small benefit when merging: NORMAL 25.0s (16.9 user) SEQUENTIAL 24.6s (16.6 user) WILLNEED 38.4s (13.1 user) Note WILLNEED is presented above for comparison to show it has some unwanted characteristics due to its synchronous prepopulation of the cache. It has a good benefit on a mechanical disk @ 80MB/s and a multicore system with competing processes: NORMAL 14.73s SEQUENTIAL 10.95s WILLNEED 05.22s However the scheduling differences causing this result are probably best explicitly managed using `nice` etc. * configure.ac: check for posix_fadvise(). * src/sort.c (fadvise_input): A new function to apply the POSIX_FADV_SEQUENTIAL hint to an input stream. (stream_open): Call the above function for all input streams. 
--- configure.ac | 3 ++ src/sort.c | 70 +++++++++++++++++++++++++++++++++++++++++++++++++++++++-- 2 files changed, 70 insertions(+), 3 deletions(-) diff --git a/configure.ac b/configure.ac index b07a52b..c07fbd4 100644 --- a/configure.ac +++ b/configure.ac @@ -344,6 +344,9 @@ if test "$elf_sys" = "yes" && \ gl_ADD_PROG([optional_pkglib_progs], [libstdbuf.so]) fi +# Check for fcntl.h/posix_fadvise +AC_CHECK_HEADERS(fcntl.h, [AC_CHECK_FUNCS(posix_fadvise)]) + ############################################################################ mk="$srcdir/src/Makefile.am" # Extract all literal names from the definition of $(EXTRA_PROGRAMS) diff --git a/src/sort.c b/src/sort.c index 39cb6d6..02b2351 100644 --- a/src/sort.c +++ b/src/sort.c @@ -32,6 +32,7 @@ #include "filevercmp.h" #include "hard-locale.h" #include "hash.h" +#include "ignore-value.h" #include "md5.h" #include "physmem.h" #include "posixver.h" @@ -794,6 +795,61 @@ create_temp_file (int *pfd, bool survive_fd_exhaustion) return node; } +/* Predeclare an access pattern for input files. + Ignore any errors -- this is only advisory. + + There are a few hints we could possibly provide, + and after careful testing it was decided that + specifying POSIX_FADV_SEQUENTIAL was not detrimental + to any cases. On Linux 2.6.31, this option doubles + the size of read ahead performed and thus was seen to + benefit these cases: + Merging + Sorting with a smaller internal buffer + Reading from faster flash devices + + In _addition_ one could also specify other hints... + + POSIX_FADV_WILLNEED was tested, but Linux 2.6.31 + at least uses that to _synchronously_ prepopulate the cache + with the specified range. While sort does need to + read all of its input before outputting, a synchronous + read of the whole file up front precludes any processing + that sort could do in parallel with the system doing + read ahead of the data. 
This was seen to have negative effects + in a couple of cases: + Merging + Sorting with a smaller internal buffer + Note this option was seen to shorten the runtime for sort + on a multicore system with lots of RAM and other processes + competing for CPU. It could be argued that more explicit + scheduling hints with `nice` et al. are more appropriate + for this situation. + + POSIX_FADV_NOREUSE is a possibility as it could lower + the priority of input data in the cache as sort will + only need to process it once. However its functionality + has changed over Linux kernel versions and as of 2.6.31 + it does nothing and thus we can't depend on what it might + do in future. + + POSIX_FADV_DONTNEED is not appropriate for user specified + input files, but for temp files we do want to drop the + cache immediately after processing. This is done implicitly + however when the files are unlinked. */ + +static void +fadvise_input (FILE *fp) +{ +#if HAVE_POSIX_FADVISE + if (fp) + { + int fd = fileno (fp); + ignore_value (posix_fadvise (fd, 0, 0, POSIX_FADV_SEQUENTIAL)); + } +#endif +} + /* Return a stream for FILE, opened with mode HOW. A null FILE means standard output; HOW should be "w". When opening for input, "-" means standard input. To avoid confusion, do not return file @@ -805,10 +861,18 @@ stream_open (const char *file, const char *how) { if (!file) return stdout; - if (STREQ (file, "-") && *how == 'r') + if (*how == 'r') { - have_read_stdin = true; - return stdin; + FILE *fp; + if (STREQ (file, "-")) + { + have_read_stdin = true; + fp = stdin; + } + else + fp = fopen (file, how); + fadvise_input (fp); + return fp; } return fopen (file, how); } -- 1.6.2.5