diff -ur findutils-4.4.2/find/defs.h findutils-4.4.2-rreale/find/defs.h --- findutils-4.4.2/find/defs.h 2009-05-16 17:17:01.000000000 +0200 +++ findutils-4.4.2-rreale/find/defs.h 2010-04-04 19:11:38.000000000 +0200 @@ -65,6 +65,8 @@ #include "buildcmd.h" #include "quotearg.h" +#define LARGE_BLOCK_SIZE 4096 /* Bytes per block for -samecontent; also the size of samecontent_args.buf. */ + /* These days we will assume ANSI/ISO C protootypes work on our compiler. */ #define PARAMS(Args) Args @@ -167,6 +169,15 @@ int fd; }; +struct samecontent_args /* Arguments and cached state of one -samecontent test. */ +{ + struct stat st; /* Stat data of the reference file, copied in by parse_samecontent. */ + char buf[LARGE_BLOCK_SIZE]; /* First block of the reference file, filled by pred_samecontent. */ + size_t size; /* Byte count block_read returned when buf was filled. */ + boolean buf_read; /* False until pred_samecontent has filled buf and size. */ + char *filename; /* Heap copy of the reference file name, made by parse_samecontent. */ +}; + struct size_val { enum comparison_type kind; @@ -313,6 +324,7 @@ struct time_val reftime; /* newer newerXY anewer cnewer mtime atime ctime mmin amin cmin */ struct perm_val perm; /* perm */ struct samefile_file_id samefileid; /* samefile */ + struct samecontent_args samecontentargs; /* samecontent */ mode_t type; /* type */ struct format_val printf_vec; /* printf fprintf fprint ls fls print0 fprint0 print */ } args; @@ -459,6 +471,7 @@ PREDICATEFUNCTION pred_user; PREDICATEFUNCTION pred_writable; PREDICATEFUNCTION pred_xtype; +PREDICATEFUNCTION pred_samecontent; @@ -535,7 +548,7 @@ /* If true, -depth was EXPLICITLY set (as opposed to having been turned * on by -delete, for example). */ - boolean explicit_depth; + boolean explicit_depth; /* If >=0, don't descend more than this many levels of subdirectories. 
*/ int maxdepth; diff -ur findutils-4.4.2/find/parser.c findutils-4.4.2-rreale/find/parser.c --- findutils-4.4.2/find/parser.c 2009-05-16 17:17:01.000000000 +0200 +++ findutils-4.4.2-rreale/find/parser.c 2010-04-04 19:35:50.000000000 +0200 @@ -154,6 +154,7 @@ static boolean parse_noignore_race PARAMS((const struct parser_table*, char *argv[], int *arg_ptr)); static boolean parse_warn PARAMS((const struct parser_table*, char *argv[], int *arg_ptr)); static boolean parse_xtype PARAMS((const struct parser_table*, char *argv[], int *arg_ptr)); +static boolean parse_samecontent PARAMS((const struct parser_table*, char *argv[], int *arg_ptr)); static boolean parse_quit PARAMS((const struct parser_table*, char *argv[], int *arg_ptr)); boolean parse_print PARAMS((const struct parser_table*, char *argv[], int *arg_ptr)); @@ -321,6 +322,7 @@ {ARG_TEST, "writable", parse_accesscheck, pred_writable}, /* GNU, 4.3.0+ */ PARSE_OPTION ("xdev", xdev), /* POSIX */ PARSE_TEST ("xtype", xtype), /* GNU */ + PARSE_TEST ("samecontent", samecontent), /* new test added by this patch */ #ifdef UNIMPLEMENTED_UNIX /* It's pretty ugly for find to know about archive formats. Plus what it could do with cpio archives is very limited. 
@@ -2585,6 +2587,42 @@ { return insert_type (argv, arg_ptr, entry, pred_xtype); } + /* Parse -samecontent FILE: stat FILE, then store its stat data and a copy of its name in a new predicate. Returns false when no argument can be collected. */ +static boolean +parse_samecontent (const struct parser_table* entry, char **argv, int *arg_ptr) +{ + struct predicate *our_pred; + const char *filename; + struct stat st; + + set_stat_placeholders(&st); + + if (collect_arg(argv, arg_ptr, &filename)) + { + if (0 != (options.xstat)(filename, &st)) /* a nonzero return is treated as failure */ + { + fatal_file_error(filename); /* presumably reports the error and exits -- confirm */ + } + } + else + { + return false; + } + + our_pred = insert_primary (entry); + memcpy (&our_pred->args.samecontentargs.st, &st, sizeof (struct stat)); + our_pred->args.samecontentargs.filename = xmalloc (strlen (filename) + 1); /* room for the name and its terminating NUL */ + strcpy (our_pred->args.samecontentargs.filename, filename); + our_pred->args.samecontentargs.buf_read = false; /* the first block is read later, by pred_samecontent */ + +#if 0 /* disabled leftover; it refers to samefileid and to an fd that is not declared in this function */ + our_pred->args.samefileid.fd = fd; + our_pred->need_type = false; + our_pred->need_stat = true; +#endif + our_pred->est_success_rate = 0.01f; + return true; +} static boolean insert_type (char **argv, int *arg_ptr, diff -ur findutils-4.4.2/find/pred.c findutils-4.4.2-rreale/find/pred.c --- findutils-4.4.2/find/pred.c 2009-05-16 17:17:01.000000000 +0200 +++ findutils-4.4.2-rreale/find/pred.c 2010-04-04 19:36:41.000000000 +0200 @@ -20,7 +20,24 @@ #include "defs.h" #include + #include +#ifndef SA_RESTART /* fallback so block_read can test SA_RESTART on hosts that lack it */ +# ifdef SA_INTERRUPT /* e.g. SunOS 4.1.x */ +# define SA_RESTART SA_INTERRUPT +# else +# define SA_RESTART 0 +# endif +#endif + +#if HAVE_UNISTD_H +# include +#endif + +#if HAVE_INTTYPES_H +# include +#endif + #include #include #include @@ -33,6 +50,7 @@ #include #include #include +#include #include "xalloc.h" #include "dirname.h" #include "human.h" @@ -47,6 +65,12 @@ #include "dircallback.h" #include "error.h" #include "verify.h" +#include "intprops.h" + +/* Type used for fast comparison of several bytes at a time. 
*/
+#ifndef word
+# define word uintmax_t
+#endif
 
 #if ENABLE_NLS
 # include <libintl.h>
@@ -230,6 +254,7 @@
   {pred_user, "user "},
   {pred_writable, "writable "},
   {pred_xtype, "xtype "},
+  {pred_samecontent, "samecontent "},
   {0, "none "}
 };
 #endif
@@ -1844,6 +1869,265 @@
    */
   return (pred_type (pathname, &sbuf, pred_ptr));
 }
+
+#include <string.h>   /* memcmp */
+
+/* Buffer primitives for comparison operations.  Adapted from lib/cmpbuf.c
+   in GNU diffutils 2.9.
+
+   Copyright (C) 1993, 1995, 1998, 2001-2002, 2006, 2009-2010 Free Software
+   Foundation, Inc.  */
+
+#ifndef PTRDIFF_MAX
+# define PTRDIFF_MAX TYPE_MAXIMUM (ptrdiff_t)
+#endif
+#ifndef SIZE_MAX
+# define SIZE_MAX TYPE_MAXIMUM (size_t)
+#endif
+#ifndef SSIZE_MAX
+# define SSIZE_MAX TYPE_MAXIMUM (ssize_t)
+#endif
+
+#undef MIN
+#define MIN(a, b) ((a) <= (b) ? (a) : (b))
+
+/* Read NBYTES bytes from descriptor FD into BUF.
+   NBYTES must not be SIZE_MAX.
+   Return the number of characters successfully read.
+   On error, return SIZE_MAX, setting errno.
+   The number returned is always NBYTES unless end-of-file or error.  */
+
+size_t
+block_read (int fd, char *buf, size_t nbytes)
+{
+  char *bp = buf;
+  char const *buflim = buf + nbytes;
+  size_t readlim = MIN (SSIZE_MAX, SIZE_MAX);
+
+  do
+    {
+      size_t bytes_remaining = buflim - bp;
+      size_t bytes_to_read = MIN (bytes_remaining, readlim);
+      ssize_t nread = read (fd, bp, bytes_to_read);
+      if (nread <= 0)
+        {
+          /* Zero means end-of-file: return what was read so far.  */
+          if (nread == 0)
+            break;
+
+          /* Accommodate Tru64 5.1, which can't read more than INT_MAX
+             bytes at a time.  They call that a 64-bit OS?  */
+          if (errno == EINVAL && INT_MAX < bytes_to_read)
+            {
+              readlim = INT_MAX;
+              continue;
+            }
+
+          /* This is needed for programs that have signal handlers on
+             older hosts without SA_RESTART.  It also accommodates
+             ancient AIX hosts that set errno to EINTR after uncaught
+             SIGCONT.  See the 1993-04-22 reference in the diffutils original.  */
+          if (! SA_RESTART && errno == EINTR)
+            continue;
+
+          return SIZE_MAX;
+        }
+      bp += nread;
+    }
+  while (bp < buflim);
+
+  return bp - buf;
+}
+
+/* Compare two files block by block.  Adapted from src/cmp.c in
+   GNU diffutils 2.9.
+
+   Copyright (C) 1990-1996, 1998, 2001-2002, 2004, 2006-2007, 2009-2010 Free
+   Software Foundation, Inc.  */
+
+/* Compare the reference file open on FD0 with the candidate file
+   PATHNAME open on FD1.
+
+   The first block of the reference file is not read from FD0; it is
+   taken from the cache in PRED_PTR->args.samecontentargs (buf, size).
+   FD0 must therefore be positioned just past those cached bytes, and
+   FD1 at the start of the candidate.  The cache is only read here,
+   never written, so it stays valid for the next candidate.
+
+   Return true if both files hold the same bytes.  Return false if
+   they differ, or if a read fails (after printing a diagnostic).  */
+
+static boolean
+cmp (int fd0, int fd1, struct predicate *pred_ptr, const char *pathname)
+{
+  struct samecontent_args *ref = &pred_ptr->args.samecontentargs;
+  char ref_block[LARGE_BLOCK_SIZE];
+  char cand_block[LARGE_BLOCK_SIZE];
+  char const *ref_data = ref->buf;
+  size_t ref_len = ref->size;
+
+  for (;;)
+    {
+      size_t cand_len = block_read (fd1, cand_block, sizeof cand_block);
+      if (cand_len == SIZE_MAX)
+        {
+          error (0, errno, "%s", pathname);
+          return false;
+        }
+
+      if (ref_len != cand_len || memcmp (ref_data, cand_block, ref_len) != 0)
+        return false;
+
+      /* block_read only comes up short at end-of-file, so a short
+         block that matched means both files ended together.  */
+      if (ref_len < sizeof cand_block)
+        return true;
+
+      ref_data = ref_block;
+      ref_len = block_read (fd0, ref_block, sizeof ref_block);
+      if (ref_len == SIZE_MAX)
+        {
+          error (0, errno, "%s", ref->filename);
+          return false;
+        }
+    }
+}
+
+/* Following macros adapted from src/system.h in GNU diffutils 2.9.
+
+   Copyright (C) 1988-1989, 1992-1995, 1998, 2001-2002, 2004, 2006, 2009-2010
+   Free Software Foundation, Inc.  */
+
+/* Do struct stat *S, *T describe the same special file?  */
+#if HAVE_STRUCT_STAT_ST_RDEV && defined S_ISBLK && defined S_ISCHR
+# define same_special_file(s, t) \
+     (((S_ISBLK ((s)->st_mode) && S_ISBLK ((t)->st_mode)) \
+       || (S_ISCHR ((s)->st_mode) && S_ISCHR ((t)->st_mode))) \
+      && (s)->st_rdev == (t)->st_rdev)
+#else
+# define same_special_file(s, t) 0
+#endif
+
+/* Do struct stat *S, *T describe the same file?  Yields 1 or 0.  */
+#define same_file(s, t) \
+    ((((s)->st_ino == (t)->st_ino) && ((s)->st_dev == (t)->st_dev)) \
+     || same_special_file (s, t))
+
+/* Do struct stat *S, *T have the same file attributes?
+
+   POSIX says that two files are identical if st_ino and st_dev are
+   the same, but many file systems incorrectly assign the same (device,
+   inode) pair to two distinct files, including:
+
+   - GNU/Linux NFS servers that export all local file systems as a
+     single NFS file system, if a local device number (st_dev) exceeds
+     255, or if a local inode number (st_ino) exceeds 16777215.
+
+   - Network Appliance NFS servers in snapshot directories; see
+     Network Appliance bug #195.
+
+   - ClearCase MVFS; see bug id ATRia04618.
+
+   Check whether two files that purport to be the same have the same
+   attributes, to work around instances of this common bug.  Do not
+   inspect all attributes, only attributes useful in checking for this
+   bug.
+
+   It's possible for two distinct files on a buggy file system to have
+   the same attributes, but it's not worth slowing down all
+   implementations (or complicating the configuration) to cater to
+   these rare cases in buggy implementations.  */
+
+#define same_file_attributes(s, t) \
+   ((s)->st_mode == (t)->st_mode \
+    && (s)->st_nlink == (t)->st_nlink \
+    && (s)->st_uid == (t)->st_uid \
+    && (s)->st_gid == (t)->st_gid \
+    && (s)->st_size == (t)->st_size \
+    && (s)->st_mtime == (t)->st_mtime \
+    && (s)->st_ctime == (t)->st_ctime)
+
+/* Test for -samecontent FILE: true if PATHNAME has exactly the same
+   content as the reference FILE named on the command line.
+
+   STAT_BUF describes PATHNAME.  PRED_PTR->args.samecontentargs holds
+   the reference file's name and stat data (set by the parser) plus a
+   cache of its first block, which is filled on the first call and
+   reused afterwards.
+
+   A file that cannot be opened or read is reported with error () and
+   treated as not matching.
+
+   NOTE(review): the files are opened by name as given; confirm that
+   PATHNAME is valid relative to the working directory in effect when
+   predicates run, and whether these errors should also change find's
+   exit status.  */
+
+boolean
+pred_samecontent (const char *pathname, struct stat *stat_buf, struct predicate *pred_ptr)
+{
+  struct samecontent_args *ref = &pred_ptr->args.samecontentargs;
+  boolean same = false;
+  int fd0;
+  int fd1;
+
+  /* The reference file itself (same inode and attributes) trivially
+     has the same content.  */
+  if (0 < same_file (stat_buf, &ref->st)
+      && same_file_attributes (stat_buf, &ref->st))
+    return true;
+
+  /* A directory has no byte stream to compare.  */
+  if (S_ISDIR (stat_buf->st_mode) || S_ISDIR (ref->st.st_mode))
+    return false;
+
+  /* Two regular files of different sizes cannot match.  */
+  if (S_ISREG (stat_buf->st_mode) && S_ISREG (ref->st.st_mode)
+      && stat_buf->st_size != ref->st.st_size)
+    return false;
+
+  fd0 = open (ref->filename, O_RDONLY | O_BINARY, 0);
+  if (fd0 < 0)
+    {
+      error (0, errno, "%s", ref->filename);
+      return false;
+    }
+
+  fd1 = open (pathname, O_RDONLY | O_BINARY, 0);
+  if (fd1 < 0)
+    {
+      error (0, errno, "%s", pathname);
+      close (fd0);
+      return false;
+    }
+
+  /* The reference file's first block is cached across calls.  Read it
+     on the first call; afterwards move the freshly opened descriptor
+     past it, because cmp takes that block from the cache.  */
+  if (!ref->buf_read)
+    {
+      size_t nread = block_read (fd0, ref->buf, LARGE_BLOCK_SIZE);
+      if (nread == SIZE_MAX)
+        {
+          error (0, errno, "%s", ref->filename);
+          goto done;
+        }
+      ref->size = nread;
+      ref->buf_read = true;
+    }
+  else if (lseek (fd0, (off_t) ref->size, SEEK_SET) < 0)
+    {
+      error (0, errno, "%s", ref->filename);
+      goto done;
+    }
+
+  same = cmp (fd0, fd1, pred_ptr, pathname);
+
+ done:
+  close (fd0);
+  close (fd1);
+  return same;
+}
 
 /* 1) fork to get a child; parent remembers the child pid
    2) child execs the command requested