emacs-diffs
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[Emacs-diffs] master 71783e9: Add the string-numeric-lessp function


From: Lars Ingebrigtsen
Subject: [Emacs-diffs] master 71783e9: Add the string-numeric-lessp function
Date: Sun, 21 Feb 2016 04:33:48 +0000

branch: master
commit 71783e90a46ca913ea2c334cdc8cb24cd74055f8
Author: Lars Ingebrigtsen <address@hidden>
Commit: Lars Ingebrigtsen <address@hidden>

    Add the string-numeric-lessp function
    
    * doc/lispref/strings.texi (Text Comparison): Document
    `string-numerical-lessp'.
    
    * src/fns.c (Fstring_numeric_lessp): New function.
    (gather_number_from_string): Helper function for that function.
    
    * test/src/fns-tests.el (fns-tests-string-numeric-lessp): Add tests.
---
 doc/lispref/strings.texi |   13 ++++++
 etc/NEWS                 |    6 +++
 src/fns.c                |   98 ++++++++++++++++++++++++++++++++++++++++++++++
 test/src/fns-tests.el    |   17 ++++++++
 4 files changed, 134 insertions(+), 0 deletions(-)

diff --git a/doc/lispref/strings.texi b/doc/lispref/strings.texi
index 9d6613c..a3efbf2 100644
--- a/doc/lispref/strings.texi
+++ b/doc/lispref/strings.texi
@@ -633,6 +633,19 @@ If your system does not support a locale environment, this 
function
 behaves like @code{string-lessp}.
 @end defun
 
address@hidden string-numerical-lessp strin1 string2
+This function behaves like @code{string-lessp} for stretches of
+consecutive non-numerical characters, but compares sequences of
+numerical characters as if they comprised a base-ten number, and then
+compares the numbers.  So @samp{foo2.png} is ``smaller'' than
address@hidden according to this predicate, even if @samp{12} is
+lexicographically ``smaller'' than @samp{2}.
+
+If one string has a number in a position in the string, and the other
+doesn't, then lexicograpic comparison is done at that point, so
address@hidden is ``smaller'' than @samp{foo2.png}.
address@hidden defun
+
 @defun string-prefix-p string1 string2 &optional ignore-case
 This function returns address@hidden if @var{string1} is a prefix of
 @var{string2}; i.e., if @var{string2} starts with @var{string1}.  If
diff --git a/etc/NEWS b/etc/NEWS
index 33c1b13..9f0fb8d 100644
--- a/etc/NEWS
+++ b/etc/NEWS
@@ -1720,6 +1720,12 @@ environment.  For the time being this is implemented for 
modern POSIX
 systems and for MS-Windows, for other systems they fall back to their
 counterparts `string-lessp' and `string-equal'.
 
++++
+** The new function `string-numeric-lessp' compares strings by
+interpreting consecutive runs of numerical characters as numbers, and
+compares their numerical values.  According to this predicate,
+"foo2.png" is smaller than "foo12.png".
+
 ---
 *** The ls-lisp package uses `string-collate-lessp' to sort file names.
 The effect is that, on systems that use ls-lisp for Dired, the default
diff --git a/src/fns.c b/src/fns.c
index d180844..927fcda 100644
--- a/src/fns.c
+++ b/src/fns.c
@@ -331,6 +331,103 @@ Symbols are also allowed; their print names are used 
instead.  */)
   return i1 < SCHARS (string2) ? Qt : Qnil;
 }
 
+/* Return the numerical value of a consecutive run of numerical
+   characters from STRING.  The ISP and ISP_BYTE address pointer
+   pointers are increased and left at the next character after the
+   numerical characters. */
+static size_t
+gather_number_from_string (int c, Lisp_Object string,
+                          ptrdiff_t *isp, ptrdiff_t *isp_byte)
+{
+  size_t number = c - '0';
+  unsigned char *chp;
+  int chlen;
+
+  do
+    {
+      if (STRING_MULTIBYTE (string))
+       {
+         chp = &SDATA (string)[*isp_byte];
+         c = STRING_CHAR_AND_LENGTH (chp, chlen);
+       }
+      else
+       {
+         c = SREF (string, *isp_byte);
+         chlen = 1;
+       }
+
+      /* If we're still in a number, add it to the sum and continue. */
+      /* FIXME: Integer overflow? */
+      if (c >= '0' && c <= '9')
+       {
+         number = number * 10;
+         number += c - '0';
+         (*isp)++;
+         (*isp_byte) += chlen;
+       }
+      else
+       break;
+    }
+  /* Stop when we get to the end of the string anyway. */
+  while (c != 0);
+
+  return number;
+}
+
+DEFUN ("string-numeric-lessp", Fstring_numeric_lessp,
+       Sstring_numeric_lessp, 2, 2, 0,
+       doc: /* Return non-nil if STRING1 is less than STRING2 in 'numeric' 
order.
+Sequences of non-numerical characters are compared lexicographically,
+while sequences of numerical characters are converted into numbers,
+and then the numbers are compared.  This means that \"foo2.png\" is
+less than \"foo12.png\" according to this predicate.
+Case is significant.
+Symbols are also allowed; their print names are used instead.  */)
+  (register Lisp_Object string1, Lisp_Object string2)
+{
+  ptrdiff_t end;
+  ptrdiff_t i1, i1_byte, i2, i2_byte;
+  size_t num1, num2;
+
+  if (SYMBOLP (string1))
+    string1 = SYMBOL_NAME (string1);
+  if (SYMBOLP (string2))
+    string2 = SYMBOL_NAME (string2);
+  CHECK_STRING (string1);
+  CHECK_STRING (string2);
+
+  i1 = i1_byte = i2 = i2_byte = 0;
+
+  end = SCHARS (string1);
+  if (end > SCHARS (string2))
+    end = SCHARS (string2);
+
+  while (i1 < end)
+    {
+      /* When we find a mismatch, we must compare the
+        characters, not just the bytes.  */
+      int c1, c2;
+
+      FETCH_STRING_CHAR_ADVANCE (c1, string1, i1, i1_byte);
+      FETCH_STRING_CHAR_ADVANCE (c2, string2, i2, i2_byte);
+
+      if (c1 >= '0' && c1 <= '9' &&
+         c2 >= '0' && c2 <= '9')
+       /* Both strings are numbers, so compare them. */
+       {
+         num1 = gather_number_from_string (c1, string1, &i1, &i1_byte);
+         num2 = gather_number_from_string (c2, string2, &i2, &i2_byte);
+         if (num1 < num2)
+           return Qt;
+         else if (num1 > num2)
+           return Qnil;
+       }
+      else if (c1 != c2)
+       return c1 < c2 ? Qt : Qnil;
+    }
+  return i1 < SCHARS (string2) ? Qt : Qnil;
+}
+
 DEFUN ("string-collate-lessp", Fstring_collate_lessp, Sstring_collate_lessp, 
2, 4, 0,
        doc: /* Return t if first arg string is less than second in collation 
order.
 Symbols are also allowed; their print names are used instead.
@@ -5049,6 +5146,7 @@ this variable.  */);
   defsubr (&Sstring_equal);
   defsubr (&Scompare_strings);
   defsubr (&Sstring_lessp);
+  defsubr (&Sstring_numeric_lessp);
   defsubr (&Sstring_collate_lessp);
   defsubr (&Sstring_collate_equalp);
   defsubr (&Sappend);
diff --git a/test/src/fns-tests.el b/test/src/fns-tests.el
index 762f7bd..0c6edb8 100644
--- a/test/src/fns-tests.el
+++ b/test/src/fns-tests.el
@@ -191,3 +191,20 @@
              (string-collate-lessp
               a b (if (eq system-type 'windows-nt) "enu_USA" "en_US.UTF-8")))))
     '("Adrian" "Ævar" "Agustín" "Eli"))))
+
+(ert-deftest fns-tests-string-numeric-lessp ()
+  (should (string-numeric-lessp "foo2.png" "foo12.png"))
+  (should (not (string-numeric-lessp "foo12.png" "foo2.png")))
+  (should (string-numeric-lessp "foo12.png" "foo20000.png"))
+  (should (not (string-numeric-lessp "foo20000.png" "foo12.png")))
+  (should (string-numeric-lessp "foo.png" "foo2.png"))
+  (should (not (string-numeric-lessp "foo2.png" "foo.png")))
+  (should (equal (sort '("foo12.png" "foo2.png" "foo1.png")
+                       'string-numeric-lessp)
+                 '("foo1.png" "foo2.png" "foo12.png")))
+  (should (string-numeric-lessp "foo2" "foo1234"))
+  (should (not (string-numeric-lessp "foo1234" "foo2")))
+  (should (string-numeric-lessp "foo.png" "foo2"))
+  (should (string-numeric-lessp "foo1.25.5.png" "foo1.125.5"))
+  (should (string-numeric-lessp "2" "1245"))
+  (should (not (string-numeric-lessp "1245" "2"))))



reply via email to

[Prev in Thread] Current Thread [Next in Thread]