[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
[libmicrohttpd] 15/22: sha{256, 512_256}: improved performance of the first steps on BE arches
From: gnunet
Subject: [libmicrohttpd] 15/22: sha{256, 512_256}: improved performance of the first steps on BE arches
Date: Sun, 25 Sep 2022 17:43:50 +0200
This is an automated email from the git hooks/post-receive script.
karlson2k pushed a commit to branch master
in repository libmicrohttpd.
commit b50bed1269be493f453c24de196bf89229abd2fd
Author: Evgeny Grin (Karlson2k) <k2k@narod.ru>
AuthorDate: Wed Sep 14 15:58:09 2022 +0300
sha{256,512_256}: improved performance of the first steps on BE arches
---
src/microhttpd/sha256.c | 98 +++++++++++++++++++++++++++++----------------
src/microhttpd/sha512_256.c | 98 +++++++++++++++++++++++++++++----------------
2 files changed, 126 insertions(+), 70 deletions(-)
diff --git a/src/microhttpd/sha256.c b/src/microhttpd/sha256.c
index b03e7555..2f9edf6a 100644
--- a/src/microhttpd/sha256.c
+++ b/src/microhttpd/sha256.c
@@ -145,9 +145,7 @@ sha256_transform (uint32_t H[SHA256_DIGEST_SIZE_WORDS],
+ (w)[((t) - 7) & 0xf] + sig0 ((w)[((t) - 15) & 0xf]) )
#ifndef MHD_FAVOR_SMALL_CODE
- /* During first 16 steps, before making any calculations on each step,
- the W element is read from input data buffer as big-endian value and
- stored in array of W elements. */
+
/* Note: instead of using K constants as array, all K values are specified
individually for each step, see FIPS PUB 180-4 paragraph 4.2.2 for
K values. */
@@ -157,38 +155,68 @@ sha256_transform (uint32_t H[SHA256_DIGEST_SIZE_WORDS],
SHA2STEP32(h, a, b, c, d, e, f, g, K[1], data[1]);
so current 'vD' will be used as 'vE' on next step,
current 'vH' will be used as 'vA' on next step. */
- SHA2STEP32 (a, b, c, d, e, f, g, h, UINT32_C (0x428a2f98), W[0] = \
- GET_W_FROM_DATA (data, 0));
- SHA2STEP32 (h, a, b, c, d, e, f, g, UINT32_C (0x71374491), W[1] = \
- GET_W_FROM_DATA (data, 1));
- SHA2STEP32 (g, h, a, b, c, d, e, f, UINT32_C (0xb5c0fbcf), W[2] = \
- GET_W_FROM_DATA (data, 2));
- SHA2STEP32 (f, g, h, a, b, c, d, e, UINT32_C (0xe9b5dba5), W[3] = \
- GET_W_FROM_DATA (data, 3));
- SHA2STEP32 (e, f, g, h, a, b, c, d, UINT32_C (0x3956c25b), W[4] = \
- GET_W_FROM_DATA (data, 4));
- SHA2STEP32 (d, e, f, g, h, a, b, c, UINT32_C (0x59f111f1), W[5] = \
- GET_W_FROM_DATA (data, 5));
- SHA2STEP32 (c, d, e, f, g, h, a, b, UINT32_C (0x923f82a4), W[6] = \
- GET_W_FROM_DATA (data, 6));
- SHA2STEP32 (b, c, d, e, f, g, h, a, UINT32_C (0xab1c5ed5), W[7] = \
- GET_W_FROM_DATA (data, 7));
- SHA2STEP32 (a, b, c, d, e, f, g, h, UINT32_C (0xd807aa98), W[8] = \
- GET_W_FROM_DATA (data, 8));
- SHA2STEP32 (h, a, b, c, d, e, f, g, UINT32_C (0x12835b01), W[9] = \
- GET_W_FROM_DATA (data, 9));
- SHA2STEP32 (g, h, a, b, c, d, e, f, UINT32_C (0x243185be), W[10] = \
- GET_W_FROM_DATA (data, 10));
- SHA2STEP32 (f, g, h, a, b, c, d, e, UINT32_C (0x550c7dc3), W[11] = \
- GET_W_FROM_DATA (data, 11));
- SHA2STEP32 (e, f, g, h, a, b, c, d, UINT32_C (0x72be5d74), W[12] = \
- GET_W_FROM_DATA (data, 12));
- SHA2STEP32 (d, e, f, g, h, a, b, c, UINT32_C (0x80deb1fe), W[13] = \
- GET_W_FROM_DATA (data, 13));
- SHA2STEP32 (c, d, e, f, g, h, a, b, UINT32_C (0x9bdc06a7), W[14] = \
- GET_W_FROM_DATA (data, 14));
- SHA2STEP32 (b, c, d, e, f, g, h, a, UINT32_C (0xc19bf174), W[15] = \
- GET_W_FROM_DATA (data, 15));
+#if _MHD_BYTE_ORDER == _MHD_BIG_ENDIAN
+ if ((const void *) W == data)
+ {
+ /* The input data is already in the cyclic data buffer W[] in correct bytes
+ order. */
+ SHA2STEP32 (a, b, c, d, e, f, g, h, UINT32_C (0x428a2f98), W[0]);
+ SHA2STEP32 (h, a, b, c, d, e, f, g, UINT32_C (0x71374491), W[1]);
+ SHA2STEP32 (g, h, a, b, c, d, e, f, UINT32_C (0xb5c0fbcf), W[2]);
+ SHA2STEP32 (f, g, h, a, b, c, d, e, UINT32_C (0xe9b5dba5), W[3]);
+ SHA2STEP32 (e, f, g, h, a, b, c, d, UINT32_C (0x3956c25b), W[4]);
+ SHA2STEP32 (d, e, f, g, h, a, b, c, UINT32_C (0x59f111f1), W[5]);
+ SHA2STEP32 (c, d, e, f, g, h, a, b, UINT32_C (0x923f82a4), W[6]);
+ SHA2STEP32 (b, c, d, e, f, g, h, a, UINT32_C (0xab1c5ed5), W[7]);
+ SHA2STEP32 (a, b, c, d, e, f, g, h, UINT32_C (0xd807aa98), W[8]);
+ SHA2STEP32 (h, a, b, c, d, e, f, g, UINT32_C (0x12835b01), W[9]);
+ SHA2STEP32 (g, h, a, b, c, d, e, f, UINT32_C (0x243185be), W[10]);
+ SHA2STEP32 (f, g, h, a, b, c, d, e, UINT32_C (0x550c7dc3), W[11]);
+ SHA2STEP32 (e, f, g, h, a, b, c, d, UINT32_C (0x72be5d74), W[12]);
+ SHA2STEP32 (d, e, f, g, h, a, b, c, UINT32_C (0x80deb1fe), W[13]);
+ SHA2STEP32 (c, d, e, f, g, h, a, b, UINT32_C (0x9bdc06a7), W[14]);
+ SHA2STEP32 (b, c, d, e, f, g, h, a, UINT32_C (0xc19bf174), W[15]);
+ }
+ else /* Combined with the next 'if' */
+#endif /* _MHD_BYTE_ORDER == _MHD_BIG_ENDIAN */
+ if (1)
+ {
+ /* During first 16 steps, before making any calculations on each step,
+ the W element is read from input data buffer as big-endian value and
+ stored in array of W elements. */
+ SHA2STEP32 (a, b, c, d, e, f, g, h, UINT32_C (0x428a2f98), W[0] = \
+ GET_W_FROM_DATA (data, 0));
+ SHA2STEP32 (h, a, b, c, d, e, f, g, UINT32_C (0x71374491), W[1] = \
+ GET_W_FROM_DATA (data, 1));
+ SHA2STEP32 (g, h, a, b, c, d, e, f, UINT32_C (0xb5c0fbcf), W[2] = \
+ GET_W_FROM_DATA (data, 2));
+ SHA2STEP32 (f, g, h, a, b, c, d, e, UINT32_C (0xe9b5dba5), W[3] = \
+ GET_W_FROM_DATA (data, 3));
+ SHA2STEP32 (e, f, g, h, a, b, c, d, UINT32_C (0x3956c25b), W[4] = \
+ GET_W_FROM_DATA (data, 4));
+ SHA2STEP32 (d, e, f, g, h, a, b, c, UINT32_C (0x59f111f1), W[5] = \
+ GET_W_FROM_DATA (data, 5));
+ SHA2STEP32 (c, d, e, f, g, h, a, b, UINT32_C (0x923f82a4), W[6] = \
+ GET_W_FROM_DATA (data, 6));
+ SHA2STEP32 (b, c, d, e, f, g, h, a, UINT32_C (0xab1c5ed5), W[7] = \
+ GET_W_FROM_DATA (data, 7));
+ SHA2STEP32 (a, b, c, d, e, f, g, h, UINT32_C (0xd807aa98), W[8] = \
+ GET_W_FROM_DATA (data, 8));
+ SHA2STEP32 (h, a, b, c, d, e, f, g, UINT32_C (0x12835b01), W[9] = \
+ GET_W_FROM_DATA (data, 9));
+ SHA2STEP32 (g, h, a, b, c, d, e, f, UINT32_C (0x243185be), W[10] = \
+ GET_W_FROM_DATA (data, 10));
+ SHA2STEP32 (f, g, h, a, b, c, d, e, UINT32_C (0x550c7dc3), W[11] = \
+ GET_W_FROM_DATA (data, 11));
+ SHA2STEP32 (e, f, g, h, a, b, c, d, UINT32_C (0x72be5d74), W[12] = \
+ GET_W_FROM_DATA (data, 12));
+ SHA2STEP32 (d, e, f, g, h, a, b, c, UINT32_C (0x80deb1fe), W[13] = \
+ GET_W_FROM_DATA (data, 13));
+ SHA2STEP32 (c, d, e, f, g, h, a, b, UINT32_C (0x9bdc06a7), W[14] = \
+ GET_W_FROM_DATA (data, 14));
+ SHA2STEP32 (b, c, d, e, f, g, h, a, UINT32_C (0xc19bf174), W[15] = \
+ GET_W_FROM_DATA (data, 15));
+ }
/* During last 48 steps, before making any calculations on each step,
current W element is generated from other W elements of the cyclic buffer
diff --git a/src/microhttpd/sha512_256.c b/src/microhttpd/sha512_256.c
index a42a9b8e..b7682b28 100644
--- a/src/microhttpd/sha512_256.c
+++ b/src/microhttpd/sha512_256.c
@@ -144,9 +144,7 @@ sha512_256_transform (uint64_t H[SHA512_256_HASH_SIZE_WORDS],
+ (w)[((t) - 7) & 15] + sig0 ((w)[((t) - 15) & 15]) )
#ifndef MHD_FAVOR_SMALL_CODE
- /* During first 16 steps, before making any calculations on each step,
- the W element is read from the input data buffer as big-endian value and
- stored in the array of W elements. */
+
/* Note: instead of using K constants as array, all K values are specified
individually for each step, see FIPS PUB 180-4 clause 4.2.3 for
K values. */
@@ -156,38 +154,68 @@ sha512_256_transform (uint64_t H[SHA512_256_HASH_SIZE_WORDS],
SHA2STEP64(h, a, b, c, d, e, f, g, K[1], data[1]);
so current 'vD' will be used as 'vE' on next step,
current 'vH' will be used as 'vA' on next step. */
- SHA2STEP64 (a, b, c, d, e, f, g, h, UINT64_C (0x428a2f98d728ae22), \
- W[0] = GET_W_FROM_DATA (data, 0));
- SHA2STEP64 (h, a, b, c, d, e, f, g, UINT64_C (0x7137449123ef65cd), \
- W[1] = GET_W_FROM_DATA (data, 1));
- SHA2STEP64 (g, h, a, b, c, d, e, f, UINT64_C (0xb5c0fbcfec4d3b2f), \
- W[2] = GET_W_FROM_DATA (data, 2));
- SHA2STEP64 (f, g, h, a, b, c, d, e, UINT64_C (0xe9b5dba58189dbbc), \
- W[3] = GET_W_FROM_DATA (data, 3));
- SHA2STEP64 (e, f, g, h, a, b, c, d, UINT64_C (0x3956c25bf348b538), \
- W[4] = GET_W_FROM_DATA (data, 4));
- SHA2STEP64 (d, e, f, g, h, a, b, c, UINT64_C (0x59f111f1b605d019), \
- W[5] = GET_W_FROM_DATA (data, 5));
- SHA2STEP64 (c, d, e, f, g, h, a, b, UINT64_C (0x923f82a4af194f9b), \
- W[6] = GET_W_FROM_DATA (data, 6));
- SHA2STEP64 (b, c, d, e, f, g, h, a, UINT64_C (0xab1c5ed5da6d8118), \
- W[7] = GET_W_FROM_DATA (data, 7));
- SHA2STEP64 (a, b, c, d, e, f, g, h, UINT64_C (0xd807aa98a3030242), \
- W[8] = GET_W_FROM_DATA (data, 8));
- SHA2STEP64 (h, a, b, c, d, e, f, g, UINT64_C (0x12835b0145706fbe), \
- W[9] = GET_W_FROM_DATA (data, 9));
- SHA2STEP64 (g, h, a, b, c, d, e, f, UINT64_C (0x243185be4ee4b28c), \
- W[10] = GET_W_FROM_DATA (data, 10));
- SHA2STEP64 (f, g, h, a, b, c, d, e, UINT64_C (0x550c7dc3d5ffb4e2), \
- W[11] = GET_W_FROM_DATA (data, 11));
- SHA2STEP64 (e, f, g, h, a, b, c, d, UINT64_C (0x72be5d74f27b896f), \
- W[12] = GET_W_FROM_DATA (data, 12));
- SHA2STEP64 (d, e, f, g, h, a, b, c, UINT64_C (0x80deb1fe3b1696b1), \
- W[13] = GET_W_FROM_DATA (data, 13));
- SHA2STEP64 (c, d, e, f, g, h, a, b, UINT64_C (0x9bdc06a725c71235), \
- W[14] = GET_W_FROM_DATA (data, 14));
- SHA2STEP64 (b, c, d, e, f, g, h, a, UINT64_C (0xc19bf174cf692694), \
- W[15] = GET_W_FROM_DATA (data, 15));
+#if _MHD_BYTE_ORDER == _MHD_BIG_ENDIAN
+ if ((const void *) W == data)
+ {
+ /* The input data is already in the cyclic data buffer W[] in correct bytes
+ order. */
+ SHA2STEP64 (a, b, c, d, e, f, g, h, UINT64_C (0x428a2f98d728ae22), W[0]);
+ SHA2STEP64 (h, a, b, c, d, e, f, g, UINT64_C (0x7137449123ef65cd), W[1]);
+ SHA2STEP64 (g, h, a, b, c, d, e, f, UINT64_C (0xb5c0fbcfec4d3b2f), W[2]);
+ SHA2STEP64 (f, g, h, a, b, c, d, e, UINT64_C (0xe9b5dba58189dbbc), W[3]);
+ SHA2STEP64 (e, f, g, h, a, b, c, d, UINT64_C (0x3956c25bf348b538), W[4]);
+ SHA2STEP64 (d, e, f, g, h, a, b, c, UINT64_C (0x59f111f1b605d019), W[5]);
+ SHA2STEP64 (c, d, e, f, g, h, a, b, UINT64_C (0x923f82a4af194f9b), W[6]);
+ SHA2STEP64 (b, c, d, e, f, g, h, a, UINT64_C (0xab1c5ed5da6d8118), W[7]);
+ SHA2STEP64 (a, b, c, d, e, f, g, h, UINT64_C (0xd807aa98a3030242), W[8]);
+ SHA2STEP64 (h, a, b, c, d, e, f, g, UINT64_C (0x12835b0145706fbe), W[9]);
+ SHA2STEP64 (g, h, a, b, c, d, e, f, UINT64_C (0x243185be4ee4b28c), W[10]);
+ SHA2STEP64 (f, g, h, a, b, c, d, e, UINT64_C (0x550c7dc3d5ffb4e2), W[11]);
+ SHA2STEP64 (e, f, g, h, a, b, c, d, UINT64_C (0x72be5d74f27b896f), W[12]);
+ SHA2STEP64 (d, e, f, g, h, a, b, c, UINT64_C (0x80deb1fe3b1696b1), W[13]);
+ SHA2STEP64 (c, d, e, f, g, h, a, b, UINT64_C (0x9bdc06a725c71235), W[14]);
+ SHA2STEP64 (b, c, d, e, f, g, h, a, UINT64_C (0xc19bf174cf692694), W[15]);
+ }
+ else /* Combined with the next 'if' */
+#endif /* _MHD_BYTE_ORDER == _MHD_BIG_ENDIAN */
+ if (1)
+ {
+ /* During first 16 steps, before making any calculations on each step,
+ the W element is read from the input data buffer as big-endian value and
+ stored in the array of W elements. */
+ SHA2STEP64 (a, b, c, d, e, f, g, h, UINT64_C (0x428a2f98d728ae22), \
+ W[0] = GET_W_FROM_DATA (data, 0));
+ SHA2STEP64 (h, a, b, c, d, e, f, g, UINT64_C (0x7137449123ef65cd), \
+ W[1] = GET_W_FROM_DATA (data, 1));
+ SHA2STEP64 (g, h, a, b, c, d, e, f, UINT64_C (0xb5c0fbcfec4d3b2f), \
+ W[2] = GET_W_FROM_DATA (data, 2));
+ SHA2STEP64 (f, g, h, a, b, c, d, e, UINT64_C (0xe9b5dba58189dbbc), \
+ W[3] = GET_W_FROM_DATA (data, 3));
+ SHA2STEP64 (e, f, g, h, a, b, c, d, UINT64_C (0x3956c25bf348b538), \
+ W[4] = GET_W_FROM_DATA (data, 4));
+ SHA2STEP64 (d, e, f, g, h, a, b, c, UINT64_C (0x59f111f1b605d019), \
+ W[5] = GET_W_FROM_DATA (data, 5));
+ SHA2STEP64 (c, d, e, f, g, h, a, b, UINT64_C (0x923f82a4af194f9b), \
+ W[6] = GET_W_FROM_DATA (data, 6));
+ SHA2STEP64 (b, c, d, e, f, g, h, a, UINT64_C (0xab1c5ed5da6d8118), \
+ W[7] = GET_W_FROM_DATA (data, 7));
+ SHA2STEP64 (a, b, c, d, e, f, g, h, UINT64_C (0xd807aa98a3030242), \
+ W[8] = GET_W_FROM_DATA (data, 8));
+ SHA2STEP64 (h, a, b, c, d, e, f, g, UINT64_C (0x12835b0145706fbe), \
+ W[9] = GET_W_FROM_DATA (data, 9));
+ SHA2STEP64 (g, h, a, b, c, d, e, f, UINT64_C (0x243185be4ee4b28c), \
+ W[10] = GET_W_FROM_DATA (data, 10));
+ SHA2STEP64 (f, g, h, a, b, c, d, e, UINT64_C (0x550c7dc3d5ffb4e2), \
+ W[11] = GET_W_FROM_DATA (data, 11));
+ SHA2STEP64 (e, f, g, h, a, b, c, d, UINT64_C (0x72be5d74f27b896f), \
+ W[12] = GET_W_FROM_DATA (data, 12));
+ SHA2STEP64 (d, e, f, g, h, a, b, c, UINT64_C (0x80deb1fe3b1696b1), \
+ W[13] = GET_W_FROM_DATA (data, 13));
+ SHA2STEP64 (c, d, e, f, g, h, a, b, UINT64_C (0x9bdc06a725c71235), \
+ W[14] = GET_W_FROM_DATA (data, 14));
+ SHA2STEP64 (b, c, d, e, f, g, h, a, UINT64_C (0xc19bf174cf692694), \
+ W[15] = GET_W_FROM_DATA (data, 15));
+ }
/* During last 64 steps, before making any calculations on each step,
current W element is generated from other W elements of the cyclic buffer
--
To stop receiving notification emails like this one, please contact
gnunet@gnunet.org.
- [libmicrohttpd] 05/22: sha256: backported minor optimisations from SHA-512/256, (continued)
- [libmicrohttpd] 05/22: sha256: backported minor optimisations from SHA-512/256, gnunet, 2022/09/25
- [libmicrohttpd] 02/22: Fixed initialisation of old GnuTLS versions, gnunet, 2022/09/25
- [libmicrohttpd] 10/22: md5: added compact code version, gnunet, 2022/09/25
- [libmicrohttpd] 14/22: test_{md5,sha{256,512_256}}: added long test sequence, gnunet, 2022/09/25
- [libmicrohttpd] 07/22: sha512_256: additional trick for compacter code; doxy and comment fixes, gnunet, 2022/09/25
- [libmicrohttpd] 09/22: md5: improved performance of the first round on LE arches, gnunet, 2022/09/25
- [libmicrohttpd] 17/22: digestauth: removed redundant check and report, gnunet, 2022/09/25
- [libmicrohttpd] 12/22: test_sha{256,512_256}: minor fixes, gnunet, 2022/09/25
- [libmicrohttpd] 08/22: md5: replaced public domain MD5 implementation with our own implementation, gnunet, 2022/09/25
- [libmicrohttpd] 20/22: digestauth: refactored hashing asserts, gnunet, 2022/09/25
- [libmicrohttpd] 15/22: sha{256, 512_256}: improved performance of the first steps on BE arches, gnunet <=
- [libmicrohttpd] 22/22: w32: sync projects with autotools, gnunet, 2022/09/25
- [libmicrohttpd] 11/22: test_{md5,sha256,sha512_256}: added more checks, gnunet, 2022/09/25
- [libmicrohttpd] 16/22: test_{md5,sha{256,512_256}}: re-use of the context structure, gnunet, 2022/09/25
- [libmicrohttpd] 19/22: digestauth: used weak pseudo-random generators to avoid nonces clashes, gnunet, 2022/09/25
- [libmicrohttpd] 21/22: Implemented support for hash calculation by GnuTLS lib functions, gnunet, 2022/09/25
- [libmicrohttpd] 13/22: test_md5: added more test sequences, gnunet, 2022/09/25
- [libmicrohttpd] 18/22: digestauth: changed "slot used" detection logic, gnunet, 2022/09/25