block-sha1: support for architectures with memory alignment restrictions

This is needed on architectures with poor or non-existent unaligned memory support and/or no fast byte swap instruction (such as ARM). It is implemented by using byte accesses to memory and shifting the results together. This also makes the code portable, so the byte access methods are the defaults. Any architecture that properly supports unaligned word accesses in hardware simply has to enable the alternative methods. Signed-off-by: Nicolas Pitre <nico@cam.org> Signed-off-by: Junio C Hamano <gitster@pobox.com>
author: Nicolas Pitre <nico@cam.org> 2009-08-12 15:47:55 -0400
committer: Junio C Hamano <gitster@pobox.com> 2009-08-12 13:36:32 -0700
commit: 660231aa9727d29c7d2c16319bc6a3fa8bed3e0e (patch)
tree: 5059ca749124a89a404f193a7b97c648ba4d16b2 /block-sha1
parent: block-sha1: split the different "hacks" to be individually selected (diff)
download: tgif-660231aa9727d29c7d2c16319bc6a3fa8bed3e0e.tar.xz
1 files changed, 30 insertions, 2 deletions
diff --git a/block-sha1/sha1.c b/block-sha1/sha1.c
index 67c9bd0723..d3121f7a02 100644
--- a/block-sha1/sha1.c
+++ b/block-sha1/sha1.c
@@ -60,6 +60,34 @@
   #define setW(x, val) (W(x) = (val))
 #endif
 
+/*
+ * Performance might be improved if the CPU architecture is OK with
+ * unaligned 32-bit loads and a fast ntohl() is available.
+ * Otherwise fall back to byte loads and shifts which is portable,
+ * and is faster on architectures with memory alignment issues.
+ */
+
+#if defined(__i386__) || defined(__x86_64__)
+/* Word access: relies on the CPU handling unaligned 32-bit loads/stores. */
+#define get_be32(p)	ntohl(*(unsigned int *)(p))
+#define put_be32(p, v)	do { *(unsigned int *)(p) = htonl(v); } while (0)
+
+#else
+/* Widen each byte to unsigned before shifting: int would overflow on << 24. */
+#define get_be32(p)	( \
+	((unsigned int)*((unsigned char *)(p) + 0) << 24) | \
+	((unsigned int)*((unsigned char *)(p) + 1) << 16) | \
+	((unsigned int)*((unsigned char *)(p) + 2) <<  8) | \
+	((unsigned int)*((unsigned char *)(p) + 3) <<  0) )
+#define put_be32(p, v)	do { \
+	unsigned int be32_v_ = (v); \
+	*((unsigned char *)(p) + 0) = be32_v_ >> 24; \
+	*((unsigned char *)(p) + 1) = be32_v_ >> 16; \
+	*((unsigned char *)(p) + 2) = be32_v_ >>  8; \
+	*((unsigned char *)(p) + 3) = be32_v_ >>  0; } while (0)
+
+#endif
+
 /* This "rolls" over the 512-bit array */
 #define W(x) (array[(x)&15])
 
@@ -67,7 +95,7 @@
  * Where do we get the source from? The first 16 iterations get it from
  * the input data, the next mix it from the 512-bit array.
  */
-#define SHA_SRC(t) htonl(data[t])
+#define SHA_SRC(t) get_be32(data + t)
 #define SHA_MIX(t) SHA_ROL(W(t+13) ^ W(t+8) ^ W(t+2) ^ W(t), 1)
 
 #define SHA_ROUND(t, input, fn, constant, A, B, C, D, E) do { \
@@ -245,5 +273,5 @@ void blk_SHA1_Final(unsigned char hashout[20], blk_SHA_CTX *ctx)
 
 	/* Output hash */
 	for (i = 0; i < 5; i++)
-		((unsigned int *)hashout)[i] = htonl(ctx->H[i]);
+		put_be32(hashout + i*4, ctx->H[i]);
 }
author	Nicolas Pitre <nico@cam.org>	2009-08-12 15:47:55 -0400
committer	Junio C Hamano <gitster@pobox.com>	2009-08-12 13:36:32 -0700
commit	660231aa9727d29c7d2c16319bc6a3fa8bed3e0e (patch)
tree	5059ca749124a89a404f193a7b97c648ba4d16b2 /block-sha1
parent	block-sha1: split the different "hacks" to be individually selected (diff)
download	tgif-660231aa9727d29c7d2c16319bc6a3fa8bed3e0e.tar.xz