From 3c1fdfa7fb55307e0b21bea63b7f976efbe139df Mon Sep 17 00:00:00 2001
From: Andrew Tridgell
Date: Thu, 14 Oct 2004 05:26:35 +0000
Subject: r2960: during a lunchtime discussion I found out that all powerpc
 processors can correctly (and quickly!) handle unaligned little endian
 memory accesses, just like i386. This should reduce code size and speed
 things up quite a lot on ppc, at the expense of some inline asm code
 (which means it only works with gcc)

(This used to be commit 2a0c427c2bf2f8b0739f12c78151b819388c44d4)
---
 source4/include/byteorder.h | 49 +++++++++++++++++++++++++++++++++++++++++++--
 1 file changed, 47 insertions(+), 2 deletions(-)

diff --git a/source4/include/byteorder.h b/source4/include/byteorder.h
index bf68ee7df7..5137b78e4c 100644
--- a/source4/include/byteorder.h
+++ b/source4/include/byteorder.h
@@ -93,11 +93,45 @@ it also defines lots of intermediate macros, just ignore those :-)
 
 */
 
+
+/*
+  on powerpc we can use the magic instructions to load/store
+  in little endian
+*/
+#if (defined(__powerpc__) && defined(__GNUC__))
+static __inline__ uint16_t ld_le16(const uint16_t *addr)
+{
+	uint16_t val;
+	__asm__ ("lhbrx %0,0,%1" : "=r" (val) : "r" (addr), "m" (*addr));
+	return val;
+}
+
+static __inline__ void st_le16(uint16_t *addr, const uint16_t val)
+{
+	__asm__ ("sthbrx %1,0,%2" : "=m" (*addr) : "r" (val), "r" (addr));
+}
+
+static __inline__ uint32_t ld_le32(const uint32_t *addr)
+{
+	uint32_t val;
+	__asm__ ("lwbrx %0,0,%1" : "=r" (val) : "r" (addr), "m" (*addr));
+	return val;
+}
+
+static __inline__ void st_le32(uint32_t *addr, const uint32_t val)
+{
+	__asm__ ("stwbrx %1,0,%2" : "=m" (*addr) : "r" (val), "r" (addr));
+}
+#define HAVE_ASM_BYTEORDER 1
+#endif
+
+
+
 #undef CAREFUL_ALIGNMENT
 
 /* we know that the 386 can handle misalignment and has the "right" 
    byteorder */
-#ifdef __i386__
+#if defined(__i386__)
 #define CAREFUL_ALIGNMENT 0
 #endif
 
@@ -110,8 +144,19 @@ it also defines lots of intermediate macros, just ignore those :-)
 #define PVAL(buf,pos) (CVAL(buf,pos))
 #define SCVAL(buf,pos,val) (CVAL_NC(buf,pos) = (val))
 
+#if HAVE_ASM_BYTEORDER
+
+#define _PTRPOS(buf,pos) (((const uint8_t *)(buf))+(pos))
+#define SVAL(buf,pos) ld_le16((const uint16_t *)_PTRPOS(buf,pos))
+#define IVAL(buf,pos) ld_le32((const uint32_t *)_PTRPOS(buf,pos))
+#define SSVAL(buf,pos,val) st_le16((uint16_t *)_PTRPOS(buf,pos), val)
+#define SIVAL(buf,pos,val) st_le32((uint32_t *)_PTRPOS(buf,pos), val)
+#define SVALS(buf,pos) ((int16_t)SVAL(buf,pos))
+#define IVALS(buf,pos) ((int32_t)IVAL(buf,pos))
+#define SSVALS(buf,pos,val) SSVAL((buf),(pos),((int16_t)(val)))
+#define SIVALS(buf,pos,val) SIVAL((buf),(pos),((int32_t)(val)))
 
-#if CAREFUL_ALIGNMENT
+#elif CAREFUL_ALIGNMENT
 
 #define SVAL(buf,pos) (PVAL(buf,pos)|PVAL(buf,(pos)+1)<<8)
 #define IVAL(buf,pos) (SVAL(buf,pos)|SVAL(buf,(pos)+2)<<16)
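
As a sanity check of what the new macros do, here is a minimal standalone sketch. It is not part of the commit: the ld_le16/ld_le32 load helpers are copied from the patch, while the test buffer, main(), and the byte-at-a-time fallback are illustrative additions (the fallback simply mirrors the existing CAREFUL_ALIGNMENT shift-and-or path so the program also builds off powerpc).

/*
  Standalone sketch (illustrative, not from the commit): read
  little-endian values from unaligned addresses, which is the same
  operation the SVAL()/IVAL() macros above perform.
*/
#include <stdint.h>
#include <stdio.h>

#if (defined(__powerpc__) && defined(__GNUC__))
/* byte-reversed loads, exactly as in the patch */
static __inline__ uint16_t ld_le16(const uint16_t *addr)
{
	uint16_t val;
	__asm__ ("lhbrx %0,0,%1" : "=r" (val) : "r" (addr), "m" (*addr));
	return val;
}

static __inline__ uint32_t ld_le32(const uint32_t *addr)
{
	uint32_t val;
	__asm__ ("lwbrx %0,0,%1" : "=r" (val) : "r" (addr), "m" (*addr));
	return val;
}
#else
/* illustrative fallback: byte-at-a-time shift-and-or, equivalent to
   the CAREFUL_ALIGNMENT versions of SVAL()/IVAL() */
static __inline__ uint16_t ld_le16(const uint16_t *addr)
{
	const uint8_t *p = (const uint8_t *)addr;
	return (uint16_t)(p[0] | (p[1] << 8));
}

static __inline__ uint32_t ld_le32(const uint32_t *addr)
{
	const uint8_t *p = (const uint8_t *)addr;
	return (uint32_t)p[0] | ((uint32_t)p[1] << 8) |
		((uint32_t)p[2] << 16) | ((uint32_t)p[3] << 24);
}
#endif

int main(void)
{
	/* little-endian wire data placed at odd, i.e. unaligned, offsets */
	uint8_t buf[8] = { 0x00, 0x34, 0x12, 0x78, 0x56, 0x34, 0x12, 0x00 };

	/* 16-bit load from buf+1: bytes 34 12 -> 0x1234 */
	printf("0x%04x\n", (unsigned)ld_le16((const uint16_t *)(buf + 1)));

	/* 32-bit load from buf+3: bytes 78 56 34 12 -> 0x12345678 */
	printf("0x%08lx\n", (unsigned long)ld_le32((const uint32_t *)(buf + 3)));
	return 0;
}

Both paths should print 0x1234 and 0x12345678; on powerpc with gcc each read compiles to a single lhbrx/lwbrx instruction rather than four byte loads plus shifts, which is where the size and speed win in the commit message comes from.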