From 3c1fdfa7fb55307e0b21bea63b7f976efbe139df Mon Sep 17 00:00:00 2001
From: Andrew Tridgell
Date: Thu, 14 Oct 2004 05:26:35 +0000
Subject: r2960: during a lunchtime discussion I found out that all powerpc
 processors can correctly (and quickly!) handle unaligned little endian
 memory accesses, just like i386. This should reduce code size and speed
 things up quite a lot on ppc, at the expense of some inline asm code
 (which means it only works with gcc)

(This used to be commit 2a0c427c2bf2f8b0739f12c78151b819388c44d4)
---
 source4/include/byteorder.h | 49 +++++++++++++++++++++++++++++++++++++++++++--
 1 file changed, 47 insertions(+), 2 deletions(-)

diff --git a/source4/include/byteorder.h b/source4/include/byteorder.h
index bf68ee7df7..5137b78e4c 100644
--- a/source4/include/byteorder.h
+++ b/source4/include/byteorder.h
@@ -93,11 +93,45 @@ it also defines lots of intermediate macros, just ignore those :-)
 
 */
 
+
+/*
+  on powerpc we can use the magic instructions to load/store
+  in little endian
+*/
+#if (defined(__powerpc__) && defined(__GNUC__))
+static __inline__ uint16_t ld_le16(const uint16_t *addr)
+{
+	uint16_t val;
+	__asm__ ("lhbrx %0,0,%1" : "=r" (val) : "r" (addr), "m" (*addr));
+	return val;
+}
+
+static __inline__ void st_le16(uint16_t *addr, const uint16_t val)
+{
+	__asm__ ("sthbrx %1,0,%2" : "=m" (*addr) : "r" (val), "r" (addr));
+}
+
+static __inline__ uint32_t ld_le32(const uint32_t *addr)
+{
+	uint32_t val;
+	__asm__ ("lwbrx %0,0,%1" : "=r" (val) : "r" (addr), "m" (*addr));
+	return val;
+}
+
+static __inline__ void st_le32(uint32_t *addr, const uint32_t val)
+{
+	__asm__ ("stwbrx %1,0,%2" : "=m" (*addr) : "r" (val), "r" (addr));
+}
+#define HAVE_ASM_BYTEORDER 1
+#endif
+
+
+
 #undef CAREFUL_ALIGNMENT
 
 /* we know that the 386 can handle misalignment and has the "right" 
    byteorder */
-#ifdef __i386__
+#if defined(__i386__)
 #define CAREFUL_ALIGNMENT 0
 #endif
 
@@ -110,8 +144,19 @@ it also defines lots of intermediate macros, just ignore those :-)
 #define PVAL(buf,pos) (CVAL(buf,pos))
 #define SCVAL(buf,pos,val) (CVAL_NC(buf,pos) = (val))
 
+#if HAVE_ASM_BYTEORDER
+
+#define _PTRPOS(buf,pos) (((const uint8_t *)(buf))+(pos))
+#define SVAL(buf,pos) ld_le16((const uint16_t *)_PTRPOS(buf,pos))
+#define IVAL(buf,pos) ld_le32((const uint32_t *)_PTRPOS(buf,pos))
+#define SSVAL(buf,pos,val) st_le16((uint16_t *)_PTRPOS(buf,pos), val)
+#define SIVAL(buf,pos,val) st_le32((uint32_t *)_PTRPOS(buf,pos), val)
+#define SVALS(buf,pos) ((int16_t)SVAL(buf,pos))
+#define IVALS(buf,pos) ((int32_t)IVAL(buf,pos))
+#define SSVALS(buf,pos,val) SSVAL((buf),(pos),((int16_t)(val)))
+#define SIVALS(buf,pos,val) SIVAL((buf),(pos),((int32_t)(val)))
 
-#if CAREFUL_ALIGNMENT
+#elif CAREFUL_ALIGNMENT
 
 #define SVAL(buf,pos) (PVAL(buf,pos)|PVAL(buf,(pos)+1)<<8)
 #define IVAL(buf,pos) (SVAL(buf,pos)|SVAL(buf,(pos)+2)<<16)
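
As a sanity check of what the new macros do, here is a minimal standalone sketch. It is not part of the commit: the ld_le16/ld_le32 load helpers are copied from the patch, while the test buffer, main(), and the byte-at-a-time fallback are illustrative additions (the fallback simply mirrors the existing CAREFUL_ALIGNMENT shift-and-or path so the program also builds off powerpc).

/*
  Standalone sketch (illustrative, not from the commit): read
  little-endian values from unaligned addresses, which is the same
  operation the SVAL()/IVAL() macros above perform.
*/
#include <stdint.h>
#include <stdio.h>

#if (defined(__powerpc__) && defined(__GNUC__))
/* byte-reversed loads, exactly as in the patch */
static __inline__ uint16_t ld_le16(const uint16_t *addr)
{
	uint16_t val;
	__asm__ ("lhbrx %0,0,%1" : "=r" (val) : "r" (addr), "m" (*addr));
	return val;
}

static __inline__ uint32_t ld_le32(const uint32_t *addr)
{
	uint32_t val;
	__asm__ ("lwbrx %0,0,%1" : "=r" (val) : "r" (addr), "m" (*addr));
	return val;
}
#else
/* illustrative fallback: byte-at-a-time shift-and-or, equivalent to
   the CAREFUL_ALIGNMENT versions of SVAL()/IVAL() */
static __inline__ uint16_t ld_le16(const uint16_t *addr)
{
	const uint8_t *p = (const uint8_t *)addr;
	return (uint16_t)(p[0] | (p[1] << 8));
}

static __inline__ uint32_t ld_le32(const uint32_t *addr)
{
	const uint8_t *p = (const uint8_t *)addr;
	return (uint32_t)p[0] | ((uint32_t)p[1] << 8) |
		((uint32_t)p[2] << 16) | ((uint32_t)p[3] << 24);
}
#endif

int main(void)
{
	/* little-endian wire data placed at odd, i.e. unaligned, offsets */
	uint8_t buf[8] = { 0x00, 0x34, 0x12, 0x78, 0x56, 0x34, 0x12, 0x00 };

	/* 16-bit load from buf+1: bytes 34 12 -> 0x1234 */
	printf("0x%04x\n", (unsigned)ld_le16((const uint16_t *)(buf + 1)));

	/* 32-bit load from buf+3: bytes 78 56 34 12 -> 0x12345678 */
	printf("0x%08lx\n", (unsigned long)ld_le32((const uint32_t *)(buf + 3)));
	return 0;
}

Both paths should print 0x1234 and 0x12345678; on powerpc with gcc each read compiles to a single lhbrx/lwbrx instruction rather than four byte loads plus shifts, which is where the size and speed win in the commit message comes from.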