From 7fe60435bce6595a9c58a9bfd8244d74b5320e96 Mon Sep 17 00:00:00 2001
From: Benjamin Franzke
Date: Tue, 15 Jan 2013 08:46:13 +0100
Subject: Import DirectFB141_2k11R3_beta5

---
 .../DirectFB/lib/direct/ppcasm_memcpy_cachable.S | 180 +++++++++++++++++++++
 1 file changed, 180 insertions(+)
 create mode 100755 Source/DirectFB/lib/direct/ppcasm_memcpy_cachable.S

(limited to 'Source/DirectFB/lib/direct/ppcasm_memcpy_cachable.S')

diff --git a/Source/DirectFB/lib/direct/ppcasm_memcpy_cachable.S b/Source/DirectFB/lib/direct/ppcasm_memcpy_cachable.S
new file mode 100755
index 0000000..920dea2
--- /dev/null
+++ b/Source/DirectFB/lib/direct/ppcasm_memcpy_cachable.S
@@ -0,0 +1,180 @@
+/*
+ * String handling functions for PowerPC.
+ *
+ * Copyright (C) 1996 Paul Mackerras.
+ *
+ *
+ * In a mail from Paul on 23.10.2006 05:47:
+ *
+ * You may put an LGPL permission statement on that code, replacing the
+ * GPL permission statement. From a technical point of view, I'm not
+ * sure that the code in ppcasm_memcpy_cachable.S is the best thing to
+ * use in userspace, though; for one thing, it has a cache line size
+ * assumption encoded into it. Why don't you just use the glibc memcpy?
+ * It's pretty well optimized these days, AFAIK.
+ *
+ * Paul.
+ *
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the
+ * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
+ * Boston, MA 02111-1307, USA.
+ */
+
+#define __ASSEMBLY__
+
+#include /* NOTE(review): no header name follows this directive as shown; an angle-bracket name was probably lost when the patch text was extracted -- restore it from the upstream file */
+/*
+ * Cache line size in bytes (L1_CACHE_LINE_SIZE) and its log2
+ * (LG_L1_CACHE_LINE_SIZE), chosen at build time from the CONFIG_* macros.
+ * 32 bytes is the fallback when none of them is defined.
+ */
+#if defined(CONFIG_8xx) || defined(CONFIG_403GCX)
+#define L1_CACHE_LINE_SIZE 16
+#define LG_L1_CACHE_LINE_SIZE 4
+#elif defined(CONFIG_PPC64BRIDGE)
+#define L1_CACHE_LINE_SIZE 128
+#define LG_L1_CACHE_LINE_SIZE 7
+#else
+#define L1_CACHE_LINE_SIZE 32
+#define LG_L1_CACHE_LINE_SIZE 5
+#endif
+
+#include "ppc_asm.h"
+/*
+ * Copy 16 bytes as four words from 4(r4)..16(r4) to 4(r6)..16(r6).
+ * The closing lwzu/stwu pair advances r4 and r6 by 16, so the macro can be
+ * expanded back to back.  Uses r7-r10 as scratch.
+ */
+#define COPY_16_BYTES \
+	lwz r7,4(r4); \
+	lwz r8,8(r4); \
+	lwz r9,12(r4); \
+	lwzu r10,16(r4); \
+	stw r7,4(r6); \
+	stw r8,8(r6); \
+	stw r9,12(r6); \
+	stwu r10,16(r6)
+/*
+ * Same instruction sequence as COPY_16_BYTES, but each load/store carries a
+ * numeric label 8<n>0 .. 8<n>7 so that COPY_16_BYTES_EXCODE(n) can refer to
+ * it.  Not expanded anywhere in the code visible here.
+ */
+#define COPY_16_BYTES_WITHEX(n) \
+8 ## n ## 0: \
+	lwz r7,4(r4); \
+8 ## n ## 1: \
+	lwz r8,8(r4); \
+8 ## n ## 2: \
+	lwz r9,12(r4); \
+8 ## n ## 3: \
+	lwzu r10,16(r4); \
+8 ## n ## 4: \
+	stw r7,4(r6); \
+8 ## n ## 5: \
+	stw r8,8(r6); \
+8 ## n ## 6: \
+	stw r9,12(r6); \
+8 ## n ## 7: \
+	stwu r10,16(r6)
+/*
+ * Fixup stubs for COPY_16_BYTES_WITHEX(n): label 9<n>0 is paired with the
+ * four loads, 9<n>1 with the four stores.  Each stub subtracts 16*n from r5
+ * and branches forward to local label 104 (loads) or 105 (stores).  The
+ * eight (instruction, stub) address pairs are then emitted into section
+ * __ex_table before switching back to .text.
+ * NOTE(review): labels 104/105 are not defined in the code visible here and
+ * this macro is not expanded here -- presumably a leftover from the routine
+ * this file was derived from; confirm before using it.
+ */
+#define COPY_16_BYTES_EXCODE(n) \
+9 ## n ## 0: \
+	addi r5,r5,-(16 * n); \
+	b 104f; \
+9 ## n ## 1: \
+	addi r5,r5,-(16 * n); \
+	b 105f; \
+.section __ex_table,"a"; \
+	.align 2; \
+	.long 8 ## n ## 0b,9 ## n ## 0b; \
+	.long 8 ## n ## 1b,9 ## n ## 0b; \
+	.long 8 ## n ## 2b,9 ## n ## 0b; \
+	.long 8 ## n ## 3b,9 ## n ## 0b; \
+	.long 8 ## n ## 4b,9 ## n ## 1b; \
+	.long 8 ## n ## 5b,9 ## n ## 1b; \
+	.long 8 ## n ## 6b,9 ## n ## 1b; \
+	.long 8 ## n ## 7b,9 ## n ## 1b; \
+	.text
+
+	.text
+
+
+CACHELINE_MASK = (L1_CACHE_LINE_SIZE - 1)	/* low-bit mask: offset of an address within its cache line */
+/*
+ * direct_ppcasm_cacheable_memcpy
+ *
+ * Forward copy of r5 bytes from the address in r4 to the address in r3.
+ * (Presumably called from C as (dst, src, n) under the 32-bit PowerPC
+ * calling convention -- confirm against the prototype.)
+ *
+ * Phases:
+ *   1. if the destination is not cache-line aligned and the count reaches
+ *      the next line boundary: copy bytes, then words, up to that boundary
+ *   2. copy whole cache lines, issuing dcbz on each destination line first
+ *      (the dcbz is omitted when CONFIG_8xx is defined)
+ *   3. copy the remaining whole words, then the remaining 0-3 bytes
+ *
+ * Only the destination's alignment is examined (r0 is derived from r3
+ * alone); the source is read with whatever alignment it happens to have.
+ *
+ * r3 is never written, so it still holds the destination on return.
+ * Registers written: r0, r4-r11, ctr, cr0.
+ *
+ * NOTE(review): the overlap check below is compiled out (#if 0), so
+ * overlapping regions are not detected; the copy always runs low to high.
+ * NOTE(review): dcbz acts on the hardware's cache block, while the loop
+ * copies L1_CACHE_LINE_SIZE bytes per iteration -- the two must agree (see
+ * the cache line size remark quoted in the file header).  The destination
+ * presumably has to be cacheable memory, as the function name suggests.
+ */
+	.global direct_ppcasm_cacheable_memcpy
+direct_ppcasm_cacheable_memcpy:
+#if 0 /* this part causes "error loading shared library: unexpected reloc type
+			0x0b (???) */
+	add	r7,r3,r5		/* test if the src & dst overlap */
+	add	r8,r4,r5
+	cmplw	0,r4,r7
+	cmplw	1,r3,r8
+	crand	0,0,4			/* cr0.lt &= cr1.lt */
+	blt	ppcasm_memcpy		/* if regions overlap */
+#endif
+	addi	r4,r4,-4		/* bias src by -4: every access below uses a 4(r4)-style offset */
+	addi	r6,r3,-4		/* r6 = biased dst cursor; r3 itself is left untouched */
+	neg	r0,r3
+	andi.	r0,r0,CACHELINE_MASK	/* # bytes to start of cache line */
+	beq	58f			/* dst already line-aligned: straight to the line loop */
+/* Destination is not line-aligned: r0 = bytes up to the next line boundary. */
+	cmplw	0,r5,r0			/* is this more than total to do? */
+	blt	63f			/* count < r0: skip alignment, use the tail loops only */
+	andi.	r8,r0,3			/* get it word-aligned first */
+	subf	r5,r0,r5		/* r5 = bytes left once the boundary is reached */
+	mtctr	r8
+	beq+	61f			/* tests cr0 from the andi. above: no odd bytes to copy */
+70:	lbz	r9,4(r4)		/* do some bytes */
+	stb	r9,4(r6)
+	addi	r4,r4,1
+	addi	r6,r6,1
+	bdnz	70b
+61:	srwi.	r0,r0,2			/* r0 = whole words up to the line boundary */
+	mtctr	r0
+	beq	58f			/* none */
+72:	lwzu	r9,4(r4)		/* do some words */
+	stwu	r9,4(r6)
+	bdnz	72b
+/* Bulk phase: loop 53 copies one full destination cache line per pass. */
+58:	srwi.	r0,r5,LG_L1_CACHE_LINE_SIZE /* complete cachelines */
+	clrlwi	r5,r5,32-LG_L1_CACHE_LINE_SIZE	/* r5 = bytes left over after the full lines */
+	li	r11,4			/* dcbz offset: r6+4 is the next byte to be written */
+	mtctr	r0
+	beq	63f			/* no full line to copy */
+53:
+#if !defined(CONFIG_8xx)
+	dcbz	r11,r6			/* zero the destination cache block at r6+4 before filling it */
+#endif
+	COPY_16_BYTES
+#if L1_CACHE_LINE_SIZE >= 32		/* nested blocks: 1, 2, 4 or 8 expansions in total for line sizes 16, 32, 64, 128 */
+	COPY_16_BYTES
+#if L1_CACHE_LINE_SIZE >= 64
+	COPY_16_BYTES
+	COPY_16_BYTES
+#if L1_CACHE_LINE_SIZE >= 128
+	COPY_16_BYTES
+	COPY_16_BYTES
+	COPY_16_BYTES
+	COPY_16_BYTES
+#endif
+#endif
+#endif
+	bdnz	53b
+/* Tail: remaining whole words, then the last 0-3 bytes. */
+63:	srwi.	r0,r5,2			/* r0 = remaining whole words */
+	mtctr	r0
+	beq	64f
+30:	lwzu	r0,4(r4)		/* r0 is reused as the data register; the count lives in ctr */
+	stwu	r0,4(r6)
+	bdnz	30b
+
+64:	andi.	r0,r5,3			/* r0 = trailing byte count */
+	mtctr	r0
+	beq+	65f
+40:	lbz	r0,4(r4)
+	stb	r0,4(r6)
+	addi	r4,r4,1
+	addi	r6,r6,1
+	bdnz	40b
+65:	blr				/* r3 is unchanged since entry */
+
-- cgit