/*
 * String handling functions for PowerPC.
 *
 * Copyright (C) 1996 Paul Mackerras.
 *
 *
 * In a mail from Paul on 23.10.2006 05:47:
 *
 *   You may put an LGPL permission statement on that code, replacing the
 *   GPL permission statement.  From a technical point of view, I'm not
 *   sure that the code in ppcasm_memcpy_cachable.S is the best thing to
 *   use in userspace, though; for one thing, it has a cache line size
 *   assumption encoded into it.  Why don't you just use the glibc memcpy?
 *   It's pretty well optimized these days, AFAIK.
 *
 *   Paul.
 *
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, write to the
 * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
 * Boston, MA 02111-1307, USA.
 */

/*
 * File layout: preprocessor setup (cache line size selection), three
 * 16-byte copy macros, and one exported routine,
 * direct_ppcasm_cacheable_memcpy.  The file must be run through the C
 * preprocessor before assembly (it relies on #define / #if / ##).
 */

/* Presumably tells the included headers they are being used from
 * assembly source -- confirm against ppc_asm.h. */
#define __ASSEMBLY__

/*
 * NOTE(review): the #include below has no header name in this copy of
 * the file, so it cannot preprocess as written.  The operand was most
 * likely lost when the file was extracted (an <...> argument would be
 * stripped by HTML-style processing).  Given the CONFIG_* tests that
 * follow, it presumably named a configuration header -- restore it from
 * the upstream file before building.
 */
#include

/*
 * Select the cache line size at build time:
 *   CONFIG_8xx or CONFIG_403GCX  -> 16 bytes
 *   CONFIG_PPC64BRIDGE           -> 128 bytes
 *   otherwise                    -> 32 bytes
 * LG_L1_CACHE_LINE_SIZE is log2 of L1_CACHE_LINE_SIZE and is used as a
 * shift count below.
 */
#if defined(CONFIG_8xx) || defined(CONFIG_403GCX)
#define L1_CACHE_LINE_SIZE       16
#define LG_L1_CACHE_LINE_SIZE     4
#elif defined(CONFIG_PPC64BRIDGE)
#define L1_CACHE_LINE_SIZE      128
#define LG_L1_CACHE_LINE_SIZE     7
#else
#define L1_CACHE_LINE_SIZE       32
#define LG_L1_CACHE_LINE_SIZE     5
#endif

/* Project-local header; presumably provides the rN register names used
 * below -- confirm. */
#include "ppc_asm.h"

/*
 * COPY_16_BYTES
 *
 * Copy four words from 4(r4)..16(r4) to 4(r6)..16(r6), using r7-r10 as
 * temporaries.  The fourth load and the fourth store are update forms
 * (lwzu/stwu), so both r4 and r6 end up advanced by 16.  All four loads
 * are issued before the first store.
 */
#define COPY_16_BYTES           \
        lwz     r7,4(r4);       \
        lwz     r8,8(r4);       \
        lwz     r9,12(r4);      \
        lwzu    r10,16(r4);     \
        stw     r7,4(r6);       \
        stw     r8,8(r6);       \
        stw     r9,12(r6);      \
        stwu    r10,16(r6)

/*
 * COPY_16_BYTES_WITHEX(n)
 *
 * Same eight instructions as COPY_16_BYTES, but each one is preceded by
 * a numeric label 8<n>0 .. 8<n>7 (built by token pasting) so that
 * COPY_16_BYTES_EXCODE(n) can refer to the individual loads and stores.
 * Not used by the routine in this part of the file.
 */
#define COPY_16_BYTES_WITHEX(n) \
8 ## n ## 0:                    \
        lwz     r7,4(r4);       \
8 ## n ## 1:                    \
        lwz     r8,8(r4);       \
8 ## n ## 2:                    \
        lwz     r9,12(r4);      \
8 ## n ## 3:                    \
        lwzu    r10,16(r4);     \
8 ## n ## 4:                    \
        stw     r7,4(r6);       \
8 ## n ## 5:                    \
        stw     r8,8(r6);       \
8 ## n ## 6:                    \
        stw     r9,12(r6);      \
8 ## n ## 7:                    \
        stwu    r10,16(r6)

/*
 * COPY_16_BYTES_EXCODE(n)
 *
 * Companion to COPY_16_BYTES_WITHEX(n).  Emits two stubs:
 *   9<n>0: subtract 16*n from r5, branch to local label 104
 *   9<n>1: subtract 16*n from r5, branch to local label 105
 * and eight address pairs in section __ex_table that associate the four
 * load labels (8<n>0-8<n>3) with stub 9<n>0 and the four store labels
 * (8<n>4-8<n>7) with stub 9<n>1.  Ends by switching back to .text.
 *
 * Labels 104 and 105 are not defined in this part of the file and the
 * macro is not used by the routine below.
 */
#define COPY_16_BYTES_EXCODE(n)                 \
9 ## n ## 0:                                    \
        addi    r5,r5,-(16 * n);                \
        b       104f;                           \
9 ## n ## 1:                                    \
        addi    r5,r5,-(16 * n);                \
        b       105f;                           \
.section __ex_table,"a";                        \
        .align  2;                              \
        .long   8 ## n ## 0b,9 ## n ## 0b;      \
        .long   8 ## n ## 1b,9 ## n ## 0b;      \
        .long   8 ## n ## 2b,9 ## n ## 0b;      \
        .long   8 ## n ## 3b,9 ## n ## 0b;      \
        .long   8 ## n ## 4b,9 ## n ## 1b;      \
        .long   8 ## n ## 5b,9 ## n ## 1b;      \
        .long   8 ## n ## 6b,9 ## n ## 1b;      \
        .long   8 ## n ## 7b,9 ## n ## 1b;      \
        .text

        .text

/* Mask that extracts the offset of an address within a cache line. */
CACHELINE_MASK = (L1_CACHE_LINE_SIZE - 1)

/*
 * direct_ppcasm_cacheable_memcpy
 *
 * Copy r5 bytes from the address in r4 to the address in r3, using dcbz
 * on each full destination cache line (except when CONFIG_8xx is set).
 *
 * In:   r3 = destination address
 *       r4 = source address
 *       r5 = number of bytes to copy
 * Out:  r3 is never written, so it still holds the destination address
 *       on return.
 * Modifies: r0, r4-r11, ctr, cr0.
 *
 * Phases:
 *   1. head:  bytes, then words, until the destination is cache-line
 *             aligned (skipped if already aligned; if the length is
 *             smaller than the distance to alignment, go to phase 3);
 *   2. body:  whole cache lines, dcbz + L1_CACHE_LINE_SIZE/16 copies of
 *             COPY_16_BYTES per iteration;
 *   3. tail:  remaining words, then remaining bytes.
 *
 * NOTE(review): dcbz acts on the cache block size of the CPU it runs
 * on, while the loop copies the build-time L1_CACHE_LINE_SIZE bytes per
 * dcbz.  If the two differ, dcbz would zero bytes that this iteration
 * does not rewrite -- confirm the target CPUs match the selected size.
 * NOTE(review): with the overlap test below compiled out, overlapping
 * source and destination ranges are not detected.
 */
        .global direct_ppcasm_cacheable_memcpy
direct_ppcasm_cacheable_memcpy:
#if 0
        /*
         * Disabled: this part causes "error loading shared library:
         * unexpected reloc type 0x0b" (???) -- presumably because of the
         * conditional branch to the external symbol ppcasm_memcpy.
         */
        add     r7,r3,r5                /* test if the src & dst overlap */
        add     r8,r4,r5
        cmplw   0,r4,r7
        cmplw   1,r3,r8
        crand   0,0,4                   /* cr0.lt &= cr1.lt */
        blt     ppcasm_memcpy           /* if regions overlap */
#endif

        /*
         * Bias both working pointers by -4 so that the update-form
         * lwzu/stwu with displacement 4 step to the next word.  The
         * byte loops compensate by addressing 4(rN) and adding 1.
         */
        addi    r4,r4,-4
        addi    r6,r3,-4                /* r6 = working dst; r3 stays intact */
        neg     r0,r3
        andi.   r0,r0,CACHELINE_MASK    /* r0 = # bytes until dst is cache-line aligned */
        beq     58f                     /* already aligned: straight to the line loop */

        cmplw   0,r5,r0                 /* is this more than total to do? */
        blt     63f                     /* if not much to do: tail copy only (r5 untouched) */
        andi.   r8,r0,3                 /* r8 = # bytes to get dst word-aligned first */
        subf    r5,r0,r5                /* r5 -= r0: bytes left after the head */
        mtctr   r8
        beq+    61f                     /* r8 == 0: no leading bytes */
70:     lbz     r9,4(r4)                /* do some bytes */
        stb     r9,4(r6)
        addi    r4,r4,1
        addi    r6,r6,1
        bdnz    70b
61:     srwi.   r0,r0,2                 /* r0 = # words left in the head */
        mtctr   r0
        beq     58f                     /* none: dst is now line aligned */
72:     lwzu    r9,4(r4)                /* do some words */
        stwu    r9,4(r6)
        bdnz    72b

58:     srwi.   r0,r5,LG_L1_CACHE_LINE_SIZE     /* r0 = # complete cachelines */
        clrlwi  r5,r5,32-LG_L1_CACHE_LINE_SIZE  /* r5 = bytes left after them */
        li      r11,4                   /* dcbz offset: r6+4 is the real dst */
        mtctr   r0
        beq     63f                     /* no complete line to copy */
53:
#if !defined(CONFIG_8xx)
        /* Zero the destination cache block before overwriting all of it;
         * presumably avoids fetching the line from memory first. */
        dcbz    r11,r6
#endif
        /* L1_CACHE_LINE_SIZE / 16 expansions in total: 1, 2, 4 or 8. */
        COPY_16_BYTES
#if L1_CACHE_LINE_SIZE >= 32
        COPY_16_BYTES
#if L1_CACHE_LINE_SIZE >= 64
        COPY_16_BYTES
        COPY_16_BYTES
#if L1_CACHE_LINE_SIZE >= 128
        COPY_16_BYTES
        COPY_16_BYTES
        COPY_16_BYTES
        COPY_16_BYTES
#endif
#endif
#endif
        bdnz    53b

63:     srwi.   r0,r5,2                 /* r0 = # whole words remaining */
        mtctr   r0
        beq     64f                     /* none */
30:     lwzu    r0,4(r4)                /* r0 doubles as data temp; count is in ctr */
        stwu    r0,4(r6)
        bdnz    30b

64:     andi.   r0,r5,3                 /* r0 = # trailing bytes (0..3) */
        mtctr   r0
        beq+    65f                     /* none: done */
40:     lbz     r0,4(r4)
        stb     r0,4(r6)
        addi    r4,r4,1
        addi    r6,r6,1
        bdnz    40b
65:     blr