1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
|
/*
* String handling functions for PowerPC.
*
* Copyright (C) 1996 Paul Mackerras.
*
*
* In a mail from Paul on 23.10.2006 05:47:
*
* You may put an LGPL permission statement on that code, replacing the
* GPL permission statement. From a technical point of view, I'm not
* sure that the code in ppcasm_memcpy_cachable.S is the best thing to
* use in userspace, though; for one thing, it has a cache line size
* assumption encoded into it. Why don't you just use the glibc memcpy?
* It's pretty well optimized these days, AFAIK.
*
* Paul.
*
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2 of the License, or (at your option) any later version.
*
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with this library; if not, write to the
* Free Software Foundation, Inc., 59 Temple Place - Suite 330,
* Boston, MA 02111-1307, USA.
*/
#define __ASSEMBLY__
#include <linux/config.h>
#if defined(CONFIG_8xx) || defined(CONFIG_403GCX)
#define L1_CACHE_LINE_SIZE 16
#define LG_L1_CACHE_LINE_SIZE 4
#elif defined(CONFIG_PPC64BRIDGE)
#define L1_CACHE_LINE_SIZE 128
#define LG_L1_CACHE_LINE_SIZE 7
#else
#define L1_CACHE_LINE_SIZE 32
#define LG_L1_CACHE_LINE_SIZE 5
#endif
#include "ppc_asm.h"
#define COPY_16_BYTES \
lwz r7,4(r4); \
lwz r8,8(r4); \
lwz r9,12(r4); \
lwzu r10,16(r4); \
stw r7,4(r6); \
stw r8,8(r6); \
stw r9,12(r6); \
stwu r10,16(r6)
#define COPY_16_BYTES_WITHEX(n) \
8 ## n ## 0: \
lwz r7,4(r4); \
8 ## n ## 1: \
lwz r8,8(r4); \
8 ## n ## 2: \
lwz r9,12(r4); \
8 ## n ## 3: \
lwzu r10,16(r4); \
8 ## n ## 4: \
stw r7,4(r6); \
8 ## n ## 5: \
stw r8,8(r6); \
8 ## n ## 6: \
stw r9,12(r6); \
8 ## n ## 7: \
stwu r10,16(r6)
#define COPY_16_BYTES_EXCODE(n) \
9 ## n ## 0: \
addi r5,r5,-(16 * n); \
b 104f; \
9 ## n ## 1: \
addi r5,r5,-(16 * n); \
b 105f; \
.section __ex_table,"a"; \
.align 2; \
.long 8 ## n ## 0b,9 ## n ## 0b; \
.long 8 ## n ## 1b,9 ## n ## 0b; \
.long 8 ## n ## 2b,9 ## n ## 0b; \
.long 8 ## n ## 3b,9 ## n ## 0b; \
.long 8 ## n ## 4b,9 ## n ## 1b; \
.long 8 ## n ## 5b,9 ## n ## 1b; \
.long 8 ## n ## 6b,9 ## n ## 1b; \
.long 8 ## n ## 7b,9 ## n ## 1b; \
.text
.text
CACHELINE_MASK = (L1_CACHE_LINE_SIZE - 1)
.global direct_ppcasm_cacheable_memcpy
direct_ppcasm_cacheable_memcpy:
#if 0 /* this part causes "error loading shared library: unexpected reloc type
0x0b (???) */
add r7,r3,r5 /* test if the src & dst overlap */
add r8,r4,r5
cmplw 0,r4,r7
cmplw 1,r3,r8
crand 0,0,4 /* cr0.lt &= cr1.lt */
blt ppcasm_memcpy /* if regions overlap */
#endif
addi r4,r4,-4
addi r6,r3,-4
neg r0,r3
andi. r0,r0,CACHELINE_MASK /* # bytes to start of cache line */
beq 58f
cmplw 0,r5,r0 /* is this more than total to do? */
blt 63f /* if not much to do */
andi. r8,r0,3 /* get it word-aligned first */
subf r5,r0,r5
mtctr r8
beq+ 61f
70: lbz r9,4(r4) /* do some bytes */
stb r9,4(r6)
addi r4,r4,1
addi r6,r6,1
bdnz 70b
61: srwi. r0,r0,2
mtctr r0
beq 58f
72: lwzu r9,4(r4) /* do some words */
stwu r9,4(r6)
bdnz 72b
58: srwi. r0,r5,LG_L1_CACHE_LINE_SIZE /* complete cachelines */
clrlwi r5,r5,32-LG_L1_CACHE_LINE_SIZE
li r11,4
mtctr r0
beq 63f
53:
#if !defined(CONFIG_8xx)
dcbz r11,r6
#endif
COPY_16_BYTES
#if L1_CACHE_LINE_SIZE >= 32
COPY_16_BYTES
#if L1_CACHE_LINE_SIZE >= 64
COPY_16_BYTES
COPY_16_BYTES
#if L1_CACHE_LINE_SIZE >= 128
COPY_16_BYTES
COPY_16_BYTES
COPY_16_BYTES
COPY_16_BYTES
#endif
#endif
#endif
bdnz 53b
63: srwi. r0,r5,2
mtctr r0
beq 64f
30: lwzu r0,4(r4)
stwu r0,4(r6)
bdnz 30b
64: andi. r0,r5,3
mtctr r0
beq+ 65f
40: lbz r0,4(r4)
stb r0,4(r6)
addi r4,r4,1
addi r6,r6,1
bdnz 40b
65: blr
|