e550f5a8f7a164d7ec6fe9017717507fd6d33c1b
[openwrt/staging/wigyori.git] / target / linux / leon / patches-5.15 / 0021-sparc32-leon-Add-fixes-for-leon3ft-b2b-store-errata.patch
1 From 42a65418382690b7199ab23fbe72071da3f6a12d Mon Sep 17 00:00:00 2001
2 From: Andreas Larsson <andreas@gaisler.com>
3 Date: Thu, 22 Sep 2016 15:52:07 +0200
4 Subject: [PATCH 21/32] sparc32: leon: Add fixes for leon3ft b2b store errata
5
6 Signed-off-by: Andreas Larsson <andreas@gaisler.com>
7 ---
8 arch/sparc/include/asm/asmmacro.h | 12 ++++
9 arch/sparc/include/asm/checksum_32.h | 1 +
10 arch/sparc/include/asm/leon.h | 15 ++++-
11 arch/sparc/include/asm/obio.h | 6 +-
12 arch/sparc/include/asm/pgtsrmmu.h | 7 ++-
13 arch/sparc/include/asm/processor_32.h | 14 ++++-
14 arch/sparc/include/asm/psr.h | 3 +
15 arch/sparc/include/asm/sbi.h | 17 +++++-
16 arch/sparc/include/asm/spinlock_32.h | 10 +++-
17 arch/sparc/include/asm/uaccess_32.h | 5 +-
18 arch/sparc/include/asm/winmacro.h | 23 ++++++-
19 arch/sparc/include/asm/xor_32.h | 18 ++++++
20 arch/sparc/kernel/entry.S | 46 +++++++++++---
21 arch/sparc/kernel/etrap_32.S | 1 +
22 arch/sparc/kernel/head_32.S | 19 +++++-
23 arch/sparc/kernel/leon_smp.c | 10 +++-
24 arch/sparc/kernel/sun4d_smp.c | 8 ++-
25 arch/sparc/kernel/una_asm_32.S | 23 ++++---
26 arch/sparc/kernel/wof.S | 4 +-
27 arch/sparc/lib/blockops.S | 17 +++++-
28 arch/sparc/lib/checksum_32.S | 28 +++++----
29 arch/sparc/lib/copy_user.S | 7 ++-
30 arch/sparc/lib/locks.S | 9 +++
31 arch/sparc/lib/memcpy.S | 10 ++--
32 arch/sparc/lib/memset.S | 86 ++++++++++++++++++++++-----
33 arch/sparc/mm/hypersparc.S | 54 +++++++++++++----
34 arch/sparc/mm/leon_mm.c | 14 ++++-
35 arch/sparc/mm/srmmu.c | 5 +-
36 arch/sparc/mm/swift.S | 7 ++-
37 arch/sparc/mm/tsunami.S | 4 +-
38 arch/sparc/mm/viking.S | 43 ++++++++------
39 31 files changed, 417 insertions(+), 109 deletions(-)
40
41 diff --git a/arch/sparc/include/asm/asmmacro.h b/arch/sparc/include/asm/asmmacro.h
42 index 49aaf6f3bc55..687269d581d1 100644
43 --- a/arch/sparc/include/asm/asmmacro.h
44 +++ b/arch/sparc/include/asm/asmmacro.h
45 @@ -43,4 +43,16 @@
46 __VA_ARGS__; \
47 .previous
48
49 +#ifdef __FIX_LEON3FT_B2BST
50 +#define B2B_SINGLE_NOP nop;
51 +#define B2B_DOUBLE_NOP nop; nop;
52 +#define B2B_INLINE_SINGLE_NOP "nop\n\t"
53 +#define B2B_INLINE_DOUBLE_NOP "nop\n\tnop\n\t"
54 +#else
55 +#define B2B_SINGLE_NOP
56 +#define B2B_DOUBLE_NOP
57 +#define B2B_INLINE_SINGLE_NOP ""
58 +#define B2B_INLINE_DOUBLE_NOP ""
59 +#endif
60 +
61 #endif /* !(_SPARC_ASMMACRO_H) */
62 diff --git a/arch/sparc/include/asm/checksum_32.h b/arch/sparc/include/asm/checksum_32.h
63 index ce11e0ad80c7..981a36b40754 100644
64 --- a/arch/sparc/include/asm/checksum_32.h
65 +++ b/arch/sparc/include/asm/checksum_32.h
66 @@ -18,6 +18,7 @@
67
68 #include <linux/in6.h>
69 #include <linux/uaccess.h>
70 +#include <asm/asmmacro.h>
71
72 /* computes the checksum of a memory block at buff, length len,
73 * and adds in "sum" (32-bit)
74 diff --git a/arch/sparc/include/asm/leon.h b/arch/sparc/include/asm/leon.h
75 index c1e05e4ab9e3..143c06f8c7bc 100644
76 --- a/arch/sparc/include/asm/leon.h
77 +++ b/arch/sparc/include/asm/leon.h
78 @@ -61,10 +61,15 @@
79
80 #ifndef __ASSEMBLY__
81
82 +#include <asm/asmmacro.h>
83 +
84 /* do a physical address bypass write, i.e. for 0x80000000 */
85 static inline void leon_store_reg(unsigned long paddr, unsigned long value)
86 {
87 - __asm__ __volatile__("sta %0, [%1] %2\n\t" : : "r"(value), "r"(paddr),
88 + __asm__ __volatile__(B2B_INLINE_DOUBLE_NOP
89 + "sta %0, [%1] %2\n\t"
90 + B2B_INLINE_DOUBLE_NOP
91 + : : "r"(value), "r"(paddr),
92 "i"(ASI_LEON_BYPASS) : "memory");
93 }
94
95 @@ -102,7 +107,9 @@ static inline void sparc_leon3_enable_snooping(void)
96 __asm__ __volatile__ ("lda [%%g0] 2, %%l1\n\t"
97 "set 0x800000, %%l2\n\t"
98 "or %%l2, %%l1, %%l2\n\t"
99 - "sta %%l2, [%%g0] 2\n\t" : : : "l1", "l2");
100 + "sta %%l2, [%%g0] 2\n\t"
101 + B2B_INLINE_DOUBLE_NOP
102 + : : : "l1", "l2");
103 };
104
105 static inline int sparc_leon3_snooping_enabled(void)
106 @@ -117,7 +124,9 @@ static inline void sparc_leon3_disable_cache(void)
107 __asm__ __volatile__ ("lda [%%g0] 2, %%l1\n\t"
108 "set 0x00000f, %%l2\n\t"
109 "andn %%l2, %%l1, %%l2\n\t"
110 - "sta %%l2, [%%g0] 2\n\t" : : : "l1", "l2");
111 + "sta %%l2, [%%g0] 2\n\t"
112 + B2B_INLINE_DOUBLE_NOP
113 + : : : "l1", "l2");
114 };
115
116 static inline unsigned long sparc_leon3_asr17(void)
117 diff --git a/arch/sparc/include/asm/obio.h b/arch/sparc/include/asm/obio.h
118 index 1b151f738b00..122a49968a04 100644
119 --- a/arch/sparc/include/asm/obio.h
120 +++ b/arch/sparc/include/asm/obio.h
121 @@ -112,7 +112,11 @@ static inline int bw_get_intr_mask(int sbus_level)
122
123 static inline void bw_clear_intr_mask(int sbus_level, int mask)
124 {
125 - __asm__ __volatile__ ("stha %0, [%1] %2" : :
126 + /* Not used for LEON. B2B-nops just to make scan script happy. */
127 + __asm__ __volatile__ (B2B_INLINE_DOUBLE_NOP
128 + "stha %0, [%1] %2\n\t"
129 + B2B_INLINE_DOUBLE_NOP
130 + : :
131 "r" (mask),
132 "r" (BW_LOCAL_BASE + BW_INTR_TABLE_CLEAR + (sbus_level << 3)),
133 "i" (ASI_M_CTL));
134 diff --git a/arch/sparc/include/asm/pgtsrmmu.h b/arch/sparc/include/asm/pgtsrmmu.h
135 index 117009b03cf4..5c16cc8f0a38 100644
136 --- a/arch/sparc/include/asm/pgtsrmmu.h
137 +++ b/arch/sparc/include/asm/pgtsrmmu.h
138 @@ -106,6 +106,8 @@
139 restore %g0, %g0, %g0;
140
141 #ifndef __ASSEMBLY__
142 +#include <asm/asmmacro.h>
143 +
144 extern unsigned long last_valid_pfn;
145
146 /* This makes sense. Honest it does - Anton */
147 @@ -127,7 +129,10 @@ unsigned int srmmu_get_faddr(void);
148 /* This is guaranteed on all SRMMU's. */
149 static inline void srmmu_flush_whole_tlb(void)
150 {
151 - __asm__ __volatile__("sta %%g0, [%0] %1\n\t": :
152 + __asm__ __volatile__(B2B_INLINE_DOUBLE_NOP
153 + "sta %%g0, [%0] %1\n\t"
154 + B2B_INLINE_DOUBLE_NOP
155 + : :
156 "r" (0x400), /* Flush entire TLB!! */
157 "i" (ASI_M_FLUSH_PROBE) : "memory");
158
159 diff --git a/arch/sparc/include/asm/processor_32.h b/arch/sparc/include/asm/processor_32.h
160 index 3c4bc2189092..bf7c364caa29 100644
161 --- a/arch/sparc/include/asm/processor_32.h
162 +++ b/arch/sparc/include/asm/processor_32.h
163 @@ -12,6 +12,7 @@
164 #include <asm/head.h>
165 #include <asm/signal.h>
166 #include <asm/page.h>
167 +#include <asm/asmmacro.h>
168
169 /* Whee, this is STACK_TOP + PAGE_SIZE and the lowest kernel address too...
170 * That one page is used to protect kernel from intruders, so that
171 @@ -73,15 +74,24 @@ static inline void start_thread(struct pt_regs * regs, unsigned long pc,
172 regs->npc = regs->pc + 4;
173 regs->y = 0;
174 zero = 0;
175 - __asm__ __volatile__("std\t%%g0, [%0 + %3 + 0x00]\n\t"
176 + __asm__ __volatile__(B2B_INLINE_DOUBLE_NOP
177 + "std\t%%g0, [%0 + %3 + 0x00]\n\t"
178 + B2B_INLINE_SINGLE_NOP
179 "std\t%%g0, [%0 + %3 + 0x08]\n\t"
180 + B2B_INLINE_SINGLE_NOP
181 "std\t%%g0, [%0 + %3 + 0x10]\n\t"
182 + B2B_INLINE_SINGLE_NOP
183 "std\t%%g0, [%0 + %3 + 0x18]\n\t"
184 + B2B_INLINE_SINGLE_NOP
185 "std\t%%g0, [%0 + %3 + 0x20]\n\t"
186 + B2B_INLINE_SINGLE_NOP
187 "std\t%%g0, [%0 + %3 + 0x28]\n\t"
188 + B2B_INLINE_SINGLE_NOP
189 "std\t%%g0, [%0 + %3 + 0x30]\n\t"
190 + B2B_INLINE_SINGLE_NOP
191 "st\t%1, [%0 + %3 + 0x38]\n\t"
192 - "st\t%%g0, [%0 + %3 + 0x3c]"
193 + "st\t%%g0, [%0 + %3 + 0x3c]\n\t"
194 + B2B_INLINE_DOUBLE_NOP
195 : /* no outputs */
196 : "r" (regs),
197 "r" (sp - sizeof(struct reg_window32)),
198 diff --git a/arch/sparc/include/asm/psr.h b/arch/sparc/include/asm/psr.h
199 index 65127ce565ab..4ad45ccfe8d8 100644
200 --- a/arch/sparc/include/asm/psr.h
201 +++ b/arch/sparc/include/asm/psr.h
202 @@ -15,6 +15,8 @@
203
204
205 #ifndef __ASSEMBLY__
206 +#include <asm/asmmacro.h>
207 +
208 /* Get the %psr register. */
209 static inline unsigned int get_psr(void)
210 {
211 @@ -55,6 +57,7 @@ static inline unsigned int get_fsr(void)
212 unsigned int fsr = 0;
213
214 __asm__ __volatile__(
215 + B2B_INLINE_DOUBLE_NOP
216 "st %%fsr, %1\n\t"
217 "ld %1, %0\n\t"
218 : "=r" (fsr)
219 diff --git a/arch/sparc/include/asm/sbi.h b/arch/sparc/include/asm/sbi.h
220 index 4d6026c1e446..49b4e0aa4689 100644
221 --- a/arch/sparc/include/asm/sbi.h
222 +++ b/arch/sparc/include/asm/sbi.h
223 @@ -66,6 +66,8 @@ struct sbi_regs {
224
225 #ifndef __ASSEMBLY__
226
227 +#include <asm/asmmacro.h>
228 +
229 static inline int acquire_sbi(int devid, int mask)
230 {
231 __asm__ __volatile__ ("swapa [%2] %3, %0" :
232 @@ -78,7 +80,10 @@ static inline int acquire_sbi(int devid, int mask)
233
234 static inline void release_sbi(int devid, int mask)
235 {
236 - __asm__ __volatile__ ("sta %0, [%1] %2" : :
237 + __asm__ __volatile__ (B2B_INLINE_DOUBLE_NOP
238 + "sta %0, [%1] %2\n\t"
239 + B2B_INLINE_DOUBLE_NOP
240 + : :
241 "r" (mask),
242 "r" (ECSR_DEV_BASE(devid) | SBI_INTR_STATE),
243 "i" (ASI_M_CTL));
244 @@ -86,7 +91,10 @@ static inline void release_sbi(int devid, int mask)
245
246 static inline void set_sbi_tid(int devid, int targetid)
247 {
248 - __asm__ __volatile__ ("sta %0, [%1] %2" : :
249 + __asm__ __volatile__ (B2B_INLINE_DOUBLE_NOP
250 + "sta %0, [%1] %2\n\t"
251 + B2B_INLINE_DOUBLE_NOP
252 + : :
253 "r" (targetid),
254 "r" (ECSR_DEV_BASE(devid) | SBI_INTR_TID),
255 "i" (ASI_M_CTL));
256 @@ -105,7 +113,10 @@ static inline int get_sbi_ctl(int devid, int cfgno)
257
258 static inline void set_sbi_ctl(int devid, int cfgno, int cfg)
259 {
260 - __asm__ __volatile__ ("sta %0, [%1] %2" : :
261 + __asm__ __volatile__ (B2B_INLINE_DOUBLE_NOP
262 + "sta %0, [%1] %2\n\t"
263 + B2B_INLINE_DOUBLE_NOP
264 + : :
265 "r" (cfg),
266 "r" ((ECSR_DEV_BASE(devid) | SBI_CFG0) + (cfgno<<2)),
267 "i" (ASI_M_CTL));
268 diff --git a/arch/sparc/include/asm/spinlock_32.h b/arch/sparc/include/asm/spinlock_32.h
269 index bc5aa6f61676..adade4095cf2 100644
270 --- a/arch/sparc/include/asm/spinlock_32.h
271 +++ b/arch/sparc/include/asm/spinlock_32.h
272 @@ -12,6 +12,7 @@
273 #include <asm/psr.h>
274 #include <asm/barrier.h>
275 #include <asm/processor.h> /* for cpu_relax */
276 +#include <asm/asmmacro.h>
277
278 #define arch_spin_is_locked(lock) (*((volatile unsigned char *)(lock)) != 0)
279
280 @@ -47,7 +48,10 @@ static inline int arch_spin_trylock(arch_spinlock_t *lock)
281
282 static inline void arch_spin_unlock(arch_spinlock_t *lock)
283 {
284 - __asm__ __volatile__("stb %%g0, [%0]" : : "r" (lock) : "memory");
285 + __asm__ __volatile__(B2B_INLINE_DOUBLE_NOP
286 + "stb %%g0, [%0]\n\t"
287 + B2B_INLINE_DOUBLE_NOP
288 + : : "r" (lock) : "memory");
289 }
290
291 /* Read-write spinlocks, allowing multiple readers
292 @@ -133,7 +137,9 @@ static inline void arch_write_lock(arch_rwlock_t *rw)
293 static inline void arch_write_unlock(arch_rwlock_t *lock)
294 {
295 __asm__ __volatile__(
296 -" st %%g0, [%0]"
297 +" " B2B_INLINE_DOUBLE_NOP
298 +" st %%g0, [%0]\n"
299 +" " B2B_INLINE_DOUBLE_NOP
300 : /* no outputs */
301 : "r" (lock)
302 : "memory");
303 diff --git a/arch/sparc/include/asm/uaccess_32.h b/arch/sparc/include/asm/uaccess_32.h
304 index 0a2d3ebc4bb8..98432ac982ab 100644
305 --- a/arch/sparc/include/asm/uaccess_32.h
306 +++ b/arch/sparc/include/asm/uaccess_32.h
307 @@ -12,6 +12,7 @@
308 #include <linux/string.h>
309
310 #include <asm/processor.h>
311 +#include <asm/asmmacro.h>
312
313 #define ARCH_HAS_SORT_EXTABLE
314 #define ARCH_HAS_SEARCH_EXTABLE
315 @@ -145,8 +146,10 @@ struct __large_struct { unsigned long buf[100]; };
316 #define __put_user_asm(x, size, addr, ret) \
317 __asm__ __volatile__( \
318 "/* Put user asm, inline. */\n" \
319 + B2B_INLINE_DOUBLE_NOP \
320 "1:\t" "st"#size " %1, %2\n\t" \
321 - "clr %0\n" \
322 + "clr %0\n\t" \
323 + B2B_INLINE_SINGLE_NOP \
324 "2:\n\n\t" \
325 ".section .fixup,#alloc,#execinstr\n\t" \
326 ".align 4\n" \
327 diff --git a/arch/sparc/include/asm/winmacro.h b/arch/sparc/include/asm/winmacro.h
328 index b6e911f5d93c..9c6208460a46 100644
329 --- a/arch/sparc/include/asm/winmacro.h
330 +++ b/arch/sparc/include/asm/winmacro.h
331 @@ -9,19 +9,28 @@
332 #define _SPARC_WINMACRO_H
333
334 #include <asm/ptrace.h>
335 +#include <asm/asmmacro.h>
336
337 /* Store the register window onto the 8-byte aligned area starting
338 * at %reg. It might be %sp, it might not, we don't care.
339 */
340 #define STORE_WINDOW(reg) \
341 std %l0, [%reg + RW_L0]; \
342 + B2B_SINGLE_NOP \
343 std %l2, [%reg + RW_L2]; \
344 + B2B_SINGLE_NOP \
345 std %l4, [%reg + RW_L4]; \
346 + B2B_SINGLE_NOP \
347 std %l6, [%reg + RW_L6]; \
348 + B2B_SINGLE_NOP \
349 std %i0, [%reg + RW_I0]; \
350 + B2B_SINGLE_NOP \
351 std %i2, [%reg + RW_I2]; \
352 + B2B_SINGLE_NOP \
353 std %i4, [%reg + RW_I4]; \
354 - std %i6, [%reg + RW_I6];
355 + B2B_SINGLE_NOP \
356 + std %i6, [%reg + RW_I6]; \
357 + B2B_SINGLE_NOP
358
359 /* Load a register window from the area beginning at %reg. */
360 #define LOAD_WINDOW(reg) \
361 @@ -64,17 +73,25 @@
362
363 #define STORE_PT_INS(base_reg) \
364 std %i0, [%base_reg + STACKFRAME_SZ + PT_I0]; \
365 + B2B_SINGLE_NOP \
366 std %i2, [%base_reg + STACKFRAME_SZ + PT_I2]; \
367 + B2B_SINGLE_NOP \
368 std %i4, [%base_reg + STACKFRAME_SZ + PT_I4]; \
369 - std %i6, [%base_reg + STACKFRAME_SZ + PT_I6];
370 + B2B_SINGLE_NOP \
371 + std %i6, [%base_reg + STACKFRAME_SZ + PT_I6]; \
372 + B2B_SINGLE_NOP
373
374 #define STORE_PT_GLOBALS(base_reg) \
375 st %g1, [%base_reg + STACKFRAME_SZ + PT_G1]; \
376 std %g2, [%base_reg + STACKFRAME_SZ + PT_G2]; \
377 + B2B_SINGLE_NOP \
378 std %g4, [%base_reg + STACKFRAME_SZ + PT_G4]; \
379 - std %g6, [%base_reg + STACKFRAME_SZ + PT_G6];
380 + B2B_SINGLE_NOP \
381 + std %g6, [%base_reg + STACKFRAME_SZ + PT_G6]; \
382 + B2B_SINGLE_NOP
383
384 #define STORE_PT_YREG(base_reg, scratch) \
385 + B2B_SINGLE_NOP \
386 rd %y, %scratch; \
387 st %scratch, [%base_reg + STACKFRAME_SZ + PT_Y];
388
389 diff --git a/arch/sparc/include/asm/xor_32.h b/arch/sparc/include/asm/xor_32.h
390 index 3e5af37e4b9c..3c72d9644785 100644
391 --- a/arch/sparc/include/asm/xor_32.h
392 +++ b/arch/sparc/include/asm/xor_32.h
393 @@ -12,6 +12,8 @@
394 * Copyright (C) 1999 Jakub Jelinek (jj@ultra.linux.cz)
395 */
396
397 +#include <asm/asmmacro.h>
398 +
399 static void
400 sparc_2(unsigned long bytes, unsigned long *p1, unsigned long *p2)
401 {
402 @@ -36,9 +38,13 @@ sparc_2(unsigned long bytes, unsigned long *p1, unsigned long *p2)
403 "xor %%o2, %%l4, %%o2\n\t"
404 "xor %%o3, %%l5, %%o3\n\t"
405 "std %%g2, [%0 + 0x00]\n\t"
406 + B2B_INLINE_SINGLE_NOP
407 "std %%g4, [%0 + 0x08]\n\t"
408 + B2B_INLINE_SINGLE_NOP
409 "std %%o0, [%0 + 0x10]\n\t"
410 + B2B_INLINE_SINGLE_NOP
411 "std %%o2, [%0 + 0x18]\n"
412 + B2B_INLINE_SINGLE_NOP
413 :
414 : "r" (p1), "r" (p2)
415 : "g2", "g3", "g4", "g5",
416 @@ -86,9 +92,13 @@ sparc_3(unsigned long bytes, unsigned long *p1, unsigned long *p2,
417 "xor %%o2, %%l4, %%o2\n\t"
418 "xor %%o3, %%l5, %%o3\n\t"
419 "std %%g2, [%0 + 0x00]\n\t"
420 + B2B_INLINE_SINGLE_NOP
421 "std %%g4, [%0 + 0x08]\n\t"
422 + B2B_INLINE_SINGLE_NOP
423 "std %%o0, [%0 + 0x10]\n\t"
424 + B2B_INLINE_SINGLE_NOP
425 "std %%o2, [%0 + 0x18]\n"
426 + B2B_INLINE_SINGLE_NOP
427 :
428 : "r" (p1), "r" (p2), "r" (p3)
429 : "g2", "g3", "g4", "g5",
430 @@ -149,9 +159,13 @@ sparc_4(unsigned long bytes, unsigned long *p1, unsigned long *p2,
431 "xor %%o2, %%l4, %%o2\n\t"
432 "xor %%o3, %%l5, %%o3\n\t"
433 "std %%g2, [%0 + 0x00]\n\t"
434 + B2B_INLINE_SINGLE_NOP
435 "std %%g4, [%0 + 0x08]\n\t"
436 + B2B_INLINE_SINGLE_NOP
437 "std %%o0, [%0 + 0x10]\n\t"
438 + B2B_INLINE_SINGLE_NOP
439 "std %%o2, [%0 + 0x18]\n"
440 + B2B_INLINE_SINGLE_NOP
441 :
442 : "r" (p1), "r" (p2), "r" (p3), "r" (p4)
443 : "g2", "g3", "g4", "g5",
444 @@ -225,9 +239,13 @@ sparc_5(unsigned long bytes, unsigned long *p1, unsigned long *p2,
445 "xor %%o2, %%l4, %%o2\n\t"
446 "xor %%o3, %%l5, %%o3\n\t"
447 "std %%g2, [%0 + 0x00]\n\t"
448 + B2B_INLINE_SINGLE_NOP
449 "std %%g4, [%0 + 0x08]\n\t"
450 + B2B_INLINE_SINGLE_NOP
451 "std %%o0, [%0 + 0x10]\n\t"
452 + B2B_INLINE_SINGLE_NOP
453 "std %%o2, [%0 + 0x18]\n"
454 + B2B_INLINE_SINGLE_NOP
455 :
456 : "r" (p1), "r" (p2), "r" (p3), "r" (p4), "r" (p5)
457 : "g2", "g3", "g4", "g5",
458 diff --git a/arch/sparc/kernel/entry.S b/arch/sparc/kernel/entry.S
459 index 87c68aeeb794..c0fdf1de10f2 100644
460 --- a/arch/sparc/kernel/entry.S
461 +++ b/arch/sparc/kernel/entry.S
462 @@ -121,6 +121,7 @@ floppy_tdone:
463 sethi %hi(pdma_vaddr), %l5
464 st %l4, [%l5 + %lo(pdma_vaddr)]
465 sethi %hi(pdma_size), %l5
466 + B2B_SINGLE_NOP
467 st %l6, [%l5 + %lo(pdma_size)]
468 /* Flip terminal count pin */
469 set auxio_register, %l7
470 @@ -138,11 +139,13 @@ floppy_tdone:
471 WRITE_PAUSE
472
473 stb %l5, [%l7]
474 + B2B_SINGLE_NOP
475
476 /* Prevent recursion */
477 sethi %hi(doing_pdma), %l7
478 + st %g0, [%l7 + %lo(doing_pdma)]
479 b floppy_dosoftint
480 - st %g0, [%l7 + %lo(doing_pdma)]
481 + nop
482
483 /* We emptied the FIFO, but we haven't read everything
484 * as of yet. Store the current transfer address and
485 @@ -153,6 +156,7 @@ floppy_fifo_emptied:
486 sethi %hi(pdma_vaddr), %l5
487 st %l4, [%l5 + %lo(pdma_vaddr)]
488 sethi %hi(pdma_size), %l7
489 + B2B_SINGLE_NOP
490 st %l6, [%l7 + %lo(pdma_size)]
491
492 /* Restore condition codes */
493 @@ -165,10 +169,12 @@ floppy_fifo_emptied:
494 floppy_overrun:
495 sethi %hi(pdma_vaddr), %l5
496 st %l4, [%l5 + %lo(pdma_vaddr)]
497 + B2B_SINGLE_NOP
498 sethi %hi(pdma_size), %l5
499 st %l6, [%l5 + %lo(pdma_size)]
500 /* Prevent recursion */
501 sethi %hi(doing_pdma), %l7
502 + B2B_SINGLE_NOP
503 st %g0, [%l7 + %lo(doing_pdma)]
504
505 /* fall through... */
506 @@ -323,8 +329,9 @@ linux_trap_ipi15_sun4m:
507 ld [%o5 + %o0], %o5
508 ld [%o5 + 0x00], %o3 ! sun4m_irq_percpu[cpu]->pending
509 andcc %o3, %o2, %g0
510 + st %o2, [%o5 + 0x04] ! sun4m_irq_percpu[cpu]->clear=0x80000000
511 be sun4m_nmi_error ! Must be an NMI async memory error
512 - st %o2, [%o5 + 0x04] ! sun4m_irq_percpu[cpu]->clear=0x80000000
513 + nop
514 WRITE_PAUSE
515 ld [%o5 + 0x00], %g0 ! sun4m_irq_percpu[cpu]->pending
516 WRITE_PAUSE
517 @@ -1024,8 +1031,9 @@ ret_sys_call:
518 ld [%sp + STACKFRAME_SZ + PT_NPC], %l1 /* pc = npc */
519 add %l1, 0x4, %l2 /* npc = npc+4 */
520 st %l1, [%sp + STACKFRAME_SZ + PT_PC]
521 + st %l2, [%sp + STACKFRAME_SZ + PT_NPC]
522 b ret_trap_entry
523 - st %l2, [%sp + STACKFRAME_SZ + PT_NPC]
524 + nop
525 1:
526 /* System call failure, set Carry condition code.
527 * Also, get abs(errno) to return to the process.
528 @@ -1038,8 +1046,9 @@ ret_sys_call:
529 ld [%sp + STACKFRAME_SZ + PT_NPC], %l1 /* pc = npc */
530 add %l1, 0x4, %l2 /* npc = npc+4 */
531 st %l1, [%sp + STACKFRAME_SZ + PT_PC]
532 + st %l2, [%sp + STACKFRAME_SZ + PT_NPC]
533 b ret_trap_entry
534 - st %l2, [%sp + STACKFRAME_SZ + PT_NPC]
535 + nop
536
537 linux_syscall_trace2:
538 add %sp, STACKFRAME_SZ, %o0
539 @@ -1047,8 +1056,9 @@ linux_syscall_trace2:
540 call syscall_trace
541 add %l1, 0x4, %l2 /* npc = npc+4 */
542 st %l1, [%sp + STACKFRAME_SZ + PT_PC]
543 + st %l2, [%sp + STACKFRAME_SZ + PT_NPC]
544 b ret_trap_entry
545 - st %l2, [%sp + STACKFRAME_SZ + PT_NPC]
546 + nop
547
548
549 /* Saving and restoring the FPU state is best done from lowlevel code.
550 @@ -1070,6 +1080,7 @@ fpsave:
551 /* We have an fpqueue to save. */
552 1:
553 std %fq, [%o2]
554 + B2B_SINGLE_NOP
555 fpsave_magic:
556 st %fsr, [%o1]
557 ld [%o1], %g3
558 @@ -1086,22 +1097,39 @@ fpsave_magic:
559 st %g2, [%o3]
560
561 std %f0, [%o0 + 0x00]
562 + B2B_SINGLE_NOP
563 std %f2, [%o0 + 0x08]
564 + B2B_SINGLE_NOP
565 std %f4, [%o0 + 0x10]
566 + B2B_SINGLE_NOP
567 std %f6, [%o0 + 0x18]
568 + B2B_SINGLE_NOP
569 std %f8, [%o0 + 0x20]
570 + B2B_SINGLE_NOP
571 std %f10, [%o0 + 0x28]
572 + B2B_SINGLE_NOP
573 std %f12, [%o0 + 0x30]
574 + B2B_SINGLE_NOP
575 std %f14, [%o0 + 0x38]
576 + B2B_SINGLE_NOP
577 std %f16, [%o0 + 0x40]
578 + B2B_SINGLE_NOP
579 std %f18, [%o0 + 0x48]
580 + B2B_SINGLE_NOP
581 std %f20, [%o0 + 0x50]
582 + B2B_SINGLE_NOP
583 std %f22, [%o0 + 0x58]
584 + B2B_SINGLE_NOP
585 std %f24, [%o0 + 0x60]
586 + B2B_SINGLE_NOP
587 std %f26, [%o0 + 0x68]
588 + B2B_SINGLE_NOP
589 std %f28, [%o0 + 0x70]
590 + B2B_SINGLE_NOP
591 + std %f30, [%o0 + 0x78]
592 + B2B_SINGLE_NOP
593 retl
594 - std %f30, [%o0 + 0x78]
595 + nop
596
597 /* Thanks for Theo Deraadt and the authors of the Sprite/netbsd/openbsd
598 * code for pointing out this possible deadlock, while we save state
599 @@ -1109,8 +1137,9 @@ fpsave_magic:
600 * code has to know how to deal with this.
601 */
602 fpsave_catch:
603 + st %fsr, [%o1]
604 b fpsave_magic + 4
605 - st %fsr, [%o1]
606 + nop
607
608 fpsave_catch2:
609 st %fsr, [%o1] /* In this case, this is the first successful fsr read */
610 @@ -1267,8 +1296,9 @@ kuw_patch1:
611 wr %o5, 0x0, %psr ! re-enable interrupts
612 WRITE_PAUSE ! burn baby burn
613 3:
614 + st %g0, [%g6 + TI_W_SAVED] ! no windows saved
615 retl ! return
616 - st %g0, [%g6 + TI_W_SAVED] ! no windows saved
617 + nop
618
619 .align 4
620 .globl restore_current
621 diff --git a/arch/sparc/kernel/etrap_32.S b/arch/sparc/kernel/etrap_32.S
622 index 9f243f918619..860df075a355 100644
623 --- a/arch/sparc/kernel/etrap_32.S
624 +++ b/arch/sparc/kernel/etrap_32.S
625 @@ -253,6 +253,7 @@ trap_setup_user_stack_is_bolixed:
626 or %glob_tmp, 0x2, %glob_tmp ! or in no_fault bit
627 LEON_PI(sta %glob_tmp, [%g0] ASI_LEON_MMUREGS) ! set it
628 SUN_PI_(sta %glob_tmp, [%g0] ASI_M_MMUREGS) ! set it
629 + B2B_DOUBLE_NOP
630
631 /* Dump the registers and cross fingers. */
632 STORE_WINDOW(sp)
633 diff --git a/arch/sparc/kernel/head_32.S b/arch/sparc/kernel/head_32.S
634 index be30c8d4cc73..cdff4d974434 100644
635 --- a/arch/sparc/kernel/head_32.S
636 +++ b/arch/sparc/kernel/head_32.S
637 @@ -26,6 +26,7 @@
638 #include <asm/errno.h>
639 #include <asm/pgtable.h> /* PGDIR_SHIFT */
640 #include <asm/export.h>
641 +#include <asm/asmmacro.h>
642
643 .data
644 /* The following are used with the prom_vector node-ops to figure out
645 @@ -365,6 +366,7 @@ execute_in_high_mem:
646
647 sethi %hi(prom_vector_p), %g1
648 st %o0, [%g1 + %lo(prom_vector_p)]
649 + B2B_SINGLE_NOP
650
651 sethi %hi(linux_dbvec), %g1
652 st %o1, [%g1 + %lo(linux_dbvec)]
653 @@ -465,6 +467,7 @@ sun4d_init:
654 srl %g3, 3, %g4
655 sta %g4, [%g0] ASI_M_VIKING_TMP1
656 sethi %hi(boot_cpu_id), %g5
657 + B2B_SINGLE_NOP
658 stb %g4, [%g5 + %lo(boot_cpu_id)]
659 #endif
660
661 @@ -550,6 +553,7 @@ continue_boot:
662 #ifdef CONFIG_SMP
663 st %g6, [%g2]
664 add %g2, %g3, %g2
665 + B2B_SINGLE_NOP
666 #endif
667 st %g6, [%g2]
668
669 @@ -624,21 +628,27 @@ continue_boot:
670 set flush_patch_one, %g5
671 st %g4, [%g5 + 0x18]
672 st %g4, [%g5 + 0x1c]
673 + B2B_SINGLE_NOP
674 set flush_patch_two, %g5
675 st %g4, [%g5 + 0x18]
676 st %g4, [%g5 + 0x1c]
677 + B2B_SINGLE_NOP
678 set flush_patch_three, %g5
679 st %g4, [%g5 + 0x18]
680 st %g4, [%g5 + 0x1c]
681 + B2B_SINGLE_NOP
682 set flush_patch_four, %g5
683 st %g4, [%g5 + 0x18]
684 st %g4, [%g5 + 0x1c]
685 + B2B_SINGLE_NOP
686 set flush_patch_exception, %g5
687 st %g4, [%g5 + 0x18]
688 st %g4, [%g5 + 0x1c]
689 + B2B_SINGLE_NOP
690 set flush_patch_switch, %g5
691 st %g4, [%g5 + 0x18]
692 st %g4, [%g5 + 0x1c]
693 + B2B_SINGLE_NOP
694
695 2:
696 sethi %hi(nwindows), %g4
697 @@ -738,8 +748,9 @@ no_sun4u_here:
698 add %l4, 4, %l4
699 cmp %l5, %l2
700 add %l5, %l6, %l5
701 - bgeu,a 3f
702 + blu 3f ! %l5 < %l2: skip the store, exactly as the annulled delay slot did
703 + nop ! keep the store out of the delay slot (b2b store errata)
704 st %l5, [%l4 - 4]
705 3:
706 subcc %l3, 4, %l3
707 bne 2b
708 @@ -750,13 +761,15 @@ no_sun4u_here:
709
710 ld [%l1 + (sun4u_r1 - sun4u_a1)], %o1
711 add %l1, (sun4u_a2 - sun4u_a1), %o0
712 + st %o1, [%o0 + (sun4u_i2 - sun4u_a2)]
713 call %l0
714 - st %o1, [%o0 + (sun4u_i2 - sun4u_a2)]
715 + nop
716
717 ld [%l1 + (sun4u_1 - sun4u_a1)], %o1
718 add %l1, (sun4u_a3 - sun4u_a1), %o0
719 - call %l0
720 st %o1, [%o0 + (sun4u_i3 - sun4u_a3)]
721 + call %l0
722 + nop
723
724 call %l0
725 add %l1, (sun4u_a4 - sun4u_a1), %o0
726 diff --git a/arch/sparc/kernel/leon_smp.c b/arch/sparc/kernel/leon_smp.c
727 index 1eed26d423fb..f726d950e347 100644
728 --- a/arch/sparc/kernel/leon_smp.c
729 +++ b/arch/sparc/kernel/leon_smp.c
730 @@ -44,6 +44,7 @@
731 #include <asm/leon.h>
732 #include <asm/leon_amba.h>
733 #include <asm/timer.h>
734 +#include <asm/asmmacro.h>
735
736 #include "kernel.h"
737
738 @@ -391,9 +392,14 @@ static void leon_cross_call(smpfunc_t func, cpumask_t mask, unsigned long arg1,
739 register unsigned long a4 asm("i4") = arg4;
740 register unsigned long a5 asm("i5") = 0;
741
742 - __asm__ __volatile__("std %0, [%6]\n\t"
743 + __asm__ __volatile__(B2B_INLINE_DOUBLE_NOP
744 + "std %0, [%6]\n\t"
745 + B2B_INLINE_SINGLE_NOP
746 "std %2, [%6 + 8]\n\t"
747 - "std %4, [%6 + 16]\n\t" : :
748 + B2B_INLINE_SINGLE_NOP
749 + "std %4, [%6 + 16]\n\t"
750 + B2B_INLINE_SINGLE_NOP
751 + : :
752 "r"(f), "r"(a1), "r"(a2), "r"(a3),
753 "r"(a4), "r"(a5),
754 "r"(&ccall_info.func));
755 diff --git a/arch/sparc/kernel/sun4d_smp.c b/arch/sparc/kernel/sun4d_smp.c
756 index ff30f03beb7c..b06eaf4d1256 100644
757 --- a/arch/sparc/kernel/sun4d_smp.c
758 +++ b/arch/sparc/kernel/sun4d_smp.c
759 @@ -21,6 +21,7 @@
760 #include <asm/oplib.h>
761 #include <asm/sbi.h>
762 #include <asm/mmu.h>
763 +#include <asm/asmmacro.h>
764
765 #include "kernel.h"
766 #include "irq.h"
767 @@ -304,9 +305,14 @@ static void sun4d_cross_call(smpfunc_t func, cpumask_t mask, unsigned long arg1,
768 register unsigned long a5 asm("i5") = 0;
769
770 __asm__ __volatile__(
771 + B2B_INLINE_DOUBLE_NOP
772 "std %0, [%6]\n\t"
773 + B2B_INLINE_SINGLE_NOP
774 "std %2, [%6 + 8]\n\t"
775 - "std %4, [%6 + 16]\n\t" : :
776 + B2B_INLINE_SINGLE_NOP
777 + "std %4, [%6 + 16]\n\t"
778 + B2B_INLINE_SINGLE_NOP
779 + : :
780 "r"(f), "r"(a1), "r"(a2), "r"(a3), "r"(a4), "r"(a5),
781 "r" (&ccall_info.func));
782 }
783 diff --git a/arch/sparc/kernel/una_asm_32.S b/arch/sparc/kernel/una_asm_32.S
784 index f8bf839289fb..177011bebe3e 100644
785 --- a/arch/sparc/kernel/una_asm_32.S
786 +++ b/arch/sparc/kernel/una_asm_32.S
787 @@ -6,6 +6,7 @@
788 */
789
790 #include <linux/errno.h>
791 +#include <asm/asmmacro.h>
792
793 .text
794
795 @@ -32,26 +33,30 @@ __do_int_store:
796 srl %g1, 24, %g2
797 srl %g1, 16, %g7
798 4: stb %g2, [%o0]
799 - srl %g1, 8, %g2
800 5: stb %g7, [%o0 + 1]
801 + srl %g1, 8, %g2
802 ld [%o2 + 4], %g7
803 6: stb %g2, [%o0 + 2]
804 - srl %g7, 24, %g2
805 7: stb %g1, [%o0 + 3]
806 + srl %g7, 24, %g2
807 srl %g7, 16, %g1
808 8: stb %g2, [%o0 + 4]
809 srl %g7, 8, %g2
810 + B2B_SINGLE_NOP
811 9: stb %g1, [%o0 + 5]
812 10: stb %g2, [%o0 + 6]
813 +11: stb %g7, [%o0 + 7]
814 b 0f
815 -11: stb %g7, [%o0 + 7]
816 -1: srl %g1, 16, %g7
817 + nop
818 +1:
819 12: stb %g2, [%o0]
820 + srl %g1, 16, %g7
821 srl %g1, 8, %g2
822 13: stb %g7, [%o0 + 1]
823 14: stb %g2, [%o0 + 2]
824 +15: stb %g1, [%o0 + 3]
825 b 0f
826 -15: stb %g1, [%o0 + 3]
827 + nop
828 2: srl %g1, 8, %g2
829 16: stb %g2, [%o0]
830 17: stb %g1, [%o0 + 1]
831 @@ -99,8 +104,9 @@ do_int_load:
832 or %g1, %g2, %g1
833 sll %g1, 16, %g1
834 sra %g1, 16, %g1
835 -3: b 0f
836 - st %g1, [%o0]
837 +3: st %g1, [%o0]
838 + b 0f
839 + nop
840 6: ldub [%o2 + 1], %g2
841 sll %g1, 24, %g1
842 7: ldub [%o2 + 2], %g7
843 @@ -110,8 +116,9 @@ do_int_load:
844 or %g3, %g2, %g3
845 or %g7, %g3, %g7
846 or %g1, %g7, %g1
847 + st %g1, [%o0]
848 b 0f
849 - st %g1, [%o0]
850 + nop
851 9: ldub [%o2], %g1
852 10: ldub [%o2 + 1], %g2
853 sll %g1, 24, %g1
854 diff --git a/arch/sparc/kernel/wof.S b/arch/sparc/kernel/wof.S
855 index 96a3a112423a..8538818424c0 100644
856 --- a/arch/sparc/kernel/wof.S
857 +++ b/arch/sparc/kernel/wof.S
858 @@ -124,6 +124,8 @@ spwin_no_userwins_from_kernel:
859 jmp %t_pc ! Return from trap
860 rett %t_npc ! we are done
861
862 + B2B_SINGLE_NOP ! To not trigger delay slot warning
863 +
864 spwin_exist_uwins:
865 /* LOCATION: Trap window */
866
867 @@ -341,7 +343,7 @@ SUN_PI_(lda [%g0] ASI_M_MMUREGS, %glob_tmp) ! read MMU control
868 or %glob_tmp, 0x2, %glob_tmp ! or in no_fault bit
869 LEON_PI(sta %glob_tmp, [%g0] ASI_LEON_MMUREGS) ! set it
870 SUN_PI_(sta %glob_tmp, [%g0] ASI_M_MMUREGS) ! set it
871 -
872 + B2B_DOUBLE_NOP
873 /* Dump the registers and cross fingers. */
874 STORE_WINDOW(sp)
875
876 diff --git a/arch/sparc/lib/blockops.S b/arch/sparc/lib/blockops.S
877 index 76ddd1ff6833..9f66d08ff8a3 100644
878 --- a/arch/sparc/lib/blockops.S
879 +++ b/arch/sparc/lib/blockops.S
880 @@ -8,19 +8,28 @@
881 #include <linux/linkage.h>
882 #include <asm/page.h>
883 #include <asm/export.h>
884 +#include <asm/asmmacro.h>
885
886 /* Zero out 64 bytes of memory at (buf + offset).
887 * Assumes %g1 contains zero.
888 */
889 #define BLAST_BLOCK(buf, offset) \
890 std %g0, [buf + offset + 0x38]; \
891 + B2B_SINGLE_NOP \
892 std %g0, [buf + offset + 0x30]; \
893 + B2B_SINGLE_NOP \
894 std %g0, [buf + offset + 0x28]; \
895 + B2B_SINGLE_NOP \
896 std %g0, [buf + offset + 0x20]; \
897 + B2B_SINGLE_NOP \
898 std %g0, [buf + offset + 0x18]; \
899 + B2B_SINGLE_NOP \
900 std %g0, [buf + offset + 0x10]; \
901 + B2B_SINGLE_NOP \
902 std %g0, [buf + offset + 0x08]; \
903 - std %g0, [buf + offset + 0x00];
904 + B2B_SINGLE_NOP \
905 + std %g0, [buf + offset + 0x00]; \
906 + B2B_SINGLE_NOP
907
908 /* Copy 32 bytes of memory at (src + offset) to
909 * (dst + offset).
910 @@ -31,9 +40,13 @@
911 ldd [src + offset + 0x08], t4; \
912 ldd [src + offset + 0x00], t6; \
913 std t0, [dst + offset + 0x18]; \
914 + B2B_SINGLE_NOP \
915 std t2, [dst + offset + 0x10]; \
916 + B2B_SINGLE_NOP \
917 std t4, [dst + offset + 0x08]; \
918 - std t6, [dst + offset + 0x00];
919 + B2B_SINGLE_NOP \
920 + std t6, [dst + offset + 0x00]; \
921 + B2B_SINGLE_NOP
922
923 /* Profiling evidence indicates that memset() is
924 * commonly called for blocks of size PAGE_SIZE,
925 diff --git a/arch/sparc/lib/checksum_32.S b/arch/sparc/lib/checksum_32.S
926 index 7488d130faf7..1f5b2daf2d51 100644
927 --- a/arch/sparc/lib/checksum_32.S
928 +++ b/arch/sparc/lib/checksum_32.S
929 @@ -190,39 +190,47 @@ cpout: retl ! get outta here
930 * because of this we thus do all the ldd's together to get
931 * Viking MXCC into streaming mode. Ho hum...
932 */
933 + /* B2B-FIX-NOTE: The fixup section is affected only by the number of
934 + * instructions and where the load instructions are located in this
935 + * macro. Neither of those factors has been changed.
936 + */
937 #define CSUMCOPY_BIGCHUNK(src, dst, sum, off, t0, t1, t2, t3, t4, t5, t6, t7) \
938 ldd [src + off + 0x00], t0; \
939 ldd [src + off + 0x08], t2; \
940 ldd [src + off + 0x10], t4; \
941 ldd [src + off + 0x18], t6; \
942 st t0, [dst + off + 0x00]; \
943 - addxcc t0, sum, sum; \
944 st t1, [dst + off + 0x04]; \
945 + addxcc t0, sum, sum; \
946 addxcc t1, sum, sum; \
947 st t2, [dst + off + 0x08]; \
948 - addxcc t2, sum, sum; \
949 st t3, [dst + off + 0x0c]; \
950 + addxcc t2, sum, sum; \
951 addxcc t3, sum, sum; \
952 st t4, [dst + off + 0x10]; \
953 - addxcc t4, sum, sum; \
954 st t5, [dst + off + 0x14]; \
955 + addxcc t4, sum, sum; \
956 addxcc t5, sum, sum; \
957 st t6, [dst + off + 0x18]; \
958 - addxcc t6, sum, sum; \
959 st t7, [dst + off + 0x1c]; \
960 + addxcc t6, sum, sum; \
961 addxcc t7, sum, sum;
962
963 /* Yuck, 6 superscalar cycles... */
964 + /* B2B-FIX-NOTE: The fixup section is affected only by the number of
965 + * instructions and where the load instructions are located in this
966 + * macro. Neither of those factors has been changed.
967 + */
968 #define CSUMCOPY_LASTCHUNK(src, dst, sum, off, t0, t1, t2, t3) \
969 ldd [src - off - 0x08], t0; \
970 ldd [src - off - 0x00], t2; \
971 addxcc t0, sum, sum; \
972 - st t0, [dst - off - 0x08]; \
973 addxcc t1, sum, sum; \
974 + st t0, [dst - off - 0x08]; \
975 st t1, [dst - off - 0x04]; \
976 addxcc t2, sum, sum; \
977 - st t2, [dst - off - 0x00]; \
978 addxcc t3, sum, sum; \
979 + st t2, [dst - off - 0x00]; \
980 st t3, [dst - off + 0x04];
981
982 /* Handle the end cruft code out of band for better cache patterns. */
983 @@ -399,8 +407,8 @@ ccslow: cmp %g1, 0
984 sub %g1, 2, %g1
985 srl %o4, 8, %g2
986 sub %g4, 1, %g4
987 - EX(stb %g2, [%o1])
988 add %o4, %g5, %g5
989 + EX(stb %g2, [%o1])
990 EX(stb %o4, [%o1 + 1])
991 add %o0, 2, %o0
992 srl %g4, 1, %g4
993 @@ -413,10 +421,10 @@ ccslow: cmp %g1, 0
994 srl %o4, 16, %g3
995 EX(stb %g2, [%o1])
996 srl %o4, 8, %g2
997 - EX(stb %g3, [%o1 + 1])
998 add %o0, 4, %o0
999 - EX(stb %g2, [%o1 + 2])
1000 addcc %o4, %g5, %g5
1001 + EX(stb %g3, [%o1 + 1])
1002 + EX(stb %g2, [%o1 + 2])
1003 EX(stb %o4, [%o1 + 3])
1004 addx %g5, %g0, %g5 ! I am now to lazy to optimize this (question it
1005 add %o1, 4, %o1 ! is worthy). Maybe some day - with the sll/srl
1006 @@ -435,8 +443,8 @@ ccslow: cmp %g1, 0
1007 srl %o4, 8, %g2
1008 add %o0, 2, %o0
1009 EX(stb %g2, [%o1])
1010 - add %g5, %o4, %g5
1011 EX(stb %o4, [%o1 + 1])
1012 + add %g5, %o4, %g5
1013 add %o1, 2, %o1
1014 3: be,a 1f
1015 sll %g5, 16, %o4
1016 diff --git a/arch/sparc/lib/copy_user.S b/arch/sparc/lib/copy_user.S
1017 index dc72f2b970b7..b7cd5165497d 100644
1018 --- a/arch/sparc/lib/copy_user.S
1019 +++ b/arch/sparc/lib/copy_user.S
1020 @@ -17,6 +17,7 @@
1021 #include <asm/page.h>
1022 #include <asm/thread_info.h>
1023 #include <asm/export.h>
1024 +#include <asm/asmmacro.h>
1025
1026 /* Work around cpp -rob */
1027 #define ALLOC #alloc
1028 @@ -82,12 +83,12 @@
1029
1030 #define MOVE_BIGALIGNCHUNK(src, dst, offset, t0, t1, t2, t3, t4, t5, t6, t7) \
1031 ldd [%src + (offset) + 0x00], %t0; \
1032 - ldd [%src + (offset) + 0x08], %t2; \
1033 - ldd [%src + (offset) + 0x10], %t4; \
1034 - ldd [%src + (offset) + 0x18], %t6; \
1035 std %t0, [%dst + (offset) + 0x00]; \
1036 + ldd [%src + (offset) + 0x08], %t2; \
1037 std %t2, [%dst + (offset) + 0x08]; \
1038 + ldd [%src + (offset) + 0x10], %t4; \
1039 std %t4, [%dst + (offset) + 0x10]; \
1040 + ldd [%src + (offset) + 0x18], %t6; \
1041 std %t6, [%dst + (offset) + 0x18];
1042
1043 #define MOVE_LASTCHUNK(src, dst, offset, t0, t1, t2, t3) \
1044 diff --git a/arch/sparc/lib/locks.S b/arch/sparc/lib/locks.S
1045 index 9a1289a3fb28..066717755d67 100644
1046 --- a/arch/sparc/lib/locks.S
1047 +++ b/arch/sparc/lib/locks.S
1048 @@ -92,7 +92,16 @@ ___rw_write_enter:
1049 bne ___rw_write_enter_spin_on_wlock
1050 ld [%g1], %g2
1051 andncc %g2, 0xff, %g0
1052 +#ifdef __FIX_LEON3FT_B2BST
1053 + be 1f
1054 + nop
1055 + stb %g0, [%g1 + 3]
1056 + b ___rw_write_enter_spin_on_wlock
1057 + nop
1058 +1:
1059 +#else
1060 bne,a ___rw_write_enter_spin_on_wlock
1061 stb %g0, [%g1 + 3]
1062 +#endif
1063 retl
1064 mov %g4, %o7
1065 diff --git a/arch/sparc/lib/memcpy.S b/arch/sparc/lib/memcpy.S
1066 index ee823d8c9215..dac6d6f0fe3c 100644
1067 --- a/arch/sparc/lib/memcpy.S
1068 +++ b/arch/sparc/lib/memcpy.S
1069 @@ -32,12 +32,12 @@ x:
1070
1071 #define MOVE_BIGALIGNCHUNK(src, dst, offset, t0, t1, t2, t3, t4, t5, t6, t7) \
1072 ldd [%src + (offset) + 0x00], %t0; \
1073 - ldd [%src + (offset) + 0x08], %t2; \
1074 - ldd [%src + (offset) + 0x10], %t4; \
1075 - ldd [%src + (offset) + 0x18], %t6; \
1076 std %t0, [%dst + (offset) + 0x00]; \
1077 + ldd [%src + (offset) + 0x08], %t2; \
1078 std %t2, [%dst + (offset) + 0x08]; \
1079 + ldd [%src + (offset) + 0x10], %t4; \
1080 std %t4, [%dst + (offset) + 0x10]; \
1081 + ldd [%src + (offset) + 0x18], %t6; \
1082 std %t6, [%dst + (offset) + 0x18];
1083
1084 #define MOVE_LASTCHUNK(src, dst, offset, t0, t1, t2, t3) \
1085 @@ -50,8 +50,8 @@ x:
1086
1087 #define MOVE_LASTALIGNCHUNK(src, dst, offset, t0, t1, t2, t3) \
1088 ldd [%src - (offset) - 0x10], %t0; \
1089 - ldd [%src - (offset) - 0x08], %t2; \
1090 std %t0, [%dst - (offset) - 0x10]; \
1091 + ldd [%src - (offset) - 0x08], %t2; \
1092 std %t2, [%dst - (offset) - 0x08];
1093
1094 #define MOVE_SHORTCHUNK(src, dst, offset, t0, t1) \
1095 @@ -192,8 +192,8 @@ EXPORT_SYMBOL(memcpy)
1096
1097 ldd [%o1], %g2
1098 add %o0, 8, %o0
1099 - st %g2, [%o0 - 0x08]
1100 add %o1, 8, %o1
1101 + st %g2, [%o0 - 0x08]
1102 st %g3, [%o0 - 0x04]
1103
1104 81: /* memcpy_last7 */
1105 diff --git a/arch/sparc/lib/memset.S b/arch/sparc/lib/memset.S
1106 index f427f34b8b79..77ea205b8d66 100644
1107 --- a/arch/sparc/lib/memset.S
1108 +++ b/arch/sparc/lib/memset.S
1109 @@ -11,6 +11,7 @@
1110
1111 #include <asm/ptrace.h>
1112 #include <asm/export.h>
1113 +#include <asm/asmmacro.h>
1114
1115 /* Work around cpp -rob */
1116 #define ALLOC #alloc
1117 @@ -39,23 +40,39 @@
1118 * Store 64 bytes at (BASE + OFFSET) using value SOURCE. */
1119 #define ZERO_BIG_BLOCK(base, offset, source) \
1120 std source, [base + offset + 0x00]; \
1121 + B2B_SINGLE_NOP \
1122 std source, [base + offset + 0x08]; \
1123 + B2B_SINGLE_NOP \
1124 std source, [base + offset + 0x10]; \
1125 + B2B_SINGLE_NOP \
1126 std source, [base + offset + 0x18]; \
1127 + B2B_SINGLE_NOP \
1128 std source, [base + offset + 0x20]; \
1129 + B2B_SINGLE_NOP \
1130 std source, [base + offset + 0x28]; \
1131 + B2B_SINGLE_NOP \
1132 std source, [base + offset + 0x30]; \
1133 - std source, [base + offset + 0x38];
1134 + B2B_SINGLE_NOP \
1135 + std source, [base + offset + 0x38]; \
1136 + B2B_SINGLE_NOP
1137
1138 #define ZERO_LAST_BLOCKS(base, offset, source) \
1139 std source, [base - offset - 0x38]; \
1140 + B2B_SINGLE_NOP \
1141 std source, [base - offset - 0x30]; \
1142 + B2B_SINGLE_NOP \
1143 std source, [base - offset - 0x28]; \
1144 + B2B_SINGLE_NOP \
1145 std source, [base - offset - 0x20]; \
1146 + B2B_SINGLE_NOP \
1147 std source, [base - offset - 0x18]; \
1148 + B2B_SINGLE_NOP \
1149 std source, [base - offset - 0x10]; \
1150 + B2B_SINGLE_NOP \
1151 std source, [base - offset - 0x08]; \
1152 - std source, [base - offset - 0x00];
1153 + B2B_SINGLE_NOP \
1154 + std source, [base - offset - 0x00]; \
1155 + B2B_SINGLE_NOP
1156
1157 .text
1158 .align 4
1159 @@ -82,12 +99,14 @@ memset:
1160 mov %o2, %o1
1161 3:
1162 cmp %o2, 3
1163 + EX(stb %g3, [%o0], sub %o1, 0)
1164 be 2f
1165 - EX(stb %g3, [%o0], sub %o1, 0)
1166 + nop
1167
1168 cmp %o2, 2
1169 + EX(stb %g3, [%o0 + 0x01], sub %o1, 1)
1170 be 2f
1171 - EX(stb %g3, [%o0 + 0x01], sub %o1, 1)
1172 + nop
1173
1174 EX(stb %g3, [%o0 + 0x02], sub %o1, 2)
1175 2:
1176 @@ -132,7 +151,11 @@ __bzero:
1177 be 13f
1178 andcc %o1, 7, %o1
1179
1180 - srl %o2, 1, %o3
1181 +#ifdef __FIX_LEON3FT_B2BST
1182 + mov %o2, %o3 /* 8 bytes of std+nop sets 8 bytes of memory */
1183 +#else
1184 + srl %o2, 1, %o3 /* 4 bytes of std sets 8 bytes of memory */
1185 +#endif
1186 set 13f, %o4
1187 sub %o4, %o3, %o4
1188 jmp %o4
1189 @@ -158,8 +181,9 @@ __bzero:
1190 EX(sth %g3, [%o0], and %o1, 3)
1191 add %o0, 2, %o0
1192 1:
1193 - bne,a 8f
1194 - EX(stb %g3, [%o0], and %o1, 1)
1195 + be 8f
1196 + nop
1197 + EX(stb %g3, [%o0], and %o1, 1)
1198 8:
1199 b 0f
1200 nop
1201 @@ -171,8 +195,9 @@ __bzero:
1202 8:
1203 add %o0, 1, %o0
1204 subcc %o1, 1, %o1
1205 + EX(stb %g3, [%o0 - 1], add %o1, 1)
1206 bne 8b
1207 - EX(stb %g3, [%o0 - 1], add %o1, 1)
1208 + nop
1209 0:
1210 andcc %g4, 1, %g0
1211 be 5f
1212 @@ -180,23 +205,56 @@ __bzero:
1213 retl
1214 mov %g1, %o0
1215 5:
1216 + clr %o0
1217 retl
1218 - clr %o0
1219 + nop
1220 __memset_end:
1221
1222 .section .fixup,#alloc,#execinstr
1223 .align 4
1224 20:
1225 + /*
1226 + * We got a fault in the 10: to 11: address range.
1227 + *
1228 + * At this point:
1229 + * - %g2 now contains the index (within the range) of the instruction that
1230 + * got the fault.
1231 + * - %o1 contains the number of bytes that were left to set/zero before
1232 + * entering the loop the first time.
1233 + * - %l3 contains the number of bytes left for the loop to set/zero
1234 + * (but adjusted in the middle of the loop)
1235 + *
1236 + */
1237 +#ifdef __FIX_LEON3FT_B2BST
1238 + cmp %g2, 16 /* Double number of instructions per half */
1239 +#else
1240 cmp %g2, 8
1241 +#endif
1242 bleu 1f
1243 and %o1, 0x7f, %o1
1244 - sub %g2, 9, %g2
1245 - add %o3, 64, %o3
1246 + /* We were in second half of the 10: to 11: block */
1247 +#ifdef __FIX_LEON3FT_B2BST
1248 + sub %g2, 17, %g2 /* Adjust index: 8 std + nop pairs + one subcc */
1249 +#else
1250 + sub %g2, 9, %g2 /* Adjust index to start of ZERO_BIG_BLOCK */
1251 +#endif
1252 + add %o3, 64, %o3 /* Adjust bytes left in this turn of the loop */
1253 + /* (due to the subcc being in the middle) */
1254 1:
1255 - sll %g2, 3, %g2
1256 - add %o3, %o1, %o0
1257 + /*
1258 + * Convert index of faulting instruction within ZERO_BIG_BLOCK to
1259 + * number of bytes written
1260 + */
1261 +#ifdef __FIX_LEON3FT_B2BST
1262 + sll %g2, 2, %g2 /* 8 bytes is written per 2 instructions (std+nop) */
1263 +#else
1264 + sll %g2, 3, %g2 /* 8 bytes is written per std instruction */
1265 +#endif
1266 + add %o3, %o1, %o0 /* Bytes left before faulting ZERO_BIG_BLOCK */
1267 b 30f
1268 - sub %o0, %g2, %o0
1269 + sub %o0, %g2, %o0 /* Subtract bytes written by the faulting */
1270 + /* ZERO_BIG_BLOCK => the number of bytes */
1271 + /* that were not set/zeroed. */
1272 21:
1273 mov 8, %o0
1274 and %o1, 7, %o1
1275 diff --git a/arch/sparc/mm/hypersparc.S b/arch/sparc/mm/hypersparc.S
1276 index 6c2521e85a42..513ea55441b7 100644
1277 --- a/arch/sparc/mm/hypersparc.S
1278 +++ b/arch/sparc/mm/hypersparc.S
1279 @@ -13,6 +13,7 @@
1280 #include <asm/pgtable.h>
1281 #include <asm/pgtsrmmu.h>
1282 #include <linux/init.h>
1283 +#include <asm/asmmacro.h>
1284
1285 .text
1286 .align 4
1287 @@ -32,10 +33,12 @@ hypersparc_flush_cache_all:
1288 ld [%g1 + %lo(vac_line_size)], %g2
1289 1:
1290 subcc %g5, %g2, %g5 ! hyper_flush_unconditional_combined
1291 + sta %g0, [%g5] ASI_M_FLUSH_CTX
1292 bne 1b
1293 - sta %g0, [%g5] ASI_M_FLUSH_CTX
1294 + nop
1295 + sta %g0, [%g0] ASI_M_FLUSH_IWHOLE ! hyper_flush_whole_icache
1296 retl
1297 - sta %g0, [%g0] ASI_M_FLUSH_IWHOLE ! hyper_flush_whole_icache
1298 + nop
1299
1300 /* We expand the window flush to get maximum performance. */
1301 hypersparc_flush_cache_mm:
1302 @@ -68,8 +71,9 @@ hypersparc_flush_cache_mm:
1303 sta %g0, [%o0 + %g3] ASI_M_FLUSH_USER
1304 sta %g0, [%o0 + %g4] ASI_M_FLUSH_USER
1305 sta %g0, [%o0 + %g5] ASI_M_FLUSH_USER
1306 + sta %g0, [%o0 + %o4] ASI_M_FLUSH_USER
1307 bne 1b
1308 - sta %g0, [%o0 + %o4] ASI_M_FLUSH_USER
1309 + nop
1310 hypersparc_flush_cache_mm_out:
1311 retl
1312 nop
1313 @@ -117,8 +121,9 @@ hypersparc_flush_cache_range:
1314 sta %g0, [%o3 + %g2] ASI_M_FLUSH_USER
1315 sta %g0, [%o3 + %g3] ASI_M_FLUSH_USER
1316 sta %g0, [%o3 + %g4] ASI_M_FLUSH_USER
1317 + sta %g0, [%o3 + %g5] ASI_M_FLUSH_USER
1318 bne 1b
1319 - sta %g0, [%o3 + %g5] ASI_M_FLUSH_USER
1320 + nop
1321 retl
1322 nop
1323
1324 @@ -145,9 +150,11 @@ hypersparc_flush_cache_range:
1325 sta %g0, [%o2 + %g2] ASI_M_FLUSH_PAGE
1326 sta %g0, [%o2 + %g3] ASI_M_FLUSH_PAGE
1327 andcc %o2, 0xffc, %g0
1328 + B2B_SINGLE_NOP
1329 sta %g0, [%o2 + %g4] ASI_M_FLUSH_PAGE
1330 + sta %g0, [%o2 + %g5] ASI_M_FLUSH_PAGE
1331 bne 2b
1332 - sta %g0, [%o2 + %g5] ASI_M_FLUSH_PAGE
1333 + nop
1334 3:
1335 cmp %o2, %o1
1336 bne 1b
1337 @@ -202,9 +209,11 @@ hypersparc_flush_cache_page:
1338 sta %g0, [%o1 + %g2] ASI_M_FLUSH_PAGE
1339 sta %g0, [%o1 + %g3] ASI_M_FLUSH_PAGE
1340 andcc %o1, 0xffc, %g0
1341 + B2B_SINGLE_NOP
1342 sta %g0, [%o1 + %g4] ASI_M_FLUSH_PAGE
1343 + sta %g0, [%o1 + %g5] ASI_M_FLUSH_PAGE
1344 bne 1b
1345 - sta %g0, [%o1 + %g5] ASI_M_FLUSH_PAGE
1346 + nop
1347 2:
1348 mov SRMMU_FAULT_STATUS, %g7
1349 mov SRMMU_CTX_REG, %g4
1350 @@ -247,9 +256,11 @@ hypersparc_flush_page_to_ram:
1351 sta %g0, [%o0 + %g2] ASI_M_FLUSH_PAGE
1352 sta %g0, [%o0 + %g3] ASI_M_FLUSH_PAGE
1353 andcc %o0, 0xffc, %g0
1354 + B2B_SINGLE_NOP
1355 sta %g0, [%o0 + %g4] ASI_M_FLUSH_PAGE
1356 + sta %g0, [%o0 + %g5] ASI_M_FLUSH_PAGE
1357 bne 1b
1358 - sta %g0, [%o0 + %g5] ASI_M_FLUSH_PAGE
1359 + nop
1360 2:
1361 mov SRMMU_FAULT_STATUS, %g1
1362 retl
1363 @@ -282,8 +293,9 @@ hypersparc_flush_tlb_mm:
1364 sta %o1, [%g1] ASI_M_MMUREGS
1365 sta %g0, [%g2] ASI_M_FLUSH_PROBE
1366 hypersparc_flush_tlb_mm_out:
1367 + sta %g5, [%g1] ASI_M_MMUREGS
1368 retl
1369 - sta %g5, [%g1] ASI_M_MMUREGS
1370 + nop
1371
1372 hypersparc_flush_tlb_range:
1373 ld [%o0 + VMA_VM_MM], %o0
1374 @@ -298,15 +310,16 @@ hypersparc_flush_tlb_range:
1375 sta %o3, [%g1] ASI_M_MMUREGS
1376 and %o1, %o4, %o1
1377 add %o1, 0x200, %o1
1378 - sta %g0, [%o1] ASI_M_FLUSH_PROBE
1379 1:
1380 + sta %g0, [%o1] ASI_M_FLUSH_PROBE
1381 sub %o1, %o4, %o1
1382 cmp %o1, %o2
1383 - blu,a 1b
1384 - sta %g0, [%o1] ASI_M_FLUSH_PROBE
1385 + blu 1b
1386 + nop
1387 hypersparc_flush_tlb_range_out:
1388 + sta %g5, [%g1] ASI_M_MMUREGS
1389 retl
1390 - sta %g5, [%g1] ASI_M_MMUREGS
1391 + nop
1392
1393 hypersparc_flush_tlb_page:
1394 ld [%o0 + VMA_VM_MM], %o0
1395 @@ -321,8 +334,9 @@ hypersparc_flush_tlb_page:
1396 sta %o3, [%g1] ASI_M_MMUREGS
1397 sta %g0, [%o1] ASI_M_FLUSH_PROBE
1398 hypersparc_flush_tlb_page_out:
1399 + sta %g5, [%g1] ASI_M_MMUREGS
1400 retl
1401 - sta %g5, [%g1] ASI_M_MMUREGS
1402 + nop
1403
1404 __INIT
1405
1406 @@ -340,12 +354,19 @@ hypersparc_bzero_1page:
1407 mov 16, %o1
1408 1:
1409 stda %g0, [%o0 + %g0] ASI_M_BFILL
1410 + B2B_SINGLE_NOP
1411 stda %g0, [%o0 + %g2] ASI_M_BFILL
1412 + B2B_SINGLE_NOP
1413 stda %g0, [%o0 + %g3] ASI_M_BFILL
1414 + B2B_SINGLE_NOP
1415 stda %g0, [%o0 + %g4] ASI_M_BFILL
1416 + B2B_SINGLE_NOP
1417 stda %g0, [%o0 + %g5] ASI_M_BFILL
1418 + B2B_SINGLE_NOP
1419 stda %g0, [%o0 + %g7] ASI_M_BFILL
1420 + B2B_SINGLE_NOP
1421 stda %g0, [%o0 + %o2] ASI_M_BFILL
1422 + B2B_SINGLE_NOP
1423 stda %g0, [%o0 + %o3] ASI_M_BFILL
1424 subcc %o1, 1, %o1
1425 bne 1b
1426 @@ -361,17 +382,24 @@ hypersparc_copy_1page:
1427 1:
1428 sta %o0, [%o0 + %o2] ASI_M_BCOPY
1429 add %o0, 32, %o0
1430 + B2B_SINGLE_NOP
1431 sta %o0, [%o0 + %o2] ASI_M_BCOPY
1432 add %o0, 32, %o0
1433 + B2B_SINGLE_NOP
1434 sta %o0, [%o0 + %o2] ASI_M_BCOPY
1435 add %o0, 32, %o0
1436 + B2B_SINGLE_NOP
1437 sta %o0, [%o0 + %o2] ASI_M_BCOPY
1438 add %o0, 32, %o0
1439 + B2B_SINGLE_NOP
1440 sta %o0, [%o0 + %o2] ASI_M_BCOPY
1441 add %o0, 32, %o0
1442 + B2B_SINGLE_NOP
1443 sta %o0, [%o0 + %o2] ASI_M_BCOPY
1444 + B2B_SINGLE_NOP
1445 add %o0, 32, %o0
1446 sta %o0, [%o0 + %o2] ASI_M_BCOPY
1447 + B2B_SINGLE_NOP
1448 add %o0, 32, %o0
1449 sta %o0, [%o0 + %o2] ASI_M_BCOPY
1450 subcc %g1, 1, %g1
1451 diff --git a/arch/sparc/mm/leon_mm.c b/arch/sparc/mm/leon_mm.c
1452 index f8ac99759ed3..c2e0b2905035 100644
1453 --- a/arch/sparc/mm/leon_mm.c
1454 +++ b/arch/sparc/mm/leon_mm.c
1455 @@ -16,6 +16,7 @@
1456 #include <asm/leon.h>
1457 #include <asm/tlbflush.h>
1458 #include <asm/pgtsrmmu.h>
1459 +#include <asm/asmmacro.h>
1460
1461 #include "mm_32.h"
1462
1463 @@ -188,7 +189,10 @@ void leon_flush_icache_all(void)
1464
1465 void leon_flush_dcache_all(void)
1466 {
1467 - __asm__ __volatile__("sta %%g0, [%%g0] %0\n\t" : :
1468 + __asm__ __volatile__(B2B_INLINE_DOUBLE_NOP
1469 + "sta %%g0, [%%g0] %0\n\t"
1470 + B2B_INLINE_DOUBLE_NOP
1471 + : :
1472 "i"(ASI_LEON_DFLUSH) : "memory");
1473 }
1474
1475 @@ -201,15 +205,21 @@ void leon_flush_pcache_all(struct vm_area_struct *vma, unsigned long page)
1476
1477 void leon_flush_cache_all(void)
1478 {
1479 + __asm__ __volatile__(B2B_INLINE_SINGLE_NOP);
1480 __asm__ __volatile__(".align 32\nflush\n.align 32\n"); /*iflush*/
1481 __asm__ __volatile__("sta %%g0, [%%g0] %0\n\t" : :
1482 "i"(ASI_LEON_DFLUSH) : "memory");
1483 + __asm__ __volatile__(B2B_INLINE_DOUBLE_NOP);
1484 +
1485 }
1486
1487 void leon_flush_tlb_all(void)
1488 {
1489 leon_flush_cache_all();
1490 - __asm__ __volatile__("sta %%g0, [%0] %1\n\t" : : "r"(0x400),
1491 + __asm__ __volatile__(B2B_INLINE_DOUBLE_NOP
1492 + "sta %%g0, [%0] %1\n\t"
1493 + B2B_INLINE_DOUBLE_NOP
1494 + : : "r"(0x400),
1495 "i"(ASI_LEON_MMUFLUSH) : "memory");
1496 }
1497
1498 diff --git a/arch/sparc/mm/srmmu.c b/arch/sparc/mm/srmmu.c
1499 index 4ab2e43e93a1..0d523aaf2c1c 100644
1500 --- a/arch/sparc/mm/srmmu.c
1501 +++ b/arch/sparc/mm/srmmu.c
1502 @@ -26,6 +26,7 @@
1503 #include <asm/mmu_context.h>
1504 #include <asm/cacheflush.h>
1505 #include <asm/tlbflush.h>
1506 +#include <asm/asmmacro.h>
1507 #include <asm/io-unit.h>
1508 #include <asm/pgalloc.h>
1509 #include <asm/pgtable.h>
1510 @@ -129,7 +130,9 @@ static void msi_set_sync(void)
1511 {
1512 __asm__ __volatile__ ("lda [%0] %1, %%g3\n\t"
1513 "andn %%g3, %2, %%g3\n\t"
1514 - "sta %%g3, [%0] %1\n\t" : :
1515 + "sta %%g3, [%0] %1\n\t"
1516 + B2B_INLINE_DOUBLE_NOP
1517 + : :
1518 "r" (MSI_MBUS_ARBEN),
1519 "i" (ASI_M_CTL), "r" (MSI_ASYNC_MODE) : "g3");
1520 }
1521 diff --git a/arch/sparc/mm/swift.S b/arch/sparc/mm/swift.S
1522 index f414bfd8d899..303b86ff3864 100644
1523 --- a/arch/sparc/mm/swift.S
1524 +++ b/arch/sparc/mm/swift.S
1525 @@ -10,6 +10,7 @@
1526 #include <asm/page.h>
1527 #include <asm/pgtsrmmu.h>
1528 #include <asm/asm-offsets.h>
1529 +#include <asm/asmmacro.h>
1530
1531 .text
1532 .align 4
1533 @@ -32,8 +33,9 @@ swift_flush_page_to_ram:
1534 1: subcc %o0, 0x10, %o0
1535 add %o0, %o0, %o1
1536 sta %g0, [%o0] ASI_M_DATAC_TAG
1537 + sta %g0, [%o1] ASI_M_TXTC_TAG
1538 bne 1b
1539 - sta %g0, [%o1] ASI_M_TXTC_TAG
1540 + nop
1541 retl
1542 nop
1543 #else
1544 @@ -46,8 +48,9 @@ swift_flush_cache_all:
1545 sethi %hi(16 * 1024), %o0
1546 1: subcc %o0, 16, %o0
1547 sta %g0, [%o0] ASI_M_TXTC_TAG
1548 + sta %g0, [%o0] ASI_M_DATAC_TAG
1549 bne 1b
1550 - sta %g0, [%o0] ASI_M_DATAC_TAG
1551 + nop
1552 retl
1553 nop
1554
1555 diff --git a/arch/sparc/mm/tsunami.S b/arch/sparc/mm/tsunami.S
1556 index 62b742df65dc..fc0c18e74408 100644
1557 --- a/arch/sparc/mm/tsunami.S
1558 +++ b/arch/sparc/mm/tsunami.S
1559 @@ -11,6 +11,7 @@
1560 #include <asm/asi.h>
1561 #include <asm/page.h>
1562 #include <asm/pgtsrmmu.h>
1563 +#include <asm/asmmacro.h>
1564
1565 .text
1566 .align 4
1567 @@ -81,8 +82,9 @@ tsunami_flush_tlb_page:
1568 nop
1569 nop
1570 tsunami_flush_tlb_page_out:
1571 + sta %g5, [%g1] ASI_M_MMUREGS
1572 retl
1573 - sta %g5, [%g1] ASI_M_MMUREGS
1574 + nop
1575
1576 #define MIRROR_BLOCK(dst, src, offset, t0, t1, t2, t3) \
1577 ldd [src + offset + 0x18], t0; \
1578 diff --git a/arch/sparc/mm/viking.S b/arch/sparc/mm/viking.S
1579 index 48f062de7a7f..de913516fdeb 100644
1580 --- a/arch/sparc/mm/viking.S
1581 +++ b/arch/sparc/mm/viking.S
1582 @@ -16,6 +16,7 @@
1583 #include <asm/pgtable.h>
1584 #include <asm/pgtsrmmu.h>
1585 #include <asm/viking.h>
1586 +#include <asm/asmmacro.h>
1587
1588 #ifdef CONFIG_SMP
1589 .data
1590 @@ -99,8 +100,8 @@ viking_mxcc_flush_page:
1591 sub %g3, MXCC_STREAM_SIZE, %g3
1592 6:
1593 stda %g2, [%o2] ASI_M_MXCC
1594 - stda %g2, [%o3] ASI_M_MXCC
1595 andncc %g3, PAGE_MASK, %g0
1596 + stda %g2, [%o3] ASI_M_MXCC
1597 bne 6b
1598 sub %g3, MXCC_STREAM_SIZE, %g3
1599
1600 @@ -128,8 +129,9 @@ viking_flush_cache_out:
1601
1602 viking_flush_tlb_all:
1603 mov 0x400, %g1
1604 + sta %g0, [%g1] ASI_M_FLUSH_PROBE
1605 retl
1606 - sta %g0, [%g1] ASI_M_FLUSH_PROBE
1607 + nop
1608
1609 viking_flush_tlb_mm:
1610 mov SRMMU_CTX_REG, %g1
1611 @@ -142,8 +144,9 @@ viking_flush_tlb_mm:
1612 mov 0x300, %g2
1613 sta %o1, [%g1] ASI_M_MMUREGS
1614 sta %g0, [%g2] ASI_M_FLUSH_PROBE
1615 + sta %g5, [%g1] ASI_M_MMUREGS
1616 retl
1617 - sta %g5, [%g1] ASI_M_MMUREGS
1618 + nop
1619 #ifndef CONFIG_SMP
1620 1: retl
1621 nop
1622 @@ -162,13 +165,14 @@ viking_flush_tlb_range:
1623 sta %o3, [%g1] ASI_M_MMUREGS
1624 and %o1, %o4, %o1
1625 add %o1, 0x200, %o1
1626 - sta %g0, [%o1] ASI_M_FLUSH_PROBE
1627 -1: sub %o1, %o4, %o1
1628 +1: sta %g0, [%o1] ASI_M_FLUSH_PROBE
1629 + sub %o1, %o4, %o1
1630 cmp %o1, %o2
1631 - blu,a 1b
1632 - sta %g0, [%o1] ASI_M_FLUSH_PROBE
1633 + blu 1b
1634 + nop
1635 + sta %g5, [%g1] ASI_M_MMUREGS
1636 retl
1637 - sta %g5, [%g1] ASI_M_MMUREGS
1638 + nop
1639 #ifndef CONFIG_SMP
1640 2: retl
1641 nop
1642 @@ -186,8 +190,9 @@ viking_flush_tlb_page:
1643 and %o1, PAGE_MASK, %o1
1644 sta %o3, [%g1] ASI_M_MMUREGS
1645 sta %g0, [%o1] ASI_M_FLUSH_PROBE
1646 + sta %g5, [%g1] ASI_M_MMUREGS
1647 retl
1648 - sta %g5, [%g1] ASI_M_MMUREGS
1649 + nop
1650 #ifndef CONFIG_SMP
1651 1: retl
1652 nop
1653 @@ -209,8 +214,9 @@ sun4dsmp_flush_tlb_all:
1654 bne 2f
1655 mov 0x400, %g1
1656 sta %g0, [%g1] ASI_M_FLUSH_PROBE
1657 + stb %g0, [%g3 + %lo(sun4dsmp_flush_tlb_spin)]
1658 retl
1659 - stb %g0, [%g3 + %lo(sun4dsmp_flush_tlb_spin)]
1660 + nop
1661 2: tst %g5
1662 bne,a 2b
1663 ldub [%g3 + %lo(sun4dsmp_flush_tlb_spin)], %g5
1664 @@ -228,8 +234,9 @@ sun4dsmp_flush_tlb_mm:
1665 sta %o1, [%g1] ASI_M_MMUREGS
1666 sta %g0, [%g2] ASI_M_FLUSH_PROBE
1667 sta %g5, [%g1] ASI_M_MMUREGS
1668 + stb %g0, [%g3 + %lo(sun4dsmp_flush_tlb_spin)]
1669 retl
1670 - stb %g0, [%g3 + %lo(sun4dsmp_flush_tlb_spin)]
1671 + nop
1672 2: tst %g5
1673 bne,a 2b
1674 ldub [%g3 + %lo(sun4dsmp_flush_tlb_spin)], %g5
1675 @@ -248,14 +255,15 @@ sun4dsmp_flush_tlb_range:
1676 sta %o3, [%g1] ASI_M_MMUREGS
1677 and %o1, %o4, %o1
1678 add %o1, 0x200, %o1
1679 - sta %g0, [%o1] ASI_M_FLUSH_PROBE
1680 -2: sub %o1, %o4, %o1
1681 +2: sta %g0, [%o1] ASI_M_FLUSH_PROBE
1682 + sub %o1, %o4, %o1
1683 cmp %o1, %o2
1684 - blu,a 2b
1685 - sta %g0, [%o1] ASI_M_FLUSH_PROBE
1686 + blu 2b
1687 + nop
1688 sta %g5, [%g1] ASI_M_MMUREGS
1689 + stb %g0, [%g3 + %lo(sun4dsmp_flush_tlb_spin)]
1690 retl
1691 - stb %g0, [%g3 + %lo(sun4dsmp_flush_tlb_spin)]
1692 + nop
1693 3: tst %g5
1694 bne,a 3b
1695 ldub [%g3 + %lo(sun4dsmp_flush_tlb_spin)], %g5
1696 @@ -274,8 +282,9 @@ sun4dsmp_flush_tlb_page:
1697 sta %o3, [%g1] ASI_M_MMUREGS
1698 sta %g0, [%o1] ASI_M_FLUSH_PROBE
1699 sta %g5, [%g1] ASI_M_MMUREGS
1700 + stb %g0, [%g3 + %lo(sun4dsmp_flush_tlb_spin)]
1701 retl
1702 - stb %g0, [%g3 + %lo(sun4dsmp_flush_tlb_spin)]
1703 + nop
1704 2: tst %g5
1705 bne,a 2b
1706 ldub [%g3 + %lo(sun4dsmp_flush_tlb_spin)], %g5
1707 --
1708 2.34.1
1709