1 From 42a65418382690b7199ab23fbe72071da3f6a12d Mon Sep 17 00:00:00 2001
2 From: Andreas Larsson <andreas@gaisler.com>
3 Date: Thu, 22 Sep 2016 15:52:07 +0200
4 Subject: [PATCH 21/32] sparc32: leon: Add fixes for leon3ft b2b store errata
6 Signed-off-by: Andreas Larsson <andreas@gaisler.com>
8 arch/sparc/include/asm/asmmacro.h | 12 ++++
9 arch/sparc/include/asm/checksum_32.h | 1 +
10 arch/sparc/include/asm/leon.h | 15 ++++-
11 arch/sparc/include/asm/obio.h | 6 +-
12 arch/sparc/include/asm/pgtsrmmu.h | 7 ++-
13 arch/sparc/include/asm/processor_32.h | 14 ++++-
14 arch/sparc/include/asm/psr.h | 3 +
15 arch/sparc/include/asm/sbi.h | 17 +++++-
16 arch/sparc/include/asm/spinlock_32.h | 10 +++-
17 arch/sparc/include/asm/uaccess_32.h | 5 +-
18 arch/sparc/include/asm/winmacro.h | 23 ++++++-
19 arch/sparc/include/asm/xor_32.h | 18 ++++++
20 arch/sparc/kernel/entry.S | 46 +++++++++++---
21 arch/sparc/kernel/etrap_32.S | 1 +
22 arch/sparc/kernel/head_32.S | 19 +++++-
23 arch/sparc/kernel/leon_smp.c | 10 +++-
24 arch/sparc/kernel/sun4d_smp.c | 8 ++-
25 arch/sparc/kernel/una_asm_32.S | 23 ++++---
26 arch/sparc/kernel/wof.S | 4 +-
27 arch/sparc/lib/blockops.S | 17 +++++-
28 arch/sparc/lib/checksum_32.S | 28 +++++----
29 arch/sparc/lib/copy_user.S | 7 ++-
30 arch/sparc/lib/locks.S | 9 +++
31 arch/sparc/lib/memcpy.S | 10 ++--
32 arch/sparc/lib/memset.S | 86 ++++++++++++++++++++++-----
33 arch/sparc/mm/hypersparc.S | 54 +++++++++++++----
34 arch/sparc/mm/leon_mm.c | 14 ++++-
35 arch/sparc/mm/srmmu.c | 5 +-
36 arch/sparc/mm/swift.S | 7 ++-
37 arch/sparc/mm/tsunami.S | 4 +-
38 arch/sparc/mm/viking.S | 43 ++++++++------
39 31 files changed, 417 insertions(+), 109 deletions(-)
41 diff --git a/arch/sparc/include/asm/asmmacro.h b/arch/sparc/include/asm/asmmacro.h
42 index 49aaf6f3bc55..687269d581d1 100644
43 --- a/arch/sparc/include/asm/asmmacro.h
44 +++ b/arch/sparc/include/asm/asmmacro.h
49 +#ifdef __FIX_LEON3FT_B2BST
50 +#define B2B_SINGLE_NOP nop;
51 +#define B2B_DOUBLE_NOP nop; nop;
52 +#define B2B_INLINE_SINGLE_NOP "nop\n\t"
53 +#define B2B_INLINE_DOUBLE_NOP "nop\n\tnop\n\t"
55 +#define B2B_SINGLE_NOP
56 +#define B2B_DOUBLE_NOP
57 +#define B2B_INLINE_SINGLE_NOP ""
58 +#define B2B_INLINE_DOUBLE_NOP ""
61 #endif /* !(_SPARC_ASMMACRO_H) */
62 diff --git a/arch/sparc/include/asm/checksum_32.h b/arch/sparc/include/asm/checksum_32.h
63 index ce11e0ad80c7..981a36b40754 100644
64 --- a/arch/sparc/include/asm/checksum_32.h
65 +++ b/arch/sparc/include/asm/checksum_32.h
68 #include <linux/in6.h>
69 #include <linux/uaccess.h>
70 +#include <asm/asmmacro.h>
72 /* computes the checksum of a memory block at buff, length len,
73 * and adds in "sum" (32-bit)
74 diff --git a/arch/sparc/include/asm/leon.h b/arch/sparc/include/asm/leon.h
75 index c1e05e4ab9e3..143c06f8c7bc 100644
76 --- a/arch/sparc/include/asm/leon.h
77 +++ b/arch/sparc/include/asm/leon.h
82 +#include <asm/asmmacro.h>
84 /* do a physical address bypass write, i.e. for 0x80000000 */
85 static inline void leon_store_reg(unsigned long paddr, unsigned long value)
87 - __asm__ __volatile__("sta %0, [%1] %2\n\t" : : "r"(value), "r"(paddr),
88 + __asm__ __volatile__(B2B_INLINE_DOUBLE_NOP
89 + "sta %0, [%1] %2\n\t"
90 + B2B_INLINE_DOUBLE_NOP
91 + : : "r"(value), "r"(paddr),
92 "i"(ASI_LEON_BYPASS) : "memory");
95 @@ -102,7 +107,9 @@ static inline void sparc_leon3_enable_snooping(void)
96 __asm__ __volatile__ ("lda [%%g0] 2, %%l1\n\t"
97 "set 0x800000, %%l2\n\t"
98 "or %%l2, %%l1, %%l2\n\t"
99 - "sta %%l2, [%%g0] 2\n\t" : : : "l1", "l2");
100 + "sta %%l2, [%%g0] 2\n\t"
101 + B2B_INLINE_DOUBLE_NOP
105 static inline int sparc_leon3_snooping_enabled(void)
106 @@ -117,7 +124,9 @@ static inline void sparc_leon3_disable_cache(void)
107 __asm__ __volatile__ ("lda [%%g0] 2, %%l1\n\t"
108 "set 0x00000f, %%l2\n\t"
109 "andn %%l2, %%l1, %%l2\n\t"
110 - "sta %%l2, [%%g0] 2\n\t" : : : "l1", "l2");
111 + "sta %%l2, [%%g0] 2\n\t"
112 + B2B_INLINE_DOUBLE_NOP
116 static inline unsigned long sparc_leon3_asr17(void)
117 diff --git a/arch/sparc/include/asm/obio.h b/arch/sparc/include/asm/obio.h
118 index 1b151f738b00..122a49968a04 100644
119 --- a/arch/sparc/include/asm/obio.h
120 +++ b/arch/sparc/include/asm/obio.h
121 @@ -112,7 +112,11 @@ static inline int bw_get_intr_mask(int sbus_level)
123 static inline void bw_clear_intr_mask(int sbus_level, int mask)
125 - __asm__ __volatile__ ("stha %0, [%1] %2" : :
126 + /* Not used for LEON. B2B-nops just to make scan script happy. */
127 + __asm__ __volatile__ (B2B_INLINE_DOUBLE_NOP
128 + "stha %0, [%1] %2\n\t"
129 + B2B_INLINE_DOUBLE_NOP
132 "r" (BW_LOCAL_BASE + BW_INTR_TABLE_CLEAR + (sbus_level << 3)),
134 diff --git a/arch/sparc/include/asm/pgtsrmmu.h b/arch/sparc/include/asm/pgtsrmmu.h
135 index 117009b03cf4..5c16cc8f0a38 100644
136 --- a/arch/sparc/include/asm/pgtsrmmu.h
137 +++ b/arch/sparc/include/asm/pgtsrmmu.h
139 restore %g0, %g0, %g0;
142 +#include <asm/asmmacro.h>
144 extern unsigned long last_valid_pfn;
146 /* This makes sense. Honest it does - Anton */
147 @@ -127,7 +129,10 @@ unsigned int srmmu_get_faddr(void);
148 /* This is guaranteed on all SRMMU's. */
149 static inline void srmmu_flush_whole_tlb(void)
151 - __asm__ __volatile__("sta %%g0, [%0] %1\n\t": :
152 + __asm__ __volatile__(B2B_INLINE_DOUBLE_NOP
153 + "sta %%g0, [%0] %1\n\t"
154 + B2B_INLINE_DOUBLE_NOP
156 "r" (0x400), /* Flush entire TLB!! */
157 "i" (ASI_M_FLUSH_PROBE) : "memory");
159 diff --git a/arch/sparc/include/asm/processor_32.h b/arch/sparc/include/asm/processor_32.h
160 index 3c4bc2189092..bf7c364caa29 100644
161 --- a/arch/sparc/include/asm/processor_32.h
162 +++ b/arch/sparc/include/asm/processor_32.h
164 #include <asm/head.h>
165 #include <asm/signal.h>
166 #include <asm/page.h>
167 +#include <asm/asmmacro.h>
169 /* Whee, this is STACK_TOP + PAGE_SIZE and the lowest kernel address too...
170 * That one page is used to protect kernel from intruders, so that
171 @@ -73,15 +74,24 @@ static inline void start_thread(struct pt_regs * regs, unsigned long pc,
172 regs->npc = regs->pc + 4;
175 - __asm__ __volatile__("std\t%%g0, [%0 + %3 + 0x00]\n\t"
176 + __asm__ __volatile__(B2B_INLINE_DOUBLE_NOP
177 + "std\t%%g0, [%0 + %3 + 0x00]\n\t"
178 + B2B_INLINE_SINGLE_NOP
179 "std\t%%g0, [%0 + %3 + 0x08]\n\t"
180 + B2B_INLINE_SINGLE_NOP
181 "std\t%%g0, [%0 + %3 + 0x10]\n\t"
182 + B2B_INLINE_SINGLE_NOP
183 "std\t%%g0, [%0 + %3 + 0x18]\n\t"
184 + B2B_INLINE_SINGLE_NOP
185 "std\t%%g0, [%0 + %3 + 0x20]\n\t"
186 + B2B_INLINE_SINGLE_NOP
187 "std\t%%g0, [%0 + %3 + 0x28]\n\t"
188 + B2B_INLINE_SINGLE_NOP
189 "std\t%%g0, [%0 + %3 + 0x30]\n\t"
190 + B2B_INLINE_SINGLE_NOP
191 "st\t%1, [%0 + %3 + 0x38]\n\t"
192 - "st\t%%g0, [%0 + %3 + 0x3c]"
193 + "st\t%%g0, [%0 + %3 + 0x3c]\n\t"
194 + B2B_INLINE_DOUBLE_NOP
197 "r" (sp - sizeof(struct reg_window32)),
198 diff --git a/arch/sparc/include/asm/psr.h b/arch/sparc/include/asm/psr.h
199 index 65127ce565ab..4ad45ccfe8d8 100644
200 --- a/arch/sparc/include/asm/psr.h
201 +++ b/arch/sparc/include/asm/psr.h
206 +#include <asm/asmmacro.h>
208 /* Get the %psr register. */
209 static inline unsigned int get_psr(void)
211 @@ -55,6 +57,7 @@ static inline unsigned int get_fsr(void)
212 unsigned int fsr = 0;
214 __asm__ __volatile__(
215 + B2B_INLINE_DOUBLE_NOP
219 diff --git a/arch/sparc/include/asm/sbi.h b/arch/sparc/include/asm/sbi.h
220 index 4d6026c1e446..49b4e0aa4689 100644
221 --- a/arch/sparc/include/asm/sbi.h
222 +++ b/arch/sparc/include/asm/sbi.h
223 @@ -66,6 +66,8 @@ struct sbi_regs {
227 +#include <asm/asmmacro.h>
229 static inline int acquire_sbi(int devid, int mask)
231 __asm__ __volatile__ ("swapa [%2] %3, %0" :
232 @@ -78,7 +80,10 @@ static inline int acquire_sbi(int devid, int mask)
234 static inline void release_sbi(int devid, int mask)
236 - __asm__ __volatile__ ("sta %0, [%1] %2" : :
237 + __asm__ __volatile__ (B2B_INLINE_DOUBLE_NOP
238 + "sta %0, [%1] %2\n\t"
239 + B2B_INLINE_DOUBLE_NOP
242 "r" (ECSR_DEV_BASE(devid) | SBI_INTR_STATE),
244 @@ -86,7 +91,10 @@ static inline void release_sbi(int devid, int mask)
246 static inline void set_sbi_tid(int devid, int targetid)
248 - __asm__ __volatile__ ("sta %0, [%1] %2" : :
249 + __asm__ __volatile__ (B2B_INLINE_DOUBLE_NOP
250 + "sta %0, [%1] %2\n\t"
251 + B2B_INLINE_DOUBLE_NOP
254 "r" (ECSR_DEV_BASE(devid) | SBI_INTR_TID),
256 @@ -105,7 +113,10 @@ static inline int get_sbi_ctl(int devid, int cfgno)
258 static inline void set_sbi_ctl(int devid, int cfgno, int cfg)
260 - __asm__ __volatile__ ("sta %0, [%1] %2" : :
261 + __asm__ __volatile__ (B2B_INLINE_DOUBLE_NOP
262 + "sta %0, [%1] %2\n\t"
263 + B2B_INLINE_DOUBLE_NOP
266 "r" ((ECSR_DEV_BASE(devid) | SBI_CFG0) + (cfgno<<2)),
268 diff --git a/arch/sparc/include/asm/spinlock_32.h b/arch/sparc/include/asm/spinlock_32.h
269 index bc5aa6f61676..adade4095cf2 100644
270 --- a/arch/sparc/include/asm/spinlock_32.h
271 +++ b/arch/sparc/include/asm/spinlock_32.h
274 #include <asm/barrier.h>
275 #include <asm/processor.h> /* for cpu_relax */
276 +#include <asm/asmmacro.h>
278 #define arch_spin_is_locked(lock) (*((volatile unsigned char *)(lock)) != 0)
280 @@ -47,7 +48,10 @@ static inline int arch_spin_trylock(arch_spinlock_t *lock)
282 static inline void arch_spin_unlock(arch_spinlock_t *lock)
284 - __asm__ __volatile__("stb %%g0, [%0]" : : "r" (lock) : "memory");
285 + __asm__ __volatile__(B2B_INLINE_DOUBLE_NOP
286 + "stb %%g0, [%0]\n\t"
287 + B2B_INLINE_DOUBLE_NOP
288 + : : "r" (lock) : "memory");
291 /* Read-write spinlocks, allowing multiple readers
292 @@ -133,7 +137,9 @@ static inline void arch_write_lock(arch_rwlock_t *rw)
293 static inline void arch_write_unlock(arch_rwlock_t *lock)
295 __asm__ __volatile__(
297 +" " B2B_INLINE_DOUBLE_NOP
299 +" " B2B_INLINE_DOUBLE_NOP
303 diff --git a/arch/sparc/include/asm/uaccess_32.h b/arch/sparc/include/asm/uaccess_32.h
304 index 0a2d3ebc4bb8..98432ac982ab 100644
305 --- a/arch/sparc/include/asm/uaccess_32.h
306 +++ b/arch/sparc/include/asm/uaccess_32.h
308 #include <linux/string.h>
310 #include <asm/processor.h>
311 +#include <asm/asmmacro.h>
313 #define ARCH_HAS_SORT_EXTABLE
314 #define ARCH_HAS_SEARCH_EXTABLE
315 @@ -145,8 +146,10 @@ struct __large_struct { unsigned long buf[100]; };
316 #define __put_user_asm(x, size, addr, ret) \
317 __asm__ __volatile__( \
318 "/* Put user asm, inline. */\n" \
319 + B2B_INLINE_DOUBLE_NOP \
320 "1:\t" "st"#size " %1, %2\n\t" \
323 + B2B_INLINE_SINGLE_NOP \
325 ".section .fixup,#alloc,#execinstr\n\t" \
327 diff --git a/arch/sparc/include/asm/winmacro.h b/arch/sparc/include/asm/winmacro.h
328 index b6e911f5d93c..9c6208460a46 100644
329 --- a/arch/sparc/include/asm/winmacro.h
330 +++ b/arch/sparc/include/asm/winmacro.h
332 #define _SPARC_WINMACRO_H
334 #include <asm/ptrace.h>
335 +#include <asm/asmmacro.h>
337 /* Store the register window onto the 8-byte aligned area starting
338 * at %reg. It might be %sp, it might not, we don't care.
340 #define STORE_WINDOW(reg) \
341 std %l0, [%reg + RW_L0]; \
343 std %l2, [%reg + RW_L2]; \
345 std %l4, [%reg + RW_L4]; \
347 std %l6, [%reg + RW_L6]; \
349 std %i0, [%reg + RW_I0]; \
351 std %i2, [%reg + RW_I2]; \
353 std %i4, [%reg + RW_I4]; \
354 - std %i6, [%reg + RW_I6];
356 + std %i6, [%reg + RW_I6]; \
359 /* Load a register window from the area beginning at %reg. */
360 #define LOAD_WINDOW(reg) \
363 #define STORE_PT_INS(base_reg) \
364 std %i0, [%base_reg + STACKFRAME_SZ + PT_I0]; \
366 std %i2, [%base_reg + STACKFRAME_SZ + PT_I2]; \
368 std %i4, [%base_reg + STACKFRAME_SZ + PT_I4]; \
369 - std %i6, [%base_reg + STACKFRAME_SZ + PT_I6];
371 + std %i6, [%base_reg + STACKFRAME_SZ + PT_I6]; \
374 #define STORE_PT_GLOBALS(base_reg) \
375 st %g1, [%base_reg + STACKFRAME_SZ + PT_G1]; \
376 std %g2, [%base_reg + STACKFRAME_SZ + PT_G2]; \
378 std %g4, [%base_reg + STACKFRAME_SZ + PT_G4]; \
379 - std %g6, [%base_reg + STACKFRAME_SZ + PT_G6];
381 + std %g6, [%base_reg + STACKFRAME_SZ + PT_G6]; \
384 #define STORE_PT_YREG(base_reg, scratch) \
387 st %scratch, [%base_reg + STACKFRAME_SZ + PT_Y];
389 diff --git a/arch/sparc/include/asm/xor_32.h b/arch/sparc/include/asm/xor_32.h
390 index 3e5af37e4b9c..3c72d9644785 100644
391 --- a/arch/sparc/include/asm/xor_32.h
392 +++ b/arch/sparc/include/asm/xor_32.h
394 * Copyright (C) 1999 Jakub Jelinek (jj@ultra.linux.cz)
397 +#include <asm/asmmacro.h>
400 sparc_2(unsigned long bytes, unsigned long *p1, unsigned long *p2)
402 @@ -36,9 +38,13 @@ sparc_2(unsigned long bytes, unsigned long *p1, unsigned long *p2)
403 "xor %%o2, %%l4, %%o2\n\t"
404 "xor %%o3, %%l5, %%o3\n\t"
405 "std %%g2, [%0 + 0x00]\n\t"
406 + B2B_INLINE_SINGLE_NOP
407 "std %%g4, [%0 + 0x08]\n\t"
408 + B2B_INLINE_SINGLE_NOP
409 "std %%o0, [%0 + 0x10]\n\t"
410 + B2B_INLINE_SINGLE_NOP
411 "std %%o2, [%0 + 0x18]\n"
412 + B2B_INLINE_SINGLE_NOP
415 : "g2", "g3", "g4", "g5",
416 @@ -86,9 +92,13 @@ sparc_3(unsigned long bytes, unsigned long *p1, unsigned long *p2,
417 "xor %%o2, %%l4, %%o2\n\t"
418 "xor %%o3, %%l5, %%o3\n\t"
419 "std %%g2, [%0 + 0x00]\n\t"
420 + B2B_INLINE_SINGLE_NOP
421 "std %%g4, [%0 + 0x08]\n\t"
422 + B2B_INLINE_SINGLE_NOP
423 "std %%o0, [%0 + 0x10]\n\t"
424 + B2B_INLINE_SINGLE_NOP
425 "std %%o2, [%0 + 0x18]\n"
426 + B2B_INLINE_SINGLE_NOP
428 : "r" (p1), "r" (p2), "r" (p3)
429 : "g2", "g3", "g4", "g5",
430 @@ -149,9 +159,13 @@ sparc_4(unsigned long bytes, unsigned long *p1, unsigned long *p2,
431 "xor %%o2, %%l4, %%o2\n\t"
432 "xor %%o3, %%l5, %%o3\n\t"
433 "std %%g2, [%0 + 0x00]\n\t"
434 + B2B_INLINE_SINGLE_NOP
435 "std %%g4, [%0 + 0x08]\n\t"
436 + B2B_INLINE_SINGLE_NOP
437 "std %%o0, [%0 + 0x10]\n\t"
438 + B2B_INLINE_SINGLE_NOP
439 "std %%o2, [%0 + 0x18]\n"
440 + B2B_INLINE_SINGLE_NOP
442 : "r" (p1), "r" (p2), "r" (p3), "r" (p4)
443 : "g2", "g3", "g4", "g5",
444 @@ -225,9 +239,13 @@ sparc_5(unsigned long bytes, unsigned long *p1, unsigned long *p2,
445 "xor %%o2, %%l4, %%o2\n\t"
446 "xor %%o3, %%l5, %%o3\n\t"
447 "std %%g2, [%0 + 0x00]\n\t"
448 + B2B_INLINE_SINGLE_NOP
449 "std %%g4, [%0 + 0x08]\n\t"
450 + B2B_INLINE_SINGLE_NOP
451 "std %%o0, [%0 + 0x10]\n\t"
452 + B2B_INLINE_SINGLE_NOP
453 "std %%o2, [%0 + 0x18]\n"
454 + B2B_INLINE_SINGLE_NOP
456 : "r" (p1), "r" (p2), "r" (p3), "r" (p4), "r" (p5)
457 : "g2", "g3", "g4", "g5",
458 diff --git a/arch/sparc/kernel/entry.S b/arch/sparc/kernel/entry.S
459 index 87c68aeeb794..c0fdf1de10f2 100644
460 --- a/arch/sparc/kernel/entry.S
461 +++ b/arch/sparc/kernel/entry.S
462 @@ -121,6 +121,7 @@ floppy_tdone:
463 sethi %hi(pdma_vaddr), %l5
464 st %l4, [%l5 + %lo(pdma_vaddr)]
465 sethi %hi(pdma_size), %l5
467 st %l6, [%l5 + %lo(pdma_size)]
468 /* Flip terminal count pin */
469 set auxio_register, %l7
470 @@ -138,11 +139,13 @@ floppy_tdone:
476 /* Prevent recursion */
477 sethi %hi(doing_pdma), %l7
478 + st %g0, [%l7 + %lo(doing_pdma)]
480 - st %g0, [%l7 + %lo(doing_pdma)]
483 /* We emptied the FIFO, but we haven't read everything
484 * as of yet. Store the current transfer address and
485 @@ -153,6 +156,7 @@ floppy_fifo_emptied:
486 sethi %hi(pdma_vaddr), %l5
487 st %l4, [%l5 + %lo(pdma_vaddr)]
488 sethi %hi(pdma_size), %l7
490 st %l6, [%l7 + %lo(pdma_size)]
492 /* Restore condition codes */
493 @@ -165,10 +169,12 @@ floppy_fifo_emptied:
495 sethi %hi(pdma_vaddr), %l5
496 st %l4, [%l5 + %lo(pdma_vaddr)]
498 sethi %hi(pdma_size), %l5
499 st %l6, [%l5 + %lo(pdma_size)]
500 /* Prevent recursion */
501 sethi %hi(doing_pdma), %l7
503 st %g0, [%l7 + %lo(doing_pdma)]
505 /* fall through... */
506 @@ -323,8 +329,9 @@ linux_trap_ipi15_sun4m:
508 ld [%o5 + 0x00], %o3 ! sun4m_irq_percpu[cpu]->pending
510 + st %o2, [%o5 + 0x04] ! sun4m_irq_percpu[cpu]->clear=0x80000000
511 be sun4m_nmi_error ! Must be an NMI async memory error
512 - st %o2, [%o5 + 0x04] ! sun4m_irq_percpu[cpu]->clear=0x80000000
515 ld [%o5 + 0x00], %g0 ! sun4m_irq_percpu[cpu]->pending
517 @@ -1024,8 +1031,9 @@ ret_sys_call:
518 ld [%sp + STACKFRAME_SZ + PT_NPC], %l1 /* pc = npc */
519 add %l1, 0x4, %l2 /* npc = npc+4 */
520 st %l1, [%sp + STACKFRAME_SZ + PT_PC]
521 + st %l2, [%sp + STACKFRAME_SZ + PT_NPC]
523 - st %l2, [%sp + STACKFRAME_SZ + PT_NPC]
526 /* System call failure, set Carry condition code.
527 * Also, get abs(errno) to return to the process.
528 @@ -1038,8 +1046,9 @@ ret_sys_call:
529 ld [%sp + STACKFRAME_SZ + PT_NPC], %l1 /* pc = npc */
530 add %l1, 0x4, %l2 /* npc = npc+4 */
531 st %l1, [%sp + STACKFRAME_SZ + PT_PC]
532 + st %l2, [%sp + STACKFRAME_SZ + PT_NPC]
534 - st %l2, [%sp + STACKFRAME_SZ + PT_NPC]
537 linux_syscall_trace2:
538 add %sp, STACKFRAME_SZ, %o0
539 @@ -1047,8 +1056,9 @@ linux_syscall_trace2:
541 add %l1, 0x4, %l2 /* npc = npc+4 */
542 st %l1, [%sp + STACKFRAME_SZ + PT_PC]
543 + st %l2, [%sp + STACKFRAME_SZ + PT_NPC]
545 - st %l2, [%sp + STACKFRAME_SZ + PT_NPC]
549 /* Saving and restoring the FPU state is best done from lowlevel code.
550 @@ -1070,6 +1080,7 @@ fpsave:
551 /* We have an fpqueue to save. */
558 @@ -1086,22 +1097,39 @@ fpsave_magic:
561 std %f0, [%o0 + 0x00]
563 std %f2, [%o0 + 0x08]
565 std %f4, [%o0 + 0x10]
567 std %f6, [%o0 + 0x18]
569 std %f8, [%o0 + 0x20]
571 std %f10, [%o0 + 0x28]
573 std %f12, [%o0 + 0x30]
575 std %f14, [%o0 + 0x38]
577 std %f16, [%o0 + 0x40]
579 std %f18, [%o0 + 0x48]
581 std %f20, [%o0 + 0x50]
583 std %f22, [%o0 + 0x58]
585 std %f24, [%o0 + 0x60]
587 std %f26, [%o0 + 0x68]
589 std %f28, [%o0 + 0x70]
591 + std %f30, [%o0 + 0x78]
594 - std %f30, [%o0 + 0x78]
597 /* Thanks for Theo Deraadt and the authors of the Sprite/netbsd/openbsd
598 * code for pointing out this possible deadlock, while we save state
599 @@ -1109,8 +1137,9 @@ fpsave_magic:
600 * code has to know how to deal with this.
609 st %fsr, [%o1] /* In this case, this is the first successful fsr read */
610 @@ -1267,8 +1296,9 @@ kuw_patch1:
611 wr %o5, 0x0, %psr ! re-enable interrupts
612 WRITE_PAUSE ! burn baby burn
614 + st %g0, [%g6 + TI_W_SAVED] ! no windows saved
616 - st %g0, [%g6 + TI_W_SAVED] ! no windows saved
620 .globl restore_current
621 diff --git a/arch/sparc/kernel/etrap_32.S b/arch/sparc/kernel/etrap_32.S
622 index 9f243f918619..860df075a355 100644
623 --- a/arch/sparc/kernel/etrap_32.S
624 +++ b/arch/sparc/kernel/etrap_32.S
625 @@ -253,6 +253,7 @@ trap_setup_user_stack_is_bolixed:
626 or %glob_tmp, 0x2, %glob_tmp ! or in no_fault bit
627 LEON_PI(sta %glob_tmp, [%g0] ASI_LEON_MMUREGS) ! set it
628 SUN_PI_(sta %glob_tmp, [%g0] ASI_M_MMUREGS) ! set it
631 /* Dump the registers and cross fingers. */
633 diff --git a/arch/sparc/kernel/head_32.S b/arch/sparc/kernel/head_32.S
634 index be30c8d4cc73..cdff4d974434 100644
635 --- a/arch/sparc/kernel/head_32.S
636 +++ b/arch/sparc/kernel/head_32.S
638 #include <asm/errno.h>
639 #include <asm/pgtable.h> /* PGDIR_SHIFT */
640 #include <asm/export.h>
641 +#include <asm/asmmacro.h>
644 /* The following are used with the prom_vector node-ops to figure out
645 @@ -365,6 +366,7 @@ execute_in_high_mem:
647 sethi %hi(prom_vector_p), %g1
648 st %o0, [%g1 + %lo(prom_vector_p)]
651 sethi %hi(linux_dbvec), %g1
652 st %o1, [%g1 + %lo(linux_dbvec)]
653 @@ -465,6 +467,7 @@ sun4d_init:
655 sta %g4, [%g0] ASI_M_VIKING_TMP1
656 sethi %hi(boot_cpu_id), %g5
658 stb %g4, [%g5 + %lo(boot_cpu_id)]
661 @@ -550,6 +553,7 @@ continue_boot:
669 @@ -624,21 +628,27 @@ continue_boot:
670 set flush_patch_one, %g5
674 set flush_patch_two, %g5
678 set flush_patch_three, %g5
682 set flush_patch_four, %g5
686 set flush_patch_exception, %g5
690 set flush_patch_switch, %g5
696 sethi %hi(nwindows), %g4
697 @@ -738,8 +748,9 @@ no_sun4u_here:
708 @@ -750,13 +761,15 @@ no_sun4u_here:
710 ld [%l1 + (sun4u_r1 - sun4u_a1)], %o1
711 add %l1, (sun4u_a2 - sun4u_a1), %o0
712 + st %o1, [%o0 + (sun4u_i2 - sun4u_a2)]
714 - st %o1, [%o0 + (sun4u_i2 - sun4u_a2)]
717 ld [%l1 + (sun4u_1 - sun4u_a1)], %o1
718 add %l1, (sun4u_a3 - sun4u_a1), %o0
720 st %o1, [%o0 + (sun4u_i3 - sun4u_a3)]
725 add %l1, (sun4u_a4 - sun4u_a1), %o0
726 diff --git a/arch/sparc/kernel/leon_smp.c b/arch/sparc/kernel/leon_smp.c
727 index 1eed26d423fb..f726d950e347 100644
728 --- a/arch/sparc/kernel/leon_smp.c
729 +++ b/arch/sparc/kernel/leon_smp.c
731 #include <asm/leon.h>
732 #include <asm/leon_amba.h>
733 #include <asm/timer.h>
734 +#include <asm/asmmacro.h>
738 @@ -391,9 +392,14 @@ static void leon_cross_call(smpfunc_t func, cpumask_t mask, unsigned long arg1,
739 register unsigned long a4 asm("i4") = arg4;
740 register unsigned long a5 asm("i5") = 0;
742 - __asm__ __volatile__("std %0, [%6]\n\t"
743 + __asm__ __volatile__(B2B_INLINE_DOUBLE_NOP
745 + B2B_INLINE_SINGLE_NOP
746 "std %2, [%6 + 8]\n\t"
747 - "std %4, [%6 + 16]\n\t" : :
748 + B2B_INLINE_SINGLE_NOP
749 + "std %4, [%6 + 16]\n\t"
750 + B2B_INLINE_SINGLE_NOP
752 "r"(f), "r"(a1), "r"(a2), "r"(a3),
754 "r"(&ccall_info.func));
755 diff --git a/arch/sparc/kernel/sun4d_smp.c b/arch/sparc/kernel/sun4d_smp.c
756 index ff30f03beb7c..b06eaf4d1256 100644
757 --- a/arch/sparc/kernel/sun4d_smp.c
758 +++ b/arch/sparc/kernel/sun4d_smp.c
760 #include <asm/oplib.h>
763 +#include <asm/asmmacro.h>
767 @@ -304,9 +305,14 @@ static void sun4d_cross_call(smpfunc_t func, cpumask_t mask, unsigned long arg1,
768 register unsigned long a5 asm("i5") = 0;
770 __asm__ __volatile__(
771 + B2B_INLINE_DOUBLE_NOP
773 + B2B_INLINE_SINGLE_NOP
774 "std %2, [%6 + 8]\n\t"
775 - "std %4, [%6 + 16]\n\t" : :
776 + B2B_INLINE_SINGLE_NOP
777 + "std %4, [%6 + 16]\n\t"
778 + B2B_INLINE_SINGLE_NOP
780 "r"(f), "r"(a1), "r"(a2), "r"(a3), "r"(a4), "r"(a5),
781 "r" (&ccall_info.func));
783 diff --git a/arch/sparc/kernel/una_asm_32.S b/arch/sparc/kernel/una_asm_32.S
784 index f8bf839289fb..177011bebe3e 100644
785 --- a/arch/sparc/kernel/una_asm_32.S
786 +++ b/arch/sparc/kernel/una_asm_32.S
790 #include <linux/errno.h>
791 +#include <asm/asmmacro.h>
795 @@ -32,26 +33,30 @@ __do_int_store:
800 5: stb %g7, [%o0 + 1]
803 6: stb %g2, [%o0 + 2]
805 7: stb %g1, [%o0 + 3]
808 8: stb %g2, [%o0 + 4]
811 9: stb %g1, [%o0 + 5]
812 10: stb %g2, [%o0 + 6]
813 +11: stb %g7, [%o0 + 7]
815 -11: stb %g7, [%o0 + 7]
822 13: stb %g7, [%o0 + 1]
823 14: stb %g2, [%o0 + 2]
824 +15: stb %g1, [%o0 + 3]
826 -15: stb %g1, [%o0 + 3]
830 17: stb %g1, [%o0 + 1]
831 @@ -99,8 +104,9 @@ do_int_load:
840 6: ldub [%o2 + 1], %g2
842 7: ldub [%o2 + 2], %g7
843 @@ -110,8 +116,9 @@ do_int_load:
852 10: ldub [%o2 + 1], %g2
854 diff --git a/arch/sparc/kernel/wof.S b/arch/sparc/kernel/wof.S
855 index 96a3a112423a..8538818424c0 100644
856 --- a/arch/sparc/kernel/wof.S
857 +++ b/arch/sparc/kernel/wof.S
858 @@ -124,6 +124,8 @@ spwin_no_userwins_from_kernel:
859 jmp %t_pc ! Return from trap
860 rett %t_npc ! we are done
862 + B2B_SINGLE_NOP ! To not trigger delay slot warning
865 /* LOCATION: Trap window */
867 @@ -341,7 +343,7 @@ SUN_PI_(lda [%g0] ASI_M_MMUREGS, %glob_tmp) ! read MMU control
868 or %glob_tmp, 0x2, %glob_tmp ! or in no_fault bit
869 LEON_PI(sta %glob_tmp, [%g0] ASI_LEON_MMUREGS) ! set it
870 SUN_PI_(sta %glob_tmp, [%g0] ASI_M_MMUREGS) ! set it
873 /* Dump the registers and cross fingers. */
876 diff --git a/arch/sparc/lib/blockops.S b/arch/sparc/lib/blockops.S
877 index 76ddd1ff6833..9f66d08ff8a3 100644
878 --- a/arch/sparc/lib/blockops.S
879 +++ b/arch/sparc/lib/blockops.S
881 #include <linux/linkage.h>
882 #include <asm/page.h>
883 #include <asm/export.h>
884 +#include <asm/asmmacro.h>
886 /* Zero out 64 bytes of memory at (buf + offset).
887 * Assumes %g1 contains zero.
889 #define BLAST_BLOCK(buf, offset) \
890 std %g0, [buf + offset + 0x38]; \
892 std %g0, [buf + offset + 0x30]; \
894 std %g0, [buf + offset + 0x28]; \
896 std %g0, [buf + offset + 0x20]; \
898 std %g0, [buf + offset + 0x18]; \
900 std %g0, [buf + offset + 0x10]; \
902 std %g0, [buf + offset + 0x08]; \
903 - std %g0, [buf + offset + 0x00];
905 + std %g0, [buf + offset + 0x00]; \
908 /* Copy 32 bytes of memory at (src + offset) to
911 ldd [src + offset + 0x08], t4; \
912 ldd [src + offset + 0x00], t6; \
913 std t0, [dst + offset + 0x18]; \
915 std t2, [dst + offset + 0x10]; \
917 std t4, [dst + offset + 0x08]; \
918 - std t6, [dst + offset + 0x00];
920 + std t6, [dst + offset + 0x00]; \
923 /* Profiling evidence indicates that memset() is
924 * commonly called for blocks of size PAGE_SIZE,
925 diff --git a/arch/sparc/lib/checksum_32.S b/arch/sparc/lib/checksum_32.S
926 index 7488d130faf7..1f5b2daf2d51 100644
927 --- a/arch/sparc/lib/checksum_32.S
928 +++ b/arch/sparc/lib/checksum_32.S
929 @@ -190,39 +190,47 @@ cpout: retl ! get outta here
930 * because of this we thus do all the ldd's together to get
931 * Viking MXCC into streaming mode. Ho hum...
933 + /* B2B-FIX-NOTE: The fixup section is affected only by the number of
934 + * instructions and where the load instructions are located in this
935 + * macro. Neither of those factors has been changed.
937 #define CSUMCOPY_BIGCHUNK(src, dst, sum, off, t0, t1, t2, t3, t4, t5, t6, t7) \
938 ldd [src + off + 0x00], t0; \
939 ldd [src + off + 0x08], t2; \
940 ldd [src + off + 0x10], t4; \
941 ldd [src + off + 0x18], t6; \
942 st t0, [dst + off + 0x00]; \
943 - addxcc t0, sum, sum; \
944 st t1, [dst + off + 0x04]; \
945 + addxcc t0, sum, sum; \
946 addxcc t1, sum, sum; \
947 st t2, [dst + off + 0x08]; \
948 - addxcc t2, sum, sum; \
949 st t3, [dst + off + 0x0c]; \
950 + addxcc t2, sum, sum; \
951 addxcc t3, sum, sum; \
952 st t4, [dst + off + 0x10]; \
953 - addxcc t4, sum, sum; \
954 st t5, [dst + off + 0x14]; \
955 + addxcc t4, sum, sum; \
956 addxcc t5, sum, sum; \
957 st t6, [dst + off + 0x18]; \
958 - addxcc t6, sum, sum; \
959 st t7, [dst + off + 0x1c]; \
960 + addxcc t6, sum, sum; \
963 /* Yuck, 6 superscalar cycles... */
964 + /* B2B-FIX-NOTE: The fixup section is affected only by the number of
965 + * instructions and where the load instructions are located in this
966 + * macro. Neither of those factors has been changed.
968 #define CSUMCOPY_LASTCHUNK(src, dst, sum, off, t0, t1, t2, t3) \
969 ldd [src - off - 0x08], t0; \
970 ldd [src - off - 0x00], t2; \
971 addxcc t0, sum, sum; \
972 - st t0, [dst - off - 0x08]; \
973 addxcc t1, sum, sum; \
974 + st t0, [dst - off - 0x08]; \
975 st t1, [dst - off - 0x04]; \
976 addxcc t2, sum, sum; \
977 - st t2, [dst - off - 0x00]; \
978 addxcc t3, sum, sum; \
979 + st t2, [dst - off - 0x00]; \
980 st t3, [dst - off + 0x04];
982 /* Handle the end cruft code out of band for better cache patterns. */
983 @@ -399,8 +407,8 @@ ccslow: cmp %g1, 0
990 EX(stb %o4, [%o1 + 1])
993 @@ -413,10 +421,10 @@ ccslow: cmp %g1, 0
997 - EX(stb %g3, [%o1 + 1])
999 - EX(stb %g2, [%o1 + 2])
1001 + EX(stb %g3, [%o1 + 1])
1002 + EX(stb %g2, [%o1 + 2])
1003 EX(stb %o4, [%o1 + 3])
1004 addx %g5, %g0, %g5 ! I am now to lazy to optimize this (question it
1005 add %o1, 4, %o1 ! is worthy). Maybe some day - with the sll/srl
1006 @@ -435,8 +443,8 @@ ccslow: cmp %g1, 0
1011 EX(stb %o4, [%o1 + 1])
1016 diff --git a/arch/sparc/lib/copy_user.S b/arch/sparc/lib/copy_user.S
1017 index dc72f2b970b7..b7cd5165497d 100644
1018 --- a/arch/sparc/lib/copy_user.S
1019 +++ b/arch/sparc/lib/copy_user.S
1021 #include <asm/page.h>
1022 #include <asm/thread_info.h>
1023 #include <asm/export.h>
1024 +#include <asm/asmmacro.h>
1026 /* Work around cpp -rob */
1027 #define ALLOC #alloc
1030 #define MOVE_BIGALIGNCHUNK(src, dst, offset, t0, t1, t2, t3, t4, t5, t6, t7) \
1031 ldd [%src + (offset) + 0x00], %t0; \
1032 - ldd [%src + (offset) + 0x08], %t2; \
1033 - ldd [%src + (offset) + 0x10], %t4; \
1034 - ldd [%src + (offset) + 0x18], %t6; \
1035 std %t0, [%dst + (offset) + 0x00]; \
1036 + ldd [%src + (offset) + 0x08], %t2; \
1037 std %t2, [%dst + (offset) + 0x08]; \
1038 + ldd [%src + (offset) + 0x10], %t4; \
1039 std %t4, [%dst + (offset) + 0x10]; \
1040 + ldd [%src + (offset) + 0x18], %t6; \
1041 std %t6, [%dst + (offset) + 0x18];
1043 #define MOVE_LASTCHUNK(src, dst, offset, t0, t1, t2, t3) \
1044 diff --git a/arch/sparc/lib/locks.S b/arch/sparc/lib/locks.S
1045 index 9a1289a3fb28..066717755d67 100644
1046 --- a/arch/sparc/lib/locks.S
1047 +++ b/arch/sparc/lib/locks.S
1048 @@ -92,7 +92,16 @@ ___rw_write_enter:
1049 bne ___rw_write_enter_spin_on_wlock
1051 andncc %g2, 0xff, %g0
1052 +#ifdef __FIX_LEON3FT_B2BST
1055 + stb %g0, [%g1 + 3]
1056 + b ___rw_write_enter_spin_on_wlock
1060 bne,a ___rw_write_enter_spin_on_wlock
1065 diff --git a/arch/sparc/lib/memcpy.S b/arch/sparc/lib/memcpy.S
1066 index ee823d8c9215..dac6d6f0fe3c 100644
1067 --- a/arch/sparc/lib/memcpy.S
1068 +++ b/arch/sparc/lib/memcpy.S
1069 @@ -32,12 +32,12 @@ x:
1071 #define MOVE_BIGALIGNCHUNK(src, dst, offset, t0, t1, t2, t3, t4, t5, t6, t7) \
1072 ldd [%src + (offset) + 0x00], %t0; \
1073 - ldd [%src + (offset) + 0x08], %t2; \
1074 - ldd [%src + (offset) + 0x10], %t4; \
1075 - ldd [%src + (offset) + 0x18], %t6; \
1076 std %t0, [%dst + (offset) + 0x00]; \
1077 + ldd [%src + (offset) + 0x08], %t2; \
1078 std %t2, [%dst + (offset) + 0x08]; \
1079 + ldd [%src + (offset) + 0x10], %t4; \
1080 std %t4, [%dst + (offset) + 0x10]; \
1081 + ldd [%src + (offset) + 0x18], %t6; \
1082 std %t6, [%dst + (offset) + 0x18];
1084 #define MOVE_LASTCHUNK(src, dst, offset, t0, t1, t2, t3) \
1085 @@ -50,8 +50,8 @@ x:
1087 #define MOVE_LASTALIGNCHUNK(src, dst, offset, t0, t1, t2, t3) \
1088 ldd [%src - (offset) - 0x10], %t0; \
1089 - ldd [%src - (offset) - 0x08], %t2; \
1090 std %t0, [%dst - (offset) - 0x10]; \
1091 + ldd [%src - (offset) - 0x08], %t2; \
1092 std %t2, [%dst - (offset) - 0x08];
1094 #define MOVE_SHORTCHUNK(src, dst, offset, t0, t1) \
1095 @@ -192,8 +192,8 @@ EXPORT_SYMBOL(memcpy)
1099 - st %g2, [%o0 - 0x08]
1101 + st %g2, [%o0 - 0x08]
1102 st %g3, [%o0 - 0x04]
1104 81: /* memcpy_last7 */
1105 diff --git a/arch/sparc/lib/memset.S b/arch/sparc/lib/memset.S
1106 index f427f34b8b79..77ea205b8d66 100644
1107 --- a/arch/sparc/lib/memset.S
1108 +++ b/arch/sparc/lib/memset.S
1111 #include <asm/ptrace.h>
1112 #include <asm/export.h>
1113 +#include <asm/asmmacro.h>
1115 /* Work around cpp -rob */
1116 #define ALLOC #alloc
1118 * Store 64 bytes at (BASE + OFFSET) using value SOURCE. */
1119 #define ZERO_BIG_BLOCK(base, offset, source) \
1120 std source, [base + offset + 0x00]; \
1122 std source, [base + offset + 0x08]; \
1124 std source, [base + offset + 0x10]; \
1126 std source, [base + offset + 0x18]; \
1128 std source, [base + offset + 0x20]; \
1130 std source, [base + offset + 0x28]; \
1132 std source, [base + offset + 0x30]; \
1133 - std source, [base + offset + 0x38];
1135 + std source, [base + offset + 0x38]; \
1138 #define ZERO_LAST_BLOCKS(base, offset, source) \
1139 std source, [base - offset - 0x38]; \
1141 std source, [base - offset - 0x30]; \
1143 std source, [base - offset - 0x28]; \
1145 std source, [base - offset - 0x20]; \
1147 std source, [base - offset - 0x18]; \
1149 std source, [base - offset - 0x10]; \
1151 std source, [base - offset - 0x08]; \
1152 - std source, [base - offset - 0x00];
1154 + std source, [base - offset - 0x00]; \
1159 @@ -82,12 +99,14 @@ memset:
1163 + EX(stb %g3, [%o0], sub %o1, 0)
1165 - EX(stb %g3, [%o0], sub %o1, 0)
1169 + EX(stb %g3, [%o0 + 0x01], sub %o1, 1)
1171 - EX(stb %g3, [%o0 + 0x01], sub %o1, 1)
1174 EX(stb %g3, [%o0 + 0x02], sub %o1, 2)
1176 @@ -132,7 +151,11 @@ __bzero:
1181 +#ifdef __FIX_LEON3FT_B2BST
1182 + mov %o2, %o3 /* 8 bytes of std+nop sets 8 bytes of memory */
1184 + srl %o2, 1, %o3 /* 4 bytes of std sets 8 bytes of memory */
1189 @@ -158,8 +181,9 @@ __bzero:
1190 EX(sth %g3, [%o0], and %o1, 3)
1194 - EX(stb %g3, [%o0], and %o1, 1)
1197 + EX(stb %g3, [%o0], and %o1, 1)
1201 @@ -171,8 +195,9 @@ __bzero:
1205 + EX(stb %g3, [%o0 - 1], add %o1, 1)
1207 - EX(stb %g3, [%o0 - 1], add %o1, 1)
1212 @@ -180,23 +205,56 @@ __bzero:
1222 .section .fixup,#alloc,#execinstr
1226 + * We got a fault in the 10: to 11: address range.
1229 + * - %g2 now contains the index (within the range) of the instruction that
1231 + * - %o1 contains the number of bytes that were left to set/zero before
1232 + * entering the loop the first time.
1233 + * - %l3 contains the number of bytes left for the loop to set/zero
1234 + * (but adjusted in the middle of the loop)
1237 +#ifdef __FIX_LEON3FT_B2BST
1238 + cmp %g2, 16 /* Double number of instructions per half */
1246 + /* We were in second half of the 10: to 11: block */
1247 +#ifdef __FIX_LEON3FT_B2BST
1248 + sub %g2, 17, %g2 /* Adjust index: 8 std + nop pairs + one subcc */
1250 + sub %g2, 9, %g2 /* Adjust index to start of ZERO_BIG_BLOCK */
1252 + add %o3, 64, %o3 /* Adjust bytes left in this turn of the loop */
1253 + /* (due to the subcc being in the middle) */
1258 + * Convert index of faulting instruction within ZERO_BIG_BLOCK to
1259 + * number of bytes written
1261 +#ifdef __FIX_LEON3FT_B2BST
1262 + sll %g2, 2, %g2 /* 8 bytes are written per 2 instructions (std+nop) */
1264 + sll %g2, 3, %g2 /* 8 bytes are written per std instruction */
1266 + add %o3, %o1, %o0 /* Bytes left before faulting ZERO_BIG_BLOCK */
1269 + sub %o0, %g2, %o0 /* Subtract bytes written by the faulting */
1270 + /* ZERO_BIG_BLOCK => the number of bytes */
1271 + /* that were not set/zeroed. */
1275 diff --git a/arch/sparc/mm/hypersparc.S b/arch/sparc/mm/hypersparc.S
1276 index 6c2521e85a42..513ea55441b7 100644
1277 --- a/arch/sparc/mm/hypersparc.S
1278 +++ b/arch/sparc/mm/hypersparc.S
1280 #include <asm/pgtable.h>
1281 #include <asm/pgtsrmmu.h>
1282 #include <linux/init.h>
1283 +#include <asm/asmmacro.h>
1287 @@ -32,10 +33,12 @@ hypersparc_flush_cache_all:
1288 ld [%g1 + %lo(vac_line_size)], %g2
1290 subcc %g5, %g2, %g5 ! hyper_flush_unconditional_combined
1291 + sta %g0, [%g5] ASI_M_FLUSH_CTX
1293 - sta %g0, [%g5] ASI_M_FLUSH_CTX
1295 + sta %g0, [%g0] ASI_M_FLUSH_IWHOLE ! hyper_flush_whole_icache
1297 - sta %g0, [%g0] ASI_M_FLUSH_IWHOLE ! hyper_flush_whole_icache
1300 /* We expand the window flush to get maximum performance. */
1301 hypersparc_flush_cache_mm:
1302 @@ -68,8 +71,9 @@ hypersparc_flush_cache_mm:
1303 sta %g0, [%o0 + %g3] ASI_M_FLUSH_USER
1304 sta %g0, [%o0 + %g4] ASI_M_FLUSH_USER
1305 sta %g0, [%o0 + %g5] ASI_M_FLUSH_USER
1306 + sta %g0, [%o0 + %o4] ASI_M_FLUSH_USER
1308 - sta %g0, [%o0 + %o4] ASI_M_FLUSH_USER
1310 hypersparc_flush_cache_mm_out:
1313 @@ -117,8 +121,9 @@ hypersparc_flush_cache_range:
1314 sta %g0, [%o3 + %g2] ASI_M_FLUSH_USER
1315 sta %g0, [%o3 + %g3] ASI_M_FLUSH_USER
1316 sta %g0, [%o3 + %g4] ASI_M_FLUSH_USER
1317 + sta %g0, [%o3 + %g5] ASI_M_FLUSH_USER
1319 - sta %g0, [%o3 + %g5] ASI_M_FLUSH_USER
1324 @@ -145,9 +150,11 @@ hypersparc_flush_cache_range:
1325 sta %g0, [%o2 + %g2] ASI_M_FLUSH_PAGE
1326 sta %g0, [%o2 + %g3] ASI_M_FLUSH_PAGE
1327 andcc %o2, 0xffc, %g0
1329 sta %g0, [%o2 + %g4] ASI_M_FLUSH_PAGE
1330 + sta %g0, [%o2 + %g5] ASI_M_FLUSH_PAGE
1332 - sta %g0, [%o2 + %g5] ASI_M_FLUSH_PAGE
1337 @@ -202,9 +209,11 @@ hypersparc_flush_cache_page:
1338 sta %g0, [%o1 + %g2] ASI_M_FLUSH_PAGE
1339 sta %g0, [%o1 + %g3] ASI_M_FLUSH_PAGE
1340 andcc %o1, 0xffc, %g0
1342 sta %g0, [%o1 + %g4] ASI_M_FLUSH_PAGE
1343 + sta %g0, [%o1 + %g5] ASI_M_FLUSH_PAGE
1345 - sta %g0, [%o1 + %g5] ASI_M_FLUSH_PAGE
1348 mov SRMMU_FAULT_STATUS, %g7
1349 mov SRMMU_CTX_REG, %g4
1350 @@ -247,9 +256,11 @@ hypersparc_flush_page_to_ram:
1351 sta %g0, [%o0 + %g2] ASI_M_FLUSH_PAGE
1352 sta %g0, [%o0 + %g3] ASI_M_FLUSH_PAGE
1353 andcc %o0, 0xffc, %g0
1355 sta %g0, [%o0 + %g4] ASI_M_FLUSH_PAGE
1356 + sta %g0, [%o0 + %g5] ASI_M_FLUSH_PAGE
1358 - sta %g0, [%o0 + %g5] ASI_M_FLUSH_PAGE
1361 mov SRMMU_FAULT_STATUS, %g1
1363 @@ -282,8 +293,9 @@ hypersparc_flush_tlb_mm:
1364 sta %o1, [%g1] ASI_M_MMUREGS
1365 sta %g0, [%g2] ASI_M_FLUSH_PROBE
1366 hypersparc_flush_tlb_mm_out:
1367 + sta %g5, [%g1] ASI_M_MMUREGS
1369 - sta %g5, [%g1] ASI_M_MMUREGS
1372 hypersparc_flush_tlb_range:
1373 ld [%o0 + VMA_VM_MM], %o0
1374 @@ -298,15 +310,16 @@ hypersparc_flush_tlb_range:
1375 sta %o3, [%g1] ASI_M_MMUREGS
1378 - sta %g0, [%o1] ASI_M_FLUSH_PROBE
1380 + sta %g0, [%o1] ASI_M_FLUSH_PROBE
1384 - sta %g0, [%o1] ASI_M_FLUSH_PROBE
1387 hypersparc_flush_tlb_range_out:
1388 + sta %g5, [%g1] ASI_M_MMUREGS
1390 - sta %g5, [%g1] ASI_M_MMUREGS
1393 hypersparc_flush_tlb_page:
1394 ld [%o0 + VMA_VM_MM], %o0
1395 @@ -321,8 +334,9 @@ hypersparc_flush_tlb_page:
1396 sta %o3, [%g1] ASI_M_MMUREGS
1397 sta %g0, [%o1] ASI_M_FLUSH_PROBE
1398 hypersparc_flush_tlb_page_out:
1399 + sta %g5, [%g1] ASI_M_MMUREGS
1401 - sta %g5, [%g1] ASI_M_MMUREGS
1406 @@ -340,12 +354,19 @@ hypersparc_bzero_1page:
1409 stda %g0, [%o0 + %g0] ASI_M_BFILL
1411 stda %g0, [%o0 + %g2] ASI_M_BFILL
1413 stda %g0, [%o0 + %g3] ASI_M_BFILL
1415 stda %g0, [%o0 + %g4] ASI_M_BFILL
1417 stda %g0, [%o0 + %g5] ASI_M_BFILL
1419 stda %g0, [%o0 + %g7] ASI_M_BFILL
1421 stda %g0, [%o0 + %o2] ASI_M_BFILL
1423 stda %g0, [%o0 + %o3] ASI_M_BFILL
1426 @@ -361,17 +382,24 @@ hypersparc_copy_1page:
1428 sta %o0, [%o0 + %o2] ASI_M_BCOPY
1431 sta %o0, [%o0 + %o2] ASI_M_BCOPY
1434 sta %o0, [%o0 + %o2] ASI_M_BCOPY
1437 sta %o0, [%o0 + %o2] ASI_M_BCOPY
1440 sta %o0, [%o0 + %o2] ASI_M_BCOPY
1443 sta %o0, [%o0 + %o2] ASI_M_BCOPY
1446 sta %o0, [%o0 + %o2] ASI_M_BCOPY
1449 sta %o0, [%o0 + %o2] ASI_M_BCOPY
1451 diff --git a/arch/sparc/mm/leon_mm.c b/arch/sparc/mm/leon_mm.c
1452 index f8ac99759ed3..c2e0b2905035 100644
1453 --- a/arch/sparc/mm/leon_mm.c
1454 +++ b/arch/sparc/mm/leon_mm.c
1456 #include <asm/leon.h>
1457 #include <asm/tlbflush.h>
1458 #include <asm/pgtsrmmu.h>
1459 +#include <asm/asmmacro.h>
1463 @@ -188,7 +189,10 @@ void leon_flush_icache_all(void)
1465 void leon_flush_dcache_all(void)
1467 - __asm__ __volatile__("sta %%g0, [%%g0] %0\n\t" : :
1468 + __asm__ __volatile__(B2B_INLINE_DOUBLE_NOP
1469 + "sta %%g0, [%%g0] %0\n\t"
1470 + B2B_INLINE_DOUBLE_NOP
1472 "i"(ASI_LEON_DFLUSH) : "memory");
1475 @@ -201,15 +205,21 @@ void leon_flush_pcache_all(struct vm_area_struct *vma, unsigned long page)
1477 void leon_flush_cache_all(void)
1479 + __asm__ __volatile__(B2B_INLINE_SINGLE_NOP);
1480 __asm__ __volatile__(".align 32\nflush\n.align 32\n"); /*iflush*/
1481 __asm__ __volatile__("sta %%g0, [%%g0] %0\n\t" : :
1482 "i"(ASI_LEON_DFLUSH) : "memory");
1483 + __asm__ __volatile__(B2B_INLINE_DOUBLE_NOP);
1487 void leon_flush_tlb_all(void)
1489 leon_flush_cache_all();
1490 - __asm__ __volatile__("sta %%g0, [%0] %1\n\t" : : "r"(0x400),
1491 + __asm__ __volatile__(B2B_INLINE_DOUBLE_NOP
1492 + "sta %%g0, [%0] %1\n\t"
1493 + B2B_INLINE_DOUBLE_NOP
1495 "i"(ASI_LEON_MMUFLUSH) : "memory");
1498 diff --git a/arch/sparc/mm/srmmu.c b/arch/sparc/mm/srmmu.c
1499 index 4ab2e43e93a1..0d523aaf2c1c 100644
1500 --- a/arch/sparc/mm/srmmu.c
1501 +++ b/arch/sparc/mm/srmmu.c
1503 #include <asm/mmu_context.h>
1504 #include <asm/cacheflush.h>
1505 #include <asm/tlbflush.h>
1506 +#include <asm/asmmacro.h>
1507 #include <asm/io-unit.h>
1508 #include <asm/pgalloc.h>
1509 #include <asm/pgtable.h>
1510 @@ -129,7 +130,9 @@ static void msi_set_sync(void)
1512 __asm__ __volatile__ ("lda [%0] %1, %%g3\n\t"
1513 "andn %%g3, %2, %%g3\n\t"
1514 - "sta %%g3, [%0] %1\n\t" : :
1515 + "sta %%g3, [%0] %1\n\t"
1516 + B2B_INLINE_DOUBLE_NOP
1518 "r" (MSI_MBUS_ARBEN),
1519 "i" (ASI_M_CTL), "r" (MSI_ASYNC_MODE) : "g3");
1521 diff --git a/arch/sparc/mm/swift.S b/arch/sparc/mm/swift.S
1522 index f414bfd8d899..303b86ff3864 100644
1523 --- a/arch/sparc/mm/swift.S
1524 +++ b/arch/sparc/mm/swift.S
1526 #include <asm/page.h>
1527 #include <asm/pgtsrmmu.h>
1528 #include <asm/asm-offsets.h>
1529 +#include <asm/asmmacro.h>
1533 @@ -32,8 +33,9 @@ swift_flush_page_to_ram:
1534 1: subcc %o0, 0x10, %o0
1536 sta %g0, [%o0] ASI_M_DATAC_TAG
1537 + sta %g0, [%o1] ASI_M_TXTC_TAG
1539 - sta %g0, [%o1] ASI_M_TXTC_TAG
1544 @@ -46,8 +48,9 @@ swift_flush_cache_all:
1545 sethi %hi(16 * 1024), %o0
1546 1: subcc %o0, 16, %o0
1547 sta %g0, [%o0] ASI_M_TXTC_TAG
1548 + sta %g0, [%o0] ASI_M_DATAC_TAG
1550 - sta %g0, [%o0] ASI_M_DATAC_TAG
1555 diff --git a/arch/sparc/mm/tsunami.S b/arch/sparc/mm/tsunami.S
1556 index 62b742df65dc..fc0c18e74408 100644
1557 --- a/arch/sparc/mm/tsunami.S
1558 +++ b/arch/sparc/mm/tsunami.S
1560 #include <asm/asi.h>
1561 #include <asm/page.h>
1562 #include <asm/pgtsrmmu.h>
1563 +#include <asm/asmmacro.h>
1567 @@ -81,8 +82,9 @@ tsunami_flush_tlb_page:
1570 tsunami_flush_tlb_page_out:
1571 + sta %g5, [%g1] ASI_M_MMUREGS
1573 - sta %g5, [%g1] ASI_M_MMUREGS
1576 #define MIRROR_BLOCK(dst, src, offset, t0, t1, t2, t3) \
1577 ldd [src + offset + 0x18], t0; \
1578 diff --git a/arch/sparc/mm/viking.S b/arch/sparc/mm/viking.S
1579 index 48f062de7a7f..de913516fdeb 100644
1580 --- a/arch/sparc/mm/viking.S
1581 +++ b/arch/sparc/mm/viking.S
1583 #include <asm/pgtable.h>
1584 #include <asm/pgtsrmmu.h>
1585 #include <asm/viking.h>
1586 +#include <asm/asmmacro.h>
1590 @@ -99,8 +100,8 @@ viking_mxcc_flush_page:
1591 sub %g3, MXCC_STREAM_SIZE, %g3
1593 stda %g2, [%o2] ASI_M_MXCC
1594 - stda %g2, [%o3] ASI_M_MXCC
1595 andncc %g3, PAGE_MASK, %g0
1596 + stda %g2, [%o3] ASI_M_MXCC
1598 sub %g3, MXCC_STREAM_SIZE, %g3
1600 @@ -128,8 +129,9 @@ viking_flush_cache_out:
1602 viking_flush_tlb_all:
1604 + sta %g0, [%g1] ASI_M_FLUSH_PROBE
1606 - sta %g0, [%g1] ASI_M_FLUSH_PROBE
1609 viking_flush_tlb_mm:
1610 mov SRMMU_CTX_REG, %g1
1611 @@ -142,8 +144,9 @@ viking_flush_tlb_mm:
1613 sta %o1, [%g1] ASI_M_MMUREGS
1614 sta %g0, [%g2] ASI_M_FLUSH_PROBE
1615 + sta %g5, [%g1] ASI_M_MMUREGS
1617 - sta %g5, [%g1] ASI_M_MMUREGS
1622 @@ -162,13 +165,14 @@ viking_flush_tlb_range:
1623 sta %o3, [%g1] ASI_M_MMUREGS
1626 - sta %g0, [%o1] ASI_M_FLUSH_PROBE
1627 -1: sub %o1, %o4, %o1
1628 +1: sta %g0, [%o1] ASI_M_FLUSH_PROBE
1632 - sta %g0, [%o1] ASI_M_FLUSH_PROBE
1635 + sta %g5, [%g1] ASI_M_MMUREGS
1637 - sta %g5, [%g1] ASI_M_MMUREGS
1642 @@ -186,8 +190,9 @@ viking_flush_tlb_page:
1643 and %o1, PAGE_MASK, %o1
1644 sta %o3, [%g1] ASI_M_MMUREGS
1645 sta %g0, [%o1] ASI_M_FLUSH_PROBE
1646 + sta %g5, [%g1] ASI_M_MMUREGS
1648 - sta %g5, [%g1] ASI_M_MMUREGS
1653 @@ -209,8 +214,9 @@ sun4dsmp_flush_tlb_all:
1656 sta %g0, [%g1] ASI_M_FLUSH_PROBE
1657 + stb %g0, [%g3 + %lo(sun4dsmp_flush_tlb_spin)]
1659 - stb %g0, [%g3 + %lo(sun4dsmp_flush_tlb_spin)]
1663 ldub [%g3 + %lo(sun4dsmp_flush_tlb_spin)], %g5
1664 @@ -228,8 +234,9 @@ sun4dsmp_flush_tlb_mm:
1665 sta %o1, [%g1] ASI_M_MMUREGS
1666 sta %g0, [%g2] ASI_M_FLUSH_PROBE
1667 sta %g5, [%g1] ASI_M_MMUREGS
1668 + stb %g0, [%g3 + %lo(sun4dsmp_flush_tlb_spin)]
1670 - stb %g0, [%g3 + %lo(sun4dsmp_flush_tlb_spin)]
1674 ldub [%g3 + %lo(sun4dsmp_flush_tlb_spin)], %g5
1675 @@ -248,14 +255,15 @@ sun4dsmp_flush_tlb_range:
1676 sta %o3, [%g1] ASI_M_MMUREGS
1679 - sta %g0, [%o1] ASI_M_FLUSH_PROBE
1680 -2: sub %o1, %o4, %o1
1681 +2: sta %g0, [%o1] ASI_M_FLUSH_PROBE
1685 - sta %g0, [%o1] ASI_M_FLUSH_PROBE
1688 sta %g5, [%g1] ASI_M_MMUREGS
1689 + stb %g0, [%g3 + %lo(sun4dsmp_flush_tlb_spin)]
1691 - stb %g0, [%g3 + %lo(sun4dsmp_flush_tlb_spin)]
1695 ldub [%g3 + %lo(sun4dsmp_flush_tlb_spin)], %g5
1696 @@ -274,8 +282,9 @@ sun4dsmp_flush_tlb_page:
1697 sta %o3, [%g1] ASI_M_MMUREGS
1698 sta %g0, [%o1] ASI_M_FLUSH_PROBE
1699 sta %g5, [%g1] ASI_M_MMUREGS
1700 + stb %g0, [%g3 + %lo(sun4dsmp_flush_tlb_spin)]
1702 - stb %g0, [%g3 + %lo(sun4dsmp_flush_tlb_spin)]
1706 ldub [%g3 + %lo(sun4dsmp_flush_tlb_spin)], %g5