leon: refresh patches to apply/compile on 5.15
[openwrt/staging/wigyori.git] / target / linux / leon / patches-5.15 / 0029-drivers-perf-Add-driver-for-L4STAT-in-GR740-configur.patch
1 From e3848e4e1e8a57462b0b2990e1a6ef96d55306bf Mon Sep 17 00:00:00 2001
2 From: Eneli Elbing <eneli.elbing@gaisler.com>
3 Date: Wed, 11 Jan 2023 10:53:33 +0100
4 Subject: [PATCH 29/32] drivers/perf: Add driver for L4STAT in GR740
5 configuration
6
7 The LEON4 Statistics Unit (L4STAT) is used to count events in the LEON4
8 processors and on the processor AHB bus. The driver supports L4STAT on
9 GR740. Performance statistics can be gathered using the perf stat
10 command.
11
12 Signed-off-by: Eneli Elbing <eneli.elbing@gaisler.com>
13 ---
14 Documentation/admin-guide/perf/index.rst | 1 +
15 Documentation/admin-guide/perf/l4stat_pmu.rst | 118 +++
16 MAINTAINERS | 6 +
17 arch/sparc/Kconfig | 2 +-
18 arch/sparc/include/asm/perf_event.h | 28 +-
19 arch/sparc/include/asm/perf_event_32.h | 25 +
20 .../asm/{perf_event.h => perf_event_64.h} | 4 +-
21 drivers/perf/Kconfig | 10 +
22 drivers/perf/Makefile | 1 +
23 drivers/perf/l4stat_pmu.c | 705 ++++++++++++++++++
24 10 files changed, 873 insertions(+), 27 deletions(-)
25 create mode 100644 Documentation/admin-guide/perf/l4stat_pmu.rst
26 create mode 100644 arch/sparc/include/asm/perf_event_32.h
27 copy arch/sparc/include/asm/{perf_event.h => perf_event_64.h} (91%)
28 create mode 100644 drivers/perf/l4stat_pmu.c
29
30 diff --git a/Documentation/admin-guide/perf/index.rst b/Documentation/admin-guide/perf/index.rst
31 index 5a8f2529a033..e4487442b463 100644
32 --- a/Documentation/admin-guide/perf/index.rst
33 +++ b/Documentation/admin-guide/perf/index.rst
34 @@ -16,3 +16,4 @@ Performance monitor support
35 xgene-pmu
36 arm_dsu_pmu
37 thunderx2-pmu
38 + l4stat_pmu
39 diff --git a/Documentation/admin-guide/perf/l4stat_pmu.rst b/Documentation/admin-guide/perf/l4stat_pmu.rst
40 new file mode 100644
41 index 000000000000..acdf87435842
42 --- /dev/null
43 +++ b/Documentation/admin-guide/perf/l4stat_pmu.rst
44 @@ -0,0 +1,118 @@
45 +.. SPDX-License-Identifier: GPL-2.0+
46 +
47 +==================================
48 +LEON4 Statistics Unit - L4STAT PMU
49 +==================================
50 +
51 +The LEON4 Statistics Unit (L4STAT) is used to count events in the LEON4
52 +processors and on the processor AHB bus. The statistics unit has 16 hardware
53 +counters. In its current form, the driver supports L4STAT on GR740. The
54 +documentation of the L4STAT core on GR740 can be found in the GR740 Data Sheet
55 +and User’s Manual at https://www.gaisler.com/gr740
56 +
57 +PMU (perf) driver
58 +-----------------
59 +
60 +The L4STAT driver registers a single PMU device, see
61 +/sys/bus/event_source/devices/l4stat. Events are listed in the events/ directory
62 +and available configurations in the format/ directory. An event can be referred
63 +to either by its symbolic name or its numeric, hexadecimal value with the
64 +config/event parameter. The config1/ahbm parameter is used to set the CPU/AHBM
65 +field in the control register. The config2/su parameter is used to set the SU
66 +field. Both are 0 by default.
67 +
68 +Task-specific vs system-wide mode
69 +---------------------------------
70 +
71 +In task-specific mode, the counters only run on the CPUs that the task is
72 +currently scheduled on. In system-wide mode (-a), the counters run on all CPUs,
73 +regardless of which CPUs the task is scheduled on. The -A flag can be used for a
74 +per-CPU breakdown in system-wide mode. Per-CPU breakdown in task-specific mode
75 +is not supported.
76 +
77 +Processor events
78 +----------------
79 +
80 +The default behaviour is to count processor events on all CPUs. Non-CPU AHBMs
81 +can be specified with the config1/ahbm parameter. The config1/ahbm parameter is
82 +ignored for config1/ahbm <= 3, i.e. AHBMs that correspond to the CPUs, since
83 +they are measured by default.
84 +
85 +When specifying config1/ahbm > 3 and using the -a flag, it should be done in
86 +conjunction with specifying a single CPU with the -C option, otherwise a counter
87 +is started on every CPU and the result will be a multiple of the number of CPUs.
88 +The same result can be achieved by omitting the -a flag, i.e. using
89 +task-specific mode instead.
90 +
91 +AHB and device-specific (external/user-defined) events
92 +------------------------------------------------------
93 +
94 +Filtering AHB and device-specific events is activated with the config2/su
95 +parameter. It is 0 by default, meaning that filtering is off, so the resulting
96 +counts are for all AHB masters in total, and the config1/ahbm parameter is
97 +ignored. Setting the config2/su parameter to 2 will result in behaviour similar
98 +to processor events, where config1/ahbm <= 3 is ignored and config1/ahbm > 3 is
99 +filtered. The config1/ahbm parameter is ignored in case of events that do not
100 +support CPU/AHBM filtering (0x63-0x6E). Setting config2/su to 3 will filter on
101 +any config1/ahbm, both CPU and non-CPU.
102 +
103 +Events generated from REQ/GNT signals
104 +-------------------------------------
105 +These events are active when an AHB master has request (REQ) asserted, while
106 +another AHB master has grant (GNT) asserted/deasserted. The REQ AHBM is set
107 +with the config1/ahbm parameter, while the GNT AHBM is set by the event ID.
108 +
109 +The most significant hex digit of the event ID specifies whether GNT is
110 +asserted (8) or deasserted (9). The least significant hex digit specifies the
111 +GNT AHBM according to the following mapping:
112 +8:6 - Masters 2, 1, 0 on memory AHB bus
113 +5:0 - Masters on Processor AHB bus
114 +
115 +Limitations
116 +-----------
117 +
118 +* Sampling is not supported.
119 +  This means that "perf record" and related commands will not work. Events can be
120 +  counted with "perf stat" (see example usage below).
121 +* Scaling is not supported.
122 +  This means that the number of events for a single run is limited by the number
123 +  of hardware counters, i.e. 16. When measuring events on CPU AHBMs, each CPU
124 +  takes up one counter, limiting the number of CPU events that can be counted
125 +  in parallel to four.
126 +
127 +Example usage
128 +-------------
129 +
130 + List all available events
131 + $# perf list
132 +
133 + Some command formatting examples for counting the total number of instructions
134 + $# perf stat -e proc_total_instructions sleep 1
135 + $# perf stat -e l4stat/proc_total_instructions/ sleep 1
136 + $# perf stat -e l4stat/event=0x11/ sleep 1
137 + $# perf stat -e l4stat/config=0x11/ sleep 1
138 +
139 + Some command formatting examples for specifying user and/or kernel space
140 + (u/k/uk)
141 + $# perf stat -e proc_total_instructions:u sleep 1
142 + $# perf stat -e l4stat/proc_total_instructions/u sleep 1
143 + $# perf stat -e l4stat/event=0x11,su=2/ sleep 1
144 +
145 + Count L2 cache misses for CPU3 (AHB master 3)
146 + $# perf stat -e l4stat/ext_l2cache_miss,ahbm=3,su=3/ sleep 1
147 +
148 + Count AHB BUSY cycles for all AHB masters in total (SU is 0 by default)
149 + $# perf stat -e l4stat/ahb_busy_cycles/ sleep 1
150 +
151 + Count AHB BUSY cycles for AHB master 4 (IO Memory Management Unit)
152 + $# perf stat -e l4stat/ahb_busy_cycles,ahbm=4,su=2/ sleep 1
153 +
154 + Count events where master 2 on the processor AHB has REQ asserted and master 1
155 + on the processor AHB has GNT asserted
156 + $# perf stat -e l4stat/reqgnt_ahbm1_proc,ahbm=2/ sleep 1
157 + $# perf stat -e l4stat/event=0x81,ahbm=2/ sleep 1
158 +
159 + Count events where master 2 on the processor AHB has REQ asserted and master 1
160 + on the processor AHB has GNT deasserted
161 + $# perf stat -e l4stat/req_ahbm1_proc,ahbm=2/ sleep 1
162 + $# perf stat -e l4stat/event=0x91,ahbm=2/ sleep 1
163 diff --git a/MAINTAINERS b/MAINTAINERS
164 index 6c5efc4013ab..69a9bfa14bc2 100644
165 --- a/MAINTAINERS
166 +++ b/MAINTAINERS
167 @@ -9822,6 +9822,12 @@ S: Maintained
168 F: include/net/l3mdev.h
169 F: net/l3mdev
170
171 +L4STAT LEON4 STATISTICS UNIT DRIVER FOR GR740
172 +M: Eneli Elbing <eneli.elbing@gaisler.com>
173 +S: Maintained
174 +F: Documentation/admin-guide/perf/l4stat_pmu.rst
175 +F: drivers/perf/l4stat_pmu.c
176 +
177 L7 BPF FRAMEWORK
178 M: John Fastabend <john.fastabend@gmail.com>
179 M: Daniel Borkmann <daniel@iogearbox.net>
180 diff --git a/arch/sparc/Kconfig b/arch/sparc/Kconfig
181 index 8188cae9786f..b38222ba3ae7 100644
182 --- a/arch/sparc/Kconfig
183 +++ b/arch/sparc/Kconfig
184 @@ -52,6 +52,7 @@ config SPARC
185 select NEED_SG_DMA_LENGTH
186 select SET_FS
187 select TRACE_IRQFLAGS_SUPPORT
188 + select HAVE_PERF_EVENTS
189
190 config SPARC32
191 def_bool !64BIT
192 @@ -82,7 +83,6 @@ config SPARC64
193 select RTC_DRV_BQ4802
194 select RTC_DRV_SUN4V
195 select RTC_DRV_STARFIRE
196 - select HAVE_PERF_EVENTS
197 select PERF_USE_VMALLOC
198 select ARCH_HAVE_NMI_SAFE_CMPXCHG
199 select HAVE_C_RECORDMCOUNT
200 diff --git a/arch/sparc/include/asm/perf_event.h b/arch/sparc/include/asm/perf_event.h
201 index c2aec0c7f4f5..083d34c1189f 100644
202 --- a/arch/sparc/include/asm/perf_event.h
203 +++ b/arch/sparc/include/asm/perf_event.h
204 @@ -2,29 +2,9 @@
205 #ifndef __ASM_SPARC_PERF_EVENT_H
206 #define __ASM_SPARC_PERF_EVENT_H
207
208 -#ifdef CONFIG_PERF_EVENTS
209 -#include <asm/ptrace.h>
210 -
211 -#define perf_arch_fetch_caller_regs(regs, ip) \
212 -do { \
213 - unsigned long _pstate, _asi, _pil, _i7, _fp; \
214 - __asm__ __volatile__("rdpr %%pstate, %0\n\t" \
215 - "rd %%asi, %1\n\t" \
216 - "rdpr %%pil, %2\n\t" \
217 - "mov %%i7, %3\n\t" \
218 - "mov %%i6, %4\n\t" \
219 - : "=r" (_pstate), \
220 - "=r" (_asi), \
221 - "=r" (_pil), \
222 - "=r" (_i7), \
223 - "=r" (_fp)); \
224 - (regs)->tstate = (_pstate << 8) | \
225 - (_asi << 24) | (_pil << 20); \
226 - (regs)->tpc = (ip); \
227 - (regs)->tnpc = (regs)->tpc + 4; \
228 - (regs)->u_regs[UREG_I6] = _fp; \
229 - (regs)->u_regs[UREG_I7] = _i7; \
230 -} while (0)
231 +#if defined(__sparc__) && defined(__arch64__)
232 +#include <asm/perf_event_64.h>
233 +#else
234 +#include <asm/perf_event_32.h>
235 #endif
236 -
237 #endif
238 diff --git a/arch/sparc/include/asm/perf_event_32.h b/arch/sparc/include/asm/perf_event_32.h
239 new file mode 100644
240 index 000000000000..b300e05ba6eb
241 --- /dev/null
242 +++ b/arch/sparc/include/asm/perf_event_32.h
243 @@ -0,0 +1,25 @@
244 +/* SPDX-License-Identifier: GPL-2.0 */
245 +#ifndef SPARC_PERF_EVENT_H
246 +#define SPARC_PERF_EVENT_H
247 +
248 +#ifdef CONFIG_PERF_EVENTS
249 +#include <asm/ptrace.h>
250 +
251 +#define perf_arch_fetch_caller_regs(regs, ip) \
252 +do { \
253 + unsigned long _psr, _i7, _fp; \
254 + __asm__ __volatile__("rd %%psr, %0\n\t" \
255 + "mov %%i7, %1\n\t" \
256 + "mov %%i6, %2\n\t" \
257 + : "=r" (_psr), \
258 + "=r" (_i7), \
259 + "=r" (_fp)); \
260 + (regs)->psr = _psr; \
261 + (regs)->pc = (ip); \
262 + (regs)->npc = (regs)->pc + 4; \
263 + (regs)->u_regs[UREG_I6] = _fp; \
264 + (regs)->u_regs[UREG_I7] = _i7; \
265 +} while (0)
266 +#endif
267 +
268 +#endif
269 diff --git a/arch/sparc/include/asm/perf_event.h b/arch/sparc/include/asm/perf_event_64.h
270 similarity index 91%
271 copy from arch/sparc/include/asm/perf_event.h
272 copy to arch/sparc/include/asm/perf_event_64.h
273 index c2aec0c7f4f5..a6bbfe73d23c 100644
274 --- a/arch/sparc/include/asm/perf_event.h
275 +++ b/arch/sparc/include/asm/perf_event_64.h
276 @@ -1,6 +1,6 @@
277 /* SPDX-License-Identifier: GPL-2.0 */
278 -#ifndef __ASM_SPARC_PERF_EVENT_H
279 -#define __ASM_SPARC_PERF_EVENT_H
280 +#ifndef SPARC64_PERF_EVENT_H
281 +#define SPARC64_PERF_EVENT_H
282
283 #ifdef CONFIG_PERF_EVENTS
284 #include <asm/ptrace.h>
285 diff --git a/drivers/perf/Kconfig b/drivers/perf/Kconfig
286 index 130327ff0b0e..a53ca336c434 100644
287 --- a/drivers/perf/Kconfig
288 +++ b/drivers/perf/Kconfig
289 @@ -130,6 +130,16 @@ config ARM_SPE_PMU
290 Extension, which provides periodic sampling of operations in
291 the CPU pipeline and reports this via the perf AUX interface.
292
293 +config L4STAT_PMU
294 + bool "Gaisler L4STAT statistics unit support for GR740"
295 + depends on SPARC
296 + help
297 + Enable perf driver for Gaisler LEON4 Statistics Unit (L4STAT).
298 +
299 + Adds support for counting events in the LEON4 processor
300 + and on the AHB bus, in order to create performance statistics
301 + for the GR740 SoC.
302 +
303 source "drivers/perf/hisilicon/Kconfig"
304
305 endmenu
306 diff --git a/drivers/perf/Makefile b/drivers/perf/Makefile
307 index 5365fd56f88f..b64542c520ef 100644
308 --- a/drivers/perf/Makefile
309 +++ b/drivers/perf/Makefile
310 @@ -13,3 +13,4 @@ obj-$(CONFIG_QCOM_L3_PMU) += qcom_l3_pmu.o
311 obj-$(CONFIG_XGENE_PMU) += xgene_pmu.o
312 obj-$(CONFIG_ARM_SPE_PMU) += arm_spe_pmu.o
313 obj-$(CONFIG_ARM_DMC620_PMU) += arm_dmc620_pmu.o
314 +obj-$(CONFIG_L4STAT_PMU) += l4stat_pmu.o
315 diff --git a/drivers/perf/l4stat_pmu.c b/drivers/perf/l4stat_pmu.c
316 new file mode 100644
317 index 000000000000..07ee732752cb
318 --- /dev/null
319 +++ b/drivers/perf/l4stat_pmu.c
320 @@ -0,0 +1,705 @@
321 +// SPDX-License-Identifier: GPL-2.0+
322 +/*
323 + * PMU driver for Cobham Gaisler L4STAT on GR740.
324 + *
325 + * 2022 (c) Cobham Gaisler AB
326 + *
327 + * This driver supports L4STAT Statistical Unit cores available in the
328 + * GRLIB VHDL IP core library.
329 + *
330 + * Full documentation of the L4STAT core on GR740 can be found in the GR740 Data
331 + * Sheet and User’s Manual at https://www.gaisler.com/gr740
332 + *
333 + * Contributors:
334 + * - Eneli Elbing <eneli.elbing@gaisler.com>
335 + */
336 +
337 +#include <linux/kernel.h>
338 +#include <linux/perf_event.h>
339 +#include <linux/platform_device.h>
340 +
341 +#define DRV_NAME "l4stat"
342 +#define GR740_MAX_CPUID 3
343 +#define L4STAT_NUM_CNTRS 16
344 +
345 +#define SU_OPT1 0x2
346 +#define SU_OPT2 0x3
347 +
348 +#define CCTRL_SU_USER (0x2 << CCTRL_SU_BIT)
349 +#define CCTRL_SU_KERNEL (0x1 << CCTRL_SU_BIT)
350 +#define CCTRL_EN (0x1 << CCTRL_EN_BIT)
351 +
352 +#define CCTRL_SU_BIT 14
353 +#define CCTRL_EN_BIT 12
354 +#define CCTRL_EVENTID_BIT 4
355 +#define CCTRL_CPUAHBM_BIT 0
356 +
357 +#define L4STAT_CVAL 0x000
358 +#define L4STAT_CCTRL 0x080
359 +
360 +#define APB_ADDR_OFFSET(cntr_idx) (4*cntr_idx)
361 +
362 +#define PROC_EVENT_RANGE_START L4STAT_EVENT_ICACHE_MISS
363 +#define PROC_EVENT_RANGE_END L4STAT_EVENT_STORE_INSTRUCTIONS
364 +
365 +/*
366 + * L4STAT processor events
367 + */
368 +#define L4STAT_EVENT_ICACHE_MISS 0x00
369 +#define L4STAT_EVENT_IMMU_TLB_MISS 0x01
370 +#define L4STAT_EVENT_ICACHE_HOLD 0x02
371 +#define L4STAT_EVENT_IMMU_HOLD 0x03
372 +#define L4STAT_EVENT_DCACHE_MISS 0x08
373 +#define L4STAT_EVENT_DMMU_TLB_MISS 0x09
374 +#define L4STAT_EVENT_DCACHE_HOLD 0x0a
375 +#define L4STAT_EVENT_DMMU_HOLD 0x0b
376 +#define L4STAT_EVENT_DATA_WRITE_BUFFER_HOLD 0x10
377 +#define L4STAT_EVENT_TOTAL_INSTRUCTIONS 0x11
378 +#define L4STAT_EVENT_INT_INSTRUCTIONS 0x12
379 +#define L4STAT_EVENT_FPU_INSTRUCTIONS 0x13
380 +#define L4STAT_EVENT_BRANCH_PREDICTION_MISS 0x14
381 +#define L4STAT_EVENT_EXECUTION_TIME 0x15
382 +#define L4STAT_EVENT_AHB_UTILIZATION 0x17
383 +#define L4STAT_EVENT_AHB_TOTAL_UTILIZATION 0x18
384 +#define L4STAT_EVENT_INT_BRANCHES 0x22
385 +#define L4STAT_EVENT_CALL_INSTRUCTIONS 0x28
386 +#define L4STAT_EVENT_REGULAR_TYPE2_INSTRUCTIONS 0x30
387 +#define L4STAT_EVENT_LOAD_AND_STORE_INSTRUCTIONS 0x38
388 +#define L4STAT_EVENT_LOAD_INSTRUCTIONS 0x39
389 +#define L4STAT_EVENT_STORE_INSTRUCTIONS 0x3a
390 +
391 +/*
392 + * L4STAT AHB events
393 + * (counted via LEON4 Debug Support Unit)
394 + */
395 +#define L4STAT_EVENT_AHB_IDLE_CYCLES 0x40
396 +#define L4STAT_EVENT_AHB_BUSY_CYCLES 0x41
397 +#define L4STAT_EVENT_AHB_NON_SEQ_TRANSFERS 0x42
398 +#define L4STAT_EVENT_AHB_SEQ_TRANSFERS 0x43
399 +#define L4STAT_EVENT_AHB_READ_ACCESSES 0x44
400 +#define L4STAT_EVENT_AHB_WRITE_ACCESSES 0x45
401 +#define L4STAT_EVENT_AHB_BYTE_ACCESSES 0x46
402 +#define L4STAT_EVENT_AHB_HALF_WORD_ACCESSES 0x47
403 +#define L4STAT_EVENT_AHB_WORD_ACCESSES 0x48
404 +#define L4STAT_EVENT_AHB_DOUBLE_WORD_ACCESSES 0x49
405 +#define L4STAT_EVENT_AHB_QUAD_WORD_ACCESSES 0x4a
406 +#define L4STAT_EVENT_AHB_EIGHT_WORD_ACCESSES 0x4b
407 +#define L4STAT_EVENT_AHB_WAITSTATES 0x4c
408 +#define L4STAT_EVENT_AHB_RETRY_RESPONSES 0x4d
409 +#define L4STAT_EVENT_AHB_SPLIT_RESPONSES 0x4e
410 +#define L4STAT_EVENT_AHB_SPLIT_DELAY 0x4f
411 +#define L4STAT_EVENT_AHB_BUS_LOCKED 0x50
412 +
413 +/*
414 + * L4STAT device specific events
415 + * (may be marked as user defined events in generic software drivers)
416 + */
417 +#define L4STAT_EVENT_L2CACHE_HIT 0x60
418 +#define L4STAT_EVENT_L2CACHE_MISS 0x61
419 +#define L4STAT_EVENT_L2CACHE_BUS_ACCESS 0x62
420 +#define L4STAT_EVENT_L2CACHE_TAG_CORRECTABLE_ERROR 0x63
421 +#define L4STAT_EVENT_L2CACHE_TAG_UNCORRECTABLE_ERROR 0x64
422 +#define L4STAT_EVENT_L2CACHE_DATA_CORRECTABLE_ERROR 0x65
423 +#define L4STAT_EVENT_L2CACHE_DATA_UNCORRECTABLE_ERROR 0x66
424 +#define L4STAT_EVENT_IOMMU_CACHE_LOOKUP 0x67
425 +#define L4STAT_EVENT_IOMMU_TABLE_WALK 0x68
426 +#define L4STAT_EVENT_IOMMU_ACCESS_DENIED 0x69
427 +#define L4STAT_EVENT_IOMMU_ACCESS_OK 0x6a
428 +#define L4STAT_EVENT_IOMMU_ACCESS_PASSTHROUGH 0x6b
429 +#define L4STAT_EVENT_IOMMU_CACHE_TLB_MISS 0x6c
430 +#define L4STAT_EVENT_IOMMU_CACHE_TLB_HIT 0x6d
431 +#define L4STAT_EVENT_IOMMU_CACHE_TLB_PARITY_ERROR 0x6e
432 +
433 +/*
434 + * L4STAT AHB events
435 + * (only available if core is connected to a standalone AHB trace buffer):
436 + */
437 +#define L4STAT_EVENT_AHB_TRACE_IDLE_CYCLES 0x70
438 +#define L4STAT_EVENT_AHB_TRACE_BUSY_CYCLES 0x71
439 +#define L4STAT_EVENT_AHB_TRACE_NON_SEQ_TRANSFERS 0x72
440 +#define L4STAT_EVENT_AHB_TRACE_SEQ_TRANSFERS 0x73
441 +#define L4STAT_EVENT_AHB_TRACE_READ_ACCESSES 0x74
442 +#define L4STAT_EVENT_AHB_TRACE_WRITE_ACCESSES 0x75
443 +#define L4STAT_EVENT_AHB_TRACE_BYTE_ACCESSES 0x76
444 +#define L4STAT_EVENT_AHB_TRACE_HALF_WORD_ACCESSES 0x77
445 +#define L4STAT_EVENT_AHB_TRACE_WORD_ACCESSES 0x78
446 +#define L4STAT_EVENT_AHB_TRACE_DOUBLE_WORD_ACCESSES 0x79
447 +#define L4STAT_EVENT_AHB_TRACE_QUAD_WORD_ACCESSES 0x7a
448 +#define L4STAT_EVENT_AHB_TRACE_EIGHT_WORD_ACCESSES 0x7b
449 +#define L4STAT_EVENT_AHB_TRACE_WAITSTATES 0x7c
450 +#define L4STAT_EVENT_AHB_TRACE_RETRY_RESPONSES 0x7d
451 +#define L4STAT_EVENT_AHB_TRACE_SPLIT_RESPONSES 0x7e
452 +#define L4STAT_EVENT_AHB_TRACE_SPLIT_DELAY 0x7f
453 +
454 +/*
455 + * L4STAT events generated from REQ/GNT signals
456 + */
457 +# define L4STAT_EVENT_REQ_GNT_AHBM0_PROC 0x80
458 +# define L4STAT_EVENT_REQ_GNT_AHBM1_PROC 0x81
459 +# define L4STAT_EVENT_REQ_GNT_AHBM2_PROC 0x82
460 +# define L4STAT_EVENT_REQ_GNT_AHBM3_PROC 0x83
461 +# define L4STAT_EVENT_REQ_GNT_AHBM4_PROC 0x84
462 +# define L4STAT_EVENT_REQ_GNT_AHBM5_PROC 0x85
463 +# define L4STAT_EVENT_REQ_GNT_AHBM0_MEM 0x86
464 +# define L4STAT_EVENT_REQ_GNT_AHBM1_MEM 0x87
465 +# define L4STAT_EVENT_REQ_GNT_AHBM2_MEM 0x88
466 +# define L4STAT_EVENT_REQ_AHBM0_PROC 0x90
467 +# define L4STAT_EVENT_REQ_AHBM1_PROC 0x91
468 +# define L4STAT_EVENT_REQ_AHBM2_PROC 0x92
469 +# define L4STAT_EVENT_REQ_AHBM3_PROC 0x93
470 +# define L4STAT_EVENT_REQ_AHBM4_PROC 0x94
471 +# define L4STAT_EVENT_REQ_AHBM5_PROC 0x95
472 +# define L4STAT_EVENT_REQ_AHBM0_MEM 0x96
473 +# define L4STAT_EVENT_REQ_AHBM1_MEM 0x97
474 +# define L4STAT_EVENT_REQ_AHBM2_MEM 0x98
475 +
476 +#define to_l4stat_pmu(c) (container_of(c, struct l4stat_pmu, pmu))
477 +
478 +ssize_t l4stat_format_sysfs_show(struct device *dev,
479 + struct device_attribute *attr, char *buf)
480 +{
481 + struct dev_ext_attribute *eattr;
482 +
483 + eattr = container_of(attr, struct dev_ext_attribute, attr);
484 +
485 + return sprintf(buf, "%s\n", (char *)eattr->var);
486 +}
487 +
488 +ssize_t l4stat_event_sysfs_show(struct device *dev,
489 + struct device_attribute *attr, char *page)
490 +{
491 + struct dev_ext_attribute *eattr;
492 +
493 + eattr = container_of(attr, struct dev_ext_attribute, attr);
494 +
495 + return sprintf(page, "config=0x%lx\n",
496 + (unsigned long)eattr->var);
497 +}
498 +
499 +#define L4STAT_ATTR(_name, _func, _config) \
500 + (&((struct dev_ext_attribute[]) { \
501 + { __ATTR(_name, 0444, _func, NULL), (void *)_config } \
502 + })[0].attr.attr)
503 +
504 +#define L4STAT_FORMAT_ATTR(_name, _config) \
505 + L4STAT_ATTR(_name, l4stat_format_sysfs_show, (void *)_config)
506 +
507 +#define L4STAT_EVENT_ATTR(_name, _config) \
508 + L4STAT_ATTR(_name, l4stat_event_sysfs_show, (unsigned long)_config)
509 +
510 +/*
511 + * Kernel PMU event attributes
512 + */
513 +static struct attribute *l4stat_pmu_events[] = {
514 + /*
515 + * Processor events
516 + */
517 + L4STAT_EVENT_ATTR(proc_icache_miss, L4STAT_EVENT_ICACHE_MISS),
518 + L4STAT_EVENT_ATTR(proc_immu_tlb_miss, L4STAT_EVENT_IMMU_TLB_MISS),
519 + L4STAT_EVENT_ATTR(proc_icache_hold, L4STAT_EVENT_ICACHE_HOLD),
520 + L4STAT_EVENT_ATTR(proc_immu_hold, L4STAT_EVENT_IMMU_HOLD),
521 + L4STAT_EVENT_ATTR(proc_dcache_miss, L4STAT_EVENT_DCACHE_MISS),
522 + L4STAT_EVENT_ATTR(proc_dmmu_tlb_miss, L4STAT_EVENT_DMMU_TLB_MISS),
523 + L4STAT_EVENT_ATTR(proc_dcache_hold, L4STAT_EVENT_DCACHE_HOLD),
524 + L4STAT_EVENT_ATTR(proc_dmmu_hold, L4STAT_EVENT_DMMU_HOLD),
525 + L4STAT_EVENT_ATTR(proc_data_write_buffer_hold,
526 + L4STAT_EVENT_DATA_WRITE_BUFFER_HOLD),
527 + L4STAT_EVENT_ATTR(proc_total_instructions,
528 + L4STAT_EVENT_TOTAL_INSTRUCTIONS),
529 + L4STAT_EVENT_ATTR(proc_int_instructions, L4STAT_EVENT_INT_INSTRUCTIONS),
530 + L4STAT_EVENT_ATTR(proc_fpu_instructions, L4STAT_EVENT_FPU_INSTRUCTIONS),
531 + L4STAT_EVENT_ATTR(proc_branch_prediction_miss,
532 + L4STAT_EVENT_BRANCH_PREDICTION_MISS),
533 + L4STAT_EVENT_ATTR(proc_execution_time, L4STAT_EVENT_EXECUTION_TIME),
534 + L4STAT_EVENT_ATTR(proc_ahb_utilization, L4STAT_EVENT_AHB_UTILIZATION),
535 + L4STAT_EVENT_ATTR(proc_ahb_total_utilization,
536 + L4STAT_EVENT_AHB_TOTAL_UTILIZATION),
537 + L4STAT_EVENT_ATTR(proc_int_branches, L4STAT_EVENT_INT_BRANCHES),
538 + L4STAT_EVENT_ATTR(proc_call_instructions,
539 + L4STAT_EVENT_CALL_INSTRUCTIONS),
540 + L4STAT_EVENT_ATTR(proc_regular_type2_instructions,
541 + L4STAT_EVENT_REGULAR_TYPE2_INSTRUCTIONS),
542 + L4STAT_EVENT_ATTR(proc_load_and_store_instructions,
543 + L4STAT_EVENT_LOAD_AND_STORE_INSTRUCTIONS),
544 + L4STAT_EVENT_ATTR(proc_load_instructions,
545 + L4STAT_EVENT_LOAD_INSTRUCTIONS),
546 + L4STAT_EVENT_ATTR(proc_store_instructions,
547 + L4STAT_EVENT_STORE_INSTRUCTIONS),
548 + /*
549 + * AHB Events (LEON4 Debug Support Unit)
550 + */
551 + L4STAT_EVENT_ATTR(ahb_busy_cycles, L4STAT_EVENT_AHB_BUSY_CYCLES),
552 + L4STAT_EVENT_ATTR(ahb_non_seq_transfers,
553 + L4STAT_EVENT_AHB_NON_SEQ_TRANSFERS),
554 + L4STAT_EVENT_ATTR(ahb_seq_transfers, L4STAT_EVENT_AHB_SEQ_TRANSFERS),
555 + L4STAT_EVENT_ATTR(ahb_idle_cycles, L4STAT_EVENT_AHB_IDLE_CYCLES),
556 + L4STAT_EVENT_ATTR(ahb_read_accesses, L4STAT_EVENT_AHB_READ_ACCESSES),
557 + L4STAT_EVENT_ATTR(ahb_write_accesses, L4STAT_EVENT_AHB_WRITE_ACCESSES),
558 + L4STAT_EVENT_ATTR(ahb_byte_accesses, L4STAT_EVENT_AHB_BYTE_ACCESSES),
559 + L4STAT_EVENT_ATTR(ahb_half_word_accesses,
560 + L4STAT_EVENT_AHB_HALF_WORD_ACCESSES),
561 + L4STAT_EVENT_ATTR(ahb_word_accesses, L4STAT_EVENT_AHB_WORD_ACCESSES),
562 + L4STAT_EVENT_ATTR(ahb_double_word_accesses,
563 + L4STAT_EVENT_AHB_DOUBLE_WORD_ACCESSES),
564 + L4STAT_EVENT_ATTR(ahb_quad_word_accesses,
565 + L4STAT_EVENT_AHB_QUAD_WORD_ACCESSES),
566 + L4STAT_EVENT_ATTR(ahb_eight_word_accesses,
567 + L4STAT_EVENT_AHB_EIGHT_WORD_ACCESSES),
568 + L4STAT_EVENT_ATTR(ahb_waitstates, L4STAT_EVENT_AHB_WAITSTATES),
569 + L4STAT_EVENT_ATTR(ahb_retry_responses,
570 + L4STAT_EVENT_AHB_RETRY_RESPONSES),
571 + L4STAT_EVENT_ATTR(ahb_split_responses,
572 + L4STAT_EVENT_AHB_SPLIT_RESPONSES),
573 + L4STAT_EVENT_ATTR(ahb_split_delay, L4STAT_EVENT_AHB_SPLIT_DELAY),
574 + L4STAT_EVENT_ATTR(ahb_bus_locked, L4STAT_EVENT_AHB_BUS_LOCKED),
575 + /*
576 + * Device specific events
577 + */
578 + L4STAT_EVENT_ATTR(ext_l2cache_hit, L4STAT_EVENT_L2CACHE_HIT),
579 + L4STAT_EVENT_ATTR(ext_l2cache_miss, L4STAT_EVENT_L2CACHE_MISS),
580 + L4STAT_EVENT_ATTR(ext_l2cache_bus_access,
581 + L4STAT_EVENT_L2CACHE_BUS_ACCESS),
582 + L4STAT_EVENT_ATTR(ext_l2cache_tag_correctable_error,
583 + L4STAT_EVENT_L2CACHE_TAG_CORRECTABLE_ERROR),
584 + L4STAT_EVENT_ATTR(ext_l2cache_tag_uncorrectable_error,
585 + L4STAT_EVENT_L2CACHE_TAG_UNCORRECTABLE_ERROR),
586 + L4STAT_EVENT_ATTR(ext_l2cache_data_correctable_error,
587 + L4STAT_EVENT_L2CACHE_DATA_CORRECTABLE_ERROR),
588 + L4STAT_EVENT_ATTR(ext_l2cache_data_uncorrectable_error,
589 + L4STAT_EVENT_L2CACHE_DATA_UNCORRECTABLE_ERROR),
590 + L4STAT_EVENT_ATTR(ext_iommu_cache_lookup,
591 + L4STAT_EVENT_IOMMU_CACHE_LOOKUP),
592 + L4STAT_EVENT_ATTR(ext_iommu_table_walk, L4STAT_EVENT_IOMMU_TABLE_WALK),
593 + L4STAT_EVENT_ATTR(ext_iommu_access_denied,
594 + L4STAT_EVENT_IOMMU_ACCESS_DENIED),
595 + L4STAT_EVENT_ATTR(ext_iommu_access_ok, L4STAT_EVENT_IOMMU_ACCESS_OK),
596 + L4STAT_EVENT_ATTR(ext_iommu_access_passthrough,
597 + L4STAT_EVENT_IOMMU_ACCESS_PASSTHROUGH),
598 + L4STAT_EVENT_ATTR(ext_iommu_cache_tlb_miss,
599 + L4STAT_EVENT_IOMMU_CACHE_TLB_MISS),
600 + L4STAT_EVENT_ATTR(ext_iommu_cache_tlb_hit,
601 + L4STAT_EVENT_IOMMU_CACHE_TLB_HIT),
602 + L4STAT_EVENT_ATTR(ext_iommu_cache_tlb_parity_error,
603 + L4STAT_EVENT_IOMMU_CACHE_TLB_PARITY_ERROR),
604 + /*
605 + * AHB events (standalone AHB trace buffer)
606 + */
607 + L4STAT_EVENT_ATTR(ahbtrace_busy_cycles,
608 + L4STAT_EVENT_AHB_TRACE_BUSY_CYCLES),
609 + L4STAT_EVENT_ATTR(ahbtrace_non_seq_transfers,
610 + L4STAT_EVENT_AHB_TRACE_NON_SEQ_TRANSFERS),
611 + L4STAT_EVENT_ATTR(ahbtrace_seq_transfers,
612 + L4STAT_EVENT_AHB_TRACE_SEQ_TRANSFERS),
613 + L4STAT_EVENT_ATTR(ahbtrace_idle_cycles,
614 + L4STAT_EVENT_AHB_TRACE_IDLE_CYCLES),
615 + L4STAT_EVENT_ATTR(ahbtrace_read_accesses,
616 + L4STAT_EVENT_AHB_TRACE_READ_ACCESSES),
617 + L4STAT_EVENT_ATTR(ahbtrace_write_accesses,
618 + L4STAT_EVENT_AHB_TRACE_WRITE_ACCESSES),
619 + L4STAT_EVENT_ATTR(ahbtrace_byte_accesses,
620 + L4STAT_EVENT_AHB_TRACE_BYTE_ACCESSES),
621 + L4STAT_EVENT_ATTR(ahbtrace_half_word_accesses,
622 + L4STAT_EVENT_AHB_TRACE_HALF_WORD_ACCESSES),
623 + L4STAT_EVENT_ATTR(ahbtrace_word_accesses,
624 + L4STAT_EVENT_AHB_TRACE_WORD_ACCESSES),
625 + L4STAT_EVENT_ATTR(ahbtrace_double_word_accesses,
626 + L4STAT_EVENT_AHB_TRACE_DOUBLE_WORD_ACCESSES),
627 + L4STAT_EVENT_ATTR(ahbtrace_quad_word_accesses,
628 + L4STAT_EVENT_AHB_TRACE_QUAD_WORD_ACCESSES),
629 + L4STAT_EVENT_ATTR(ahbtrace_eight_word_accesses,
630 + L4STAT_EVENT_AHB_TRACE_EIGHT_WORD_ACCESSES),
631 + L4STAT_EVENT_ATTR(ahbtrace_waitstates,
632 + L4STAT_EVENT_AHB_TRACE_WAITSTATES),
633 + L4STAT_EVENT_ATTR(ahbtrace_retry_responses,
634 + L4STAT_EVENT_AHB_TRACE_RETRY_RESPONSES),
635 + L4STAT_EVENT_ATTR(ahbtrace_split_responses,
636 + L4STAT_EVENT_AHB_TRACE_SPLIT_RESPONSES),
637 + L4STAT_EVENT_ATTR(ahbtrace_split_delay,
638 + L4STAT_EVENT_AHB_TRACE_SPLIT_DELAY),
639 + /*
640 + * Events generated from REQ/GNT signals
641 + */
642 + L4STAT_EVENT_ATTR(reqgnt_ahbm0_proc, L4STAT_EVENT_REQ_GNT_AHBM0_PROC),
643 + L4STAT_EVENT_ATTR(reqgnt_ahbm1_proc, L4STAT_EVENT_REQ_GNT_AHBM1_PROC),
644 + L4STAT_EVENT_ATTR(reqgnt_ahbm2_proc, L4STAT_EVENT_REQ_GNT_AHBM2_PROC),
645 + L4STAT_EVENT_ATTR(reqgnt_ahbm3_proc, L4STAT_EVENT_REQ_GNT_AHBM3_PROC),
646 + L4STAT_EVENT_ATTR(reqgnt_ahbm4_proc, L4STAT_EVENT_REQ_GNT_AHBM4_PROC),
647 + L4STAT_EVENT_ATTR(reqgnt_ahbm5_proc, L4STAT_EVENT_REQ_GNT_AHBM5_PROC),
648 + L4STAT_EVENT_ATTR(reqgnt_ahbm0_mem, L4STAT_EVENT_REQ_GNT_AHBM0_MEM),
649 + L4STAT_EVENT_ATTR(reqgnt_ahbm1_mem, L4STAT_EVENT_REQ_GNT_AHBM1_MEM),
650 + L4STAT_EVENT_ATTR(reqgnt_ahbm2_mem, L4STAT_EVENT_REQ_GNT_AHBM2_MEM),
651 + L4STAT_EVENT_ATTR(req_ahbm0_proc, L4STAT_EVENT_REQ_AHBM0_PROC),
652 + L4STAT_EVENT_ATTR(req_ahbm1_proc, L4STAT_EVENT_REQ_AHBM1_PROC),
653 + L4STAT_EVENT_ATTR(req_ahbm2_proc, L4STAT_EVENT_REQ_AHBM2_PROC),
654 + L4STAT_EVENT_ATTR(req_ahbm3_proc, L4STAT_EVENT_REQ_AHBM3_PROC),
655 + L4STAT_EVENT_ATTR(req_ahbm4_proc, L4STAT_EVENT_REQ_AHBM4_PROC),
656 + L4STAT_EVENT_ATTR(req_ahbm5_proc, L4STAT_EVENT_REQ_AHBM5_PROC),
657 + L4STAT_EVENT_ATTR(req_ahbm0_mem, L4STAT_EVENT_REQ_AHBM0_MEM),
658 + L4STAT_EVENT_ATTR(req_ahbm1_mem, L4STAT_EVENT_REQ_AHBM1_MEM),
659 + L4STAT_EVENT_ATTR(req_ahbm2_mem, L4STAT_EVENT_REQ_AHBM2_MEM),
660 + NULL,
661 +};
662 +
663 +static struct attribute *l4stat_pmu_format[] = {
664 + L4STAT_FORMAT_ATTR(event, "config:0-7"), // EID is 8 bits
665 + L4STAT_FORMAT_ATTR(ahbm, "config1:0-3"), // AHBM is 4 bits
666 + L4STAT_FORMAT_ATTR(su, "config2:0-1"), // SU is 2 bits
667 + NULL,
668 +};
669 +
670 +static const struct attribute_group l4stat_pmu_format_group = {
671 + .name = "format",
672 + .attrs = l4stat_pmu_format,
673 +};
674 +
675 +static const struct attribute_group l4stat_pmu_events_group = {
676 + .name = "events",
677 + .attrs = l4stat_pmu_events,
678 +};
679 +
680 +static const struct attribute_group *l4stat_pmu_attr_groups[] = {
681 + &l4stat_pmu_format_group,
682 + &l4stat_pmu_events_group,
683 + NULL,
684 +};
685 +
686 +struct l4stat_pmu_hw_events {
687 + struct perf_event **events;
688 + unsigned long *used_mask;
689 +};
690 +
691 +struct l4stat_pmu {
692 + void __iomem *regs;
693 + struct pmu pmu;
694 + struct l4stat_pmu_hw_events hw_events;
695 + struct platform_device *platform_dev;
696 +};
697 +
698 +static inline u32 l4stat_pmu_read_register(struct l4stat_pmu *l4stat_pmu,
699 + int idx, unsigned int reg)
700 +{
701 + unsigned int offset = APB_ADDR_OFFSET(idx) + reg;
702 +
703 + return ioread32be(l4stat_pmu->regs + offset);
704 +}
705 +
706 +static inline void l4stat_pmu_write_register(struct l4stat_pmu *l4stat_pmu,
707 + u32 val, int idx, unsigned int reg)
708 +{
709 + int offset = APB_ADDR_OFFSET(idx) + reg;
710 +
711 + iowrite32be(val, l4stat_pmu->regs + offset);
712 +}
713 +
714 +static u32 l4stat_pmu_read_counter(struct perf_event *event)
715 +{
716 + struct l4stat_pmu *l4stat_pmu = to_l4stat_pmu(event->pmu);
717 + struct hw_perf_event *hw_counter = &event->hw;
718 + int idx = hw_counter->idx;
719 + unsigned int reg = L4STAT_CVAL;
720 + u32 value;
721 +
722 + value = l4stat_pmu_read_register(l4stat_pmu, idx, reg);
723 +
724 + return value;
725 +}
726 +
727 +static void l4stat_pmu_clear_counter(struct l4stat_pmu *l4stat_pmu, int idx)
728 +{
729 + l4stat_pmu_write_register(l4stat_pmu, 0, idx, L4STAT_CVAL);
730 +}
731 +
732 +static int l4stat_get_event_idx(struct l4stat_pmu_hw_events *hw)
733 +{
734 + int idx;
735 +
736 + /* Generic code to find an unused idx from the mask */
737 + for (idx = 0; idx < L4STAT_NUM_CNTRS; ++idx)
738 + if (!test_and_set_bit(idx, hw->used_mask))
739 + return idx;
740 +
741 + /* No counters available */
742 + return -EAGAIN;
743 +}
744 +
745 +static int l4stat_map_raw_event(u64 config)
746 +{
747 + int mapping = (int)(config);
748 + return mapping;
749 +}
750 +
751 +int l4stat_map_event(struct perf_event *event)
752 +{
753 + u64 config = event->attr.config;
754 + int type = event->attr.type;
755 +
756 + if (type == event->pmu->type)
757 + return l4stat_map_raw_event(config);
758 +
759 + return -ENOENT;
760 +}
761 +
762 +static int l4stat_pmu_event_init(struct perf_event *event)
763 +{
764 + struct hw_perf_event *hwc = &event->hw;
765 + int mapping;
766 +
767 + mapping = l4stat_map_event(event);
768 +
769 + if (mapping < 0) {
770 + pr_debug("Invalid event %x:%llx\n", event->attr.type,
771 + event->attr.config);
772 + return mapping;
773 + }
774 +
775 + /*
776 + * We don't assign an index until we actually place the event onto
777 + * hardware. Use -1 to signify that we haven't decided where to put it
778 + * yet.
779 + */
780 + hwc->idx = -1;
781 + hwc->config_base = 0;
782 + hwc->config = 0;
783 + hwc->event_base = 0;
784 +
785 + /*
786 + * Store the event encoding into the config_base field.
787 + */
788 + hwc->config_base |= (unsigned long)mapping;
789 +
790 + return 0;
791 +}
792 +
793 +static void l4stat_pmu_start(struct perf_event *event, int pmu_flags)
794 +{
795 + struct l4stat_pmu *l4stat_pmu = to_l4stat_pmu(event->pmu);
796 + struct hw_perf_event *hwc = &event->hw;
797 + int idx = hwc->idx;
798 + u32 ctrl;
799 +
800 + hwc->state = 0;
801 + ctrl = 0;
802 +
803 + if (event->attr.exclude_user)
804 + ctrl |= CCTRL_SU_KERNEL;
805 + else if (event->attr.exclude_kernel)
806 + ctrl |= CCTRL_SU_USER;
807 +
808 + ctrl |= event->attr.config2 << CCTRL_SU_BIT;
809 +
810 + ctrl |= event->attr.config << CCTRL_EVENTID_BIT;
811 +
812 + // Processor events (except processor AHB events)
813 + if ((event->attr.config >= PROC_EVENT_RANGE_START &&
814 + event->attr.config < L4STAT_EVENT_AHB_UTILIZATION) ||
815 + (event->attr.config > L4STAT_EVENT_AHB_TOTAL_UTILIZATION &&
816 + event->attr.config <= PROC_EVENT_RANGE_END))
817 + // CPU AHBM
818 + if (event->attr.config1 <= GR740_MAX_CPUID)
819 + ctrl |= event->oncpu << CCTRL_CPUAHBM_BIT;
820 + // Non-CPU AHBM
821 + else
822 + ctrl |= event->attr.config1 << CCTRL_CPUAHBM_BIT;
823 + // Remaining events (proc AHB, external, AHB, AHB trace, REQ/GNT)
824 + else
825 + // Filter only on non-CPU AHBM
826 + if (event->attr.config2 == SU_OPT1)
827 + // CPU AHBM
828 + if (event->attr.config1 <= GR740_MAX_CPUID)
829 + ctrl |= event->oncpu << CCTRL_CPUAHBM_BIT;
830 + // Non-CPU AHBM
831 + else
832 + ctrl |= event->attr.config1 << CCTRL_CPUAHBM_BIT;
833 + // Filter on any AHBM
834 + else if (event->attr.config2 == SU_OPT2)
835 + ctrl |= event->attr.config1 << CCTRL_CPUAHBM_BIT;
836 +
837 + ctrl |= CCTRL_EN;
838 +
839 + l4stat_pmu_write_register(l4stat_pmu, ctrl, idx, L4STAT_CCTRL);
840 +}
841 +
842 +static int l4stat_pmu_add(struct perf_event *event, int flags)
843 +{
844 + struct l4stat_pmu *l4stat_pmu = to_l4stat_pmu(event->pmu);
845 + struct l4stat_pmu_hw_events *hw_events = &l4stat_pmu->hw_events;
846 + struct hw_perf_event *hwc = &event->hw;
847 + int idx;
848 +
849 + /* If we don't have a space for the counter then finish early. */
850 + idx = l4stat_get_event_idx(hw_events);
851 + if (idx < 0)
852 + return idx;
853 +
854 + event->hw.idx = idx;
855 + hw_events->events[idx] = event;
856 +
857 + hwc->state = PERF_HES_STOPPED | PERF_HES_UPTODATE;
858 + if (flags & PERF_EF_START)
859 + l4stat_pmu_start(event, PERF_EF_RELOAD);
860 +
861 + /* Propagate our changes to the userspace mapping. */
862 + perf_event_update_userpage(event);
863 +
864 + return 0;
865 +}
866 +
867 +static void l4stat_pmu_disable_counter(struct l4stat_pmu *l4stat_pmu, int idx)
868 +{
869 + u32 ctrl;
870 +
871 + ctrl = l4stat_pmu_read_register(l4stat_pmu, idx, L4STAT_CCTRL);
872 + ctrl &= ~(CCTRL_EN);
873 + l4stat_pmu_write_register(l4stat_pmu, ctrl, idx, L4STAT_CCTRL);
874 +}
875 +
876 +static u32 l4stat_pmu_event_update(struct perf_event *event)
877 +{
878 + struct hw_perf_event *hwc = &event->hw;
879 + u64 prev_raw_count, new_raw_count;
880 +
881 + do {
882 + prev_raw_count = local64_read(&hwc->prev_count);
883 + new_raw_count = l4stat_pmu_read_counter(event);
884 + } while (local64_cmpxchg(&hwc->prev_count, prev_raw_count,
885 + new_raw_count) != prev_raw_count);
886 +
887 + local64_add(new_raw_count, &event->count);
888 +
889 + return new_raw_count;
890 +}
891 +
892 +static void l4stat_pmu_stop(struct perf_event *event, int pmu_flags)
893 +{
894 + struct l4stat_pmu *l4stat_pmu = to_l4stat_pmu(event->pmu);
895 + struct hw_perf_event *hwc = &event->hw;
896 + int idx = hwc->idx;
897 + u64 counter_val;
898 +
899 + if (hwc->state & PERF_HES_STOPPED)
900 + return;
901 +
902 + l4stat_pmu_disable_counter(l4stat_pmu, idx);
903 +
904 + counter_val = l4stat_pmu_event_update(event);
905 +
906 + l4stat_pmu_clear_counter(l4stat_pmu, idx);
907 +
908 + hwc->state |= PERF_HES_STOPPED | PERF_HES_UPTODATE;
909 +}
910 +
911 +static void l4stat_pmu_del(struct perf_event *event, int flags)
912 +{
913 + struct l4stat_pmu *l4stat_pmu = to_l4stat_pmu(event->pmu);
914 + struct l4stat_pmu_hw_events *hw_events = &l4stat_pmu->hw_events;
915 + struct hw_perf_event *hwc = &event->hw;
916 + int idx = hwc->idx;
917 +
918 + l4stat_pmu_stop(event, PERF_EF_UPDATE);
919 + hw_events->events[idx] = NULL;
920 + clear_bit(idx, hw_events->used_mask);
921 +
922 + perf_event_update_userpage(event);
923 +}
924 +
925 +static int l4stat_pmu_init(struct l4stat_pmu *l4stat_pmu,
926 + struct platform_device *pdev)
927 +{
928 + platform_set_drvdata(pdev, l4stat_pmu);
929 +
930 + l4stat_pmu->pmu = (struct pmu){
931 + .module = THIS_MODULE,
932 + .name = DRV_NAME,
933 + .task_ctx_nr = perf_hw_context,
934 + .event_init = l4stat_pmu_event_init,
935 + .add = l4stat_pmu_add,
936 + .del = l4stat_pmu_del,
937 + .start = l4stat_pmu_start,
938 + .stop = l4stat_pmu_stop,
939 + .attr_groups = l4stat_pmu_attr_groups,
940 + .capabilities = PERF_PMU_CAP_NO_INTERRUPT,
941 + };
942 +
943 + l4stat_pmu->platform_dev = pdev;
944 +
945 + return perf_pmu_register(&l4stat_pmu->pmu, DRV_NAME, -1);
946 +}
947 +
948 +static struct l4stat_pmu *l4stat_pmu_alloc(struct device *dev)
949 +{
950 + struct l4stat_pmu *l4stat_pmu;
951 +
952 + l4stat_pmu = devm_kzalloc(dev, sizeof(*l4stat_pmu), GFP_KERNEL);
953 + if (!l4stat_pmu)
954 + return ERR_PTR(-ENOMEM);
955 +
956 + l4stat_pmu->hw_events.events =
957 + devm_kcalloc(dev, L4STAT_NUM_CNTRS,
958 + sizeof(*l4stat_pmu->hw_events.events), GFP_KERNEL);
959 + if (!l4stat_pmu->hw_events.events)
960 + return ERR_PTR(-ENOMEM);
961 +
962 + l4stat_pmu->hw_events.used_mask =
963 + devm_kcalloc(dev, BITS_TO_LONGS(L4STAT_NUM_CNTRS),
964 + sizeof(*l4stat_pmu->hw_events.used_mask),
965 + GFP_KERNEL);
966 + if (!l4stat_pmu->hw_events.used_mask)
967 + return ERR_PTR(-ENOMEM);
968 +
969 + return l4stat_pmu;
970 +}
971 +
972 +static int l4stat_pmu_probe(struct platform_device *pdev)
973 +{
974 + struct l4stat_pmu *l4stat_pmu;
975 + int err;
976 +
977 + l4stat_pmu = l4stat_pmu_alloc(&pdev->dev);
978 + if (IS_ERR(l4stat_pmu))
979 + return PTR_ERR(l4stat_pmu);
980 +
981 + l4stat_pmu->regs = devm_platform_ioremap_resource(pdev, 0);
982 + if (IS_ERR(l4stat_pmu->regs)) {
983 + err = PTR_ERR(l4stat_pmu->regs);
984 + goto exit_error;
985 + }
986 +
987 + err = l4stat_pmu_init(l4stat_pmu, pdev);
988 + if (err)
989 + goto exit_error;
990 +
991 + return 0;
992 +
993 +exit_error:
994 + dev_err(&pdev->dev, "%s driver initialization failed with error %d\n",
995 + DRV_NAME, err);
996 + return err;
997 +}
998 +
999 +static int l4stat_pmu_remove(struct platform_device *pdev)
1000 +{
1001 + struct l4stat_pmu *l4stat_pmu = platform_get_drvdata(pdev);
1002 +
1003 + perf_pmu_unregister(&l4stat_pmu->pmu);
1004 +
1005 + return 0;
1006 +}
1007 +
1008 +static const struct of_device_id l4stat_match[] = {
1009 + { .name = "GAISLER_L4STAT" },
1010 + { .name = "01_047" },
1011 + {},
1012 +};
1013 +
1014 +MODULE_DEVICE_TABLE(of, l4stat_match);
1015 +
1016 +static struct platform_driver l4stat_pmu_driver = {
1017 + .driver = {
1018 + .name = DRV_NAME,
1019 + .of_match_table = l4stat_match,
1020 + },
1021 + .probe = l4stat_pmu_probe,
1022 + .remove = l4stat_pmu_remove,
1023 +};
1024 +
1025 +module_platform_driver(l4stat_pmu_driver);
1026 --
1027 2.34.1
1028