bcm27xx: update 6.1 patches to latest version
[openwrt/staging/dangole.git] / target / linux / bcm27xx / patches-6.1 / 950-0902-drm-v3d-fix-up-register-addresses-for-V3D-7.x.patch
1 From 3f949caeef21269afc67dd62ae9826204f215934 Mon Sep 17 00:00:00 2001
2 From: Iago Toral Quiroga <itoral@igalia.com>
3 Date: Thu, 2 Mar 2023 11:49:46 +0100
4 Subject: [PATCH] drm/v3d: fix up register addresses for V3D 7.x
5
6 v2: fix kernel panic with debug-fs interface to list registers
7 ---
8 drivers/gpu/drm/v3d/v3d_debugfs.c | 177 +++++++++++++++++-------------
9 drivers/gpu/drm/v3d/v3d_gem.c | 3 +
10 drivers/gpu/drm/v3d/v3d_irq.c | 47 ++++----
11 drivers/gpu/drm/v3d/v3d_regs.h | 51 ++++++++-
12 drivers/gpu/drm/v3d/v3d_sched.c | 41 ++++---
13 5 files changed, 204 insertions(+), 115 deletions(-)
14
15 --- a/drivers/gpu/drm/v3d/v3d_debugfs.c
16 +++ b/drivers/gpu/drm/v3d/v3d_debugfs.c
17 @@ -13,69 +13,83 @@
18 #include "v3d_drv.h"
19 #include "v3d_regs.h"
20
21 -#define REGDEF(reg) { reg, #reg }
22 +#define REGDEF(min_ver, max_ver, reg) { min_ver, max_ver, reg, #reg }
23 struct v3d_reg_def {
24 + u32 min_ver;
25 + u32 max_ver;
26 u32 reg;
27 const char *name;
28 };
29
30 static const struct v3d_reg_def v3d_hub_reg_defs[] = {
31 - REGDEF(V3D_HUB_AXICFG),
32 - REGDEF(V3D_HUB_UIFCFG),
33 - REGDEF(V3D_HUB_IDENT0),
34 - REGDEF(V3D_HUB_IDENT1),
35 - REGDEF(V3D_HUB_IDENT2),
36 - REGDEF(V3D_HUB_IDENT3),
37 - REGDEF(V3D_HUB_INT_STS),
38 - REGDEF(V3D_HUB_INT_MSK_STS),
39 -
40 - REGDEF(V3D_MMU_CTL),
41 - REGDEF(V3D_MMU_VIO_ADDR),
42 - REGDEF(V3D_MMU_VIO_ID),
43 - REGDEF(V3D_MMU_DEBUG_INFO),
44 + REGDEF(33, 42, V3D_HUB_AXICFG),
45 + REGDEF(33, 71, V3D_HUB_UIFCFG),
46 + REGDEF(33, 71, V3D_HUB_IDENT0),
47 + REGDEF(33, 71, V3D_HUB_IDENT1),
48 + REGDEF(33, 71, V3D_HUB_IDENT2),
49 + REGDEF(33, 71, V3D_HUB_IDENT3),
50 + REGDEF(33, 71, V3D_HUB_INT_STS),
51 + REGDEF(33, 71, V3D_HUB_INT_MSK_STS),
52 +
53 + REGDEF(33, 71, V3D_MMU_CTL),
54 + REGDEF(33, 71, V3D_MMU_VIO_ADDR),
55 + REGDEF(33, 71, V3D_MMU_VIO_ID),
56 + REGDEF(33, 71, V3D_MMU_DEBUG_INFO),
57 +
58 + REGDEF(71, 71, V3D_V7_GMP_STATUS),
59 + REGDEF(71, 71, V3D_V7_GMP_CFG),
60 + REGDEF(71, 71, V3D_V7_GMP_VIO_ADDR),
61 };
62
63 static const struct v3d_reg_def v3d_gca_reg_defs[] = {
64 - REGDEF(V3D_GCA_SAFE_SHUTDOWN),
65 - REGDEF(V3D_GCA_SAFE_SHUTDOWN_ACK),
66 + REGDEF(33, 33, V3D_GCA_SAFE_SHUTDOWN),
67 + REGDEF(33, 33, V3D_GCA_SAFE_SHUTDOWN_ACK),
68 };
69
70 static const struct v3d_reg_def v3d_core_reg_defs[] = {
71 - REGDEF(V3D_CTL_IDENT0),
72 - REGDEF(V3D_CTL_IDENT1),
73 - REGDEF(V3D_CTL_IDENT2),
74 - REGDEF(V3D_CTL_MISCCFG),
75 - REGDEF(V3D_CTL_INT_STS),
76 - REGDEF(V3D_CTL_INT_MSK_STS),
77 - REGDEF(V3D_CLE_CT0CS),
78 - REGDEF(V3D_CLE_CT0CA),
79 - REGDEF(V3D_CLE_CT0EA),
80 - REGDEF(V3D_CLE_CT1CS),
81 - REGDEF(V3D_CLE_CT1CA),
82 - REGDEF(V3D_CLE_CT1EA),
83 -
84 - REGDEF(V3D_PTB_BPCA),
85 - REGDEF(V3D_PTB_BPCS),
86 -
87 - REGDEF(V3D_GMP_STATUS),
88 - REGDEF(V3D_GMP_CFG),
89 - REGDEF(V3D_GMP_VIO_ADDR),
90 -
91 - REGDEF(V3D_ERR_FDBGO),
92 - REGDEF(V3D_ERR_FDBGB),
93 - REGDEF(V3D_ERR_FDBGS),
94 - REGDEF(V3D_ERR_STAT),
95 + REGDEF(33, 71, V3D_CTL_IDENT0),
96 + REGDEF(33, 71, V3D_CTL_IDENT1),
97 + REGDEF(33, 71, V3D_CTL_IDENT2),
98 + REGDEF(33, 71, V3D_CTL_MISCCFG),
99 + REGDEF(33, 71, V3D_CTL_INT_STS),
100 + REGDEF(33, 71, V3D_CTL_INT_MSK_STS),
101 + REGDEF(33, 71, V3D_CLE_CT0CS),
102 + REGDEF(33, 71, V3D_CLE_CT0CA),
103 + REGDEF(33, 71, V3D_CLE_CT0EA),
104 + REGDEF(33, 71, V3D_CLE_CT1CS),
105 + REGDEF(33, 71, V3D_CLE_CT1CA),
106 + REGDEF(33, 71, V3D_CLE_CT1EA),
107 +
108 + REGDEF(33, 71, V3D_PTB_BPCA),
109 + REGDEF(33, 71, V3D_PTB_BPCS),
110 +
111 + REGDEF(33, 41, V3D_GMP_STATUS),
112 + REGDEF(33, 41, V3D_GMP_CFG),
113 + REGDEF(33, 41, V3D_GMP_VIO_ADDR),
114 +
115 + REGDEF(33, 71, V3D_ERR_FDBGO),
116 + REGDEF(33, 71, V3D_ERR_FDBGB),
117 + REGDEF(33, 71, V3D_ERR_FDBGS),
118 + REGDEF(33, 71, V3D_ERR_STAT),
119 };
120
121 static const struct v3d_reg_def v3d_csd_reg_defs[] = {
122 - REGDEF(V3D_CSD_STATUS),
123 - REGDEF(V3D_CSD_CURRENT_CFG0),
124 - REGDEF(V3D_CSD_CURRENT_CFG1),
125 - REGDEF(V3D_CSD_CURRENT_CFG2),
126 - REGDEF(V3D_CSD_CURRENT_CFG3),
127 - REGDEF(V3D_CSD_CURRENT_CFG4),
128 - REGDEF(V3D_CSD_CURRENT_CFG5),
129 - REGDEF(V3D_CSD_CURRENT_CFG6),
130 + REGDEF(41, 71, V3D_CSD_STATUS),
131 + REGDEF(41, 41, V3D_CSD_CURRENT_CFG0),
132 + REGDEF(41, 41, V3D_CSD_CURRENT_CFG1),
133 + REGDEF(41, 41, V3D_CSD_CURRENT_CFG2),
134 + REGDEF(41, 41, V3D_CSD_CURRENT_CFG3),
135 + REGDEF(41, 41, V3D_CSD_CURRENT_CFG4),
136 + REGDEF(41, 41, V3D_CSD_CURRENT_CFG5),
137 + REGDEF(41, 41, V3D_CSD_CURRENT_CFG6),
138 + REGDEF(71, 71, V3D_V7_CSD_CURRENT_CFG0),
139 + REGDEF(71, 71, V3D_V7_CSD_CURRENT_CFG1),
140 + REGDEF(71, 71, V3D_V7_CSD_CURRENT_CFG2),
141 + REGDEF(71, 71, V3D_V7_CSD_CURRENT_CFG3),
142 + REGDEF(71, 71, V3D_V7_CSD_CURRENT_CFG4),
143 + REGDEF(71, 71, V3D_V7_CSD_CURRENT_CFG5),
144 + REGDEF(71, 71, V3D_V7_CSD_CURRENT_CFG6),
145 + REGDEF(71, 71, V3D_V7_CSD_CURRENT_CFG7),
146 };
147
148 static int v3d_v3d_debugfs_regs(struct seq_file *m, void *unused)
149 @@ -86,38 +100,41 @@ static int v3d_v3d_debugfs_regs(struct s
150 int i, core;
151
152 for (i = 0; i < ARRAY_SIZE(v3d_hub_reg_defs); i++) {
153 - seq_printf(m, "%s (0x%04x): 0x%08x\n",
154 - v3d_hub_reg_defs[i].name, v3d_hub_reg_defs[i].reg,
155 - V3D_READ(v3d_hub_reg_defs[i].reg));
156 + const struct v3d_reg_def *def = &v3d_hub_reg_defs[i];
157 +
158 + if (v3d->ver >= def->min_ver && v3d->ver <= def->max_ver) {
159 + seq_printf(m, "%s (0x%04x): 0x%08x\n",
160 + def->name, def->reg, V3D_READ(def->reg));
161 + }
162 }
163
164 - if (v3d->ver < 41) {
165 - for (i = 0; i < ARRAY_SIZE(v3d_gca_reg_defs); i++) {
166 + for (i = 0; i < ARRAY_SIZE(v3d_gca_reg_defs); i++) {
167 + const struct v3d_reg_def *def = &v3d_gca_reg_defs[i];
168 +
169 + if (v3d->ver >= def->min_ver && v3d->ver <= def->max_ver) {
170 seq_printf(m, "%s (0x%04x): 0x%08x\n",
171 - v3d_gca_reg_defs[i].name,
172 - v3d_gca_reg_defs[i].reg,
173 - V3D_GCA_READ(v3d_gca_reg_defs[i].reg));
174 + def->name, def->reg, V3D_GCA_READ(def->reg));
175 }
176 }
177
178 for (core = 0; core < v3d->cores; core++) {
179 for (i = 0; i < ARRAY_SIZE(v3d_core_reg_defs); i++) {
180 - seq_printf(m, "core %d %s (0x%04x): 0x%08x\n",
181 - core,
182 - v3d_core_reg_defs[i].name,
183 - v3d_core_reg_defs[i].reg,
184 - V3D_CORE_READ(core,
185 - v3d_core_reg_defs[i].reg));
186 + const struct v3d_reg_def *def = &v3d_core_reg_defs[i];
187 +
188 + if (v3d->ver >= def->min_ver && v3d->ver <= def->max_ver) {
189 + seq_printf(m, "core %d %s (0x%04x): 0x%08x\n",
190 + core, def->name, def->reg,
191 + V3D_CORE_READ(core, def->reg));
192 + }
193 }
194
195 - if (v3d_has_csd(v3d)) {
196 - for (i = 0; i < ARRAY_SIZE(v3d_csd_reg_defs); i++) {
197 + for (i = 0; i < ARRAY_SIZE(v3d_csd_reg_defs); i++) {
198 + const struct v3d_reg_def *def = &v3d_csd_reg_defs[i];
199 +
200 + if (v3d->ver >= def->min_ver && v3d->ver <= def->max_ver) {
201 seq_printf(m, "core %d %s (0x%04x): 0x%08x\n",
202 - core,
203 - v3d_csd_reg_defs[i].name,
204 - v3d_csd_reg_defs[i].reg,
205 - V3D_CORE_READ(core,
206 - v3d_csd_reg_defs[i].reg));
207 + core, def->name, def->reg,
208 + V3D_CORE_READ(core, def->reg));
209 }
210 }
211 }
212 @@ -148,8 +165,10 @@ static int v3d_v3d_debugfs_ident(struct
213 str_yes_no(ident2 & V3D_HUB_IDENT2_WITH_MMU));
214 seq_printf(m, "TFU: %s\n",
215 str_yes_no(ident1 & V3D_HUB_IDENT1_WITH_TFU));
216 - seq_printf(m, "TSY: %s\n",
217 - str_yes_no(ident1 & V3D_HUB_IDENT1_WITH_TSY));
218 + if (v3d->ver <= 42) {
219 + seq_printf(m, "TSY: %s\n",
220 + str_yes_no(ident1 & V3D_HUB_IDENT1_WITH_TSY));
221 + }
222 seq_printf(m, "MSO: %s\n",
223 str_yes_no(ident1 & V3D_HUB_IDENT1_WITH_MSO));
224 seq_printf(m, "L3C: %s (%dkb)\n",
225 @@ -178,10 +197,14 @@ static int v3d_v3d_debugfs_ident(struct
226 seq_printf(m, " QPUs: %d\n", nslc * qups);
227 seq_printf(m, " Semaphores: %d\n",
228 V3D_GET_FIELD(ident1, V3D_IDENT1_NSEM));
229 - seq_printf(m, " BCG int: %d\n",
230 - (ident2 & V3D_IDENT2_BCG_INT) != 0);
231 - seq_printf(m, " Override TMU: %d\n",
232 - (misccfg & V3D_MISCCFG_OVRTMUOUT) != 0);
233 + if (v3d->ver <= 42) {
234 + seq_printf(m, " BCG int: %d\n",
235 + (ident2 & V3D_IDENT2_BCG_INT) != 0);
236 + }
237 + if (v3d->ver < 40) {
238 + seq_printf(m, " Override TMU: %d\n",
239 + (misccfg & V3D_MISCCFG_OVRTMUOUT) != 0);
240 + }
241 }
242
243 return 0;
244 @@ -289,8 +312,10 @@ static int v3d_measure_clock(struct seq_
245 int measure_ms = 1000;
246
247 if (v3d->ver >= 40) {
248 + int cycle_count_reg = v3d->ver < 71 ?
249 + V3D_PCTR_CYCLE_COUNT : V3D_V7_PCTR_CYCLE_COUNT;
250 V3D_CORE_WRITE(core, V3D_V4_PCTR_0_SRC_0_3,
251 - V3D_SET_FIELD(V3D_PCTR_CYCLE_COUNT,
252 + V3D_SET_FIELD(cycle_count_reg,
253 V3D_PCTR_S0));
254 V3D_CORE_WRITE(core, V3D_V4_PCTR_0_CLR, 1);
255 V3D_CORE_WRITE(core, V3D_V4_PCTR_0_EN, 1);
256 --- a/drivers/gpu/drm/v3d/v3d_gem.c
257 +++ b/drivers/gpu/drm/v3d/v3d_gem.c
258 @@ -88,6 +88,9 @@ v3d_init_hw_state(struct v3d_dev *v3d)
259 static void
260 v3d_idle_axi(struct v3d_dev *v3d, int core)
261 {
262 + if (v3d->ver >= 71)
263 + return;
264 +
265 V3D_CORE_WRITE(core, V3D_GMP_CFG, V3D_GMP_CFG_STOP_REQ);
266
267 if (wait_for((V3D_CORE_READ(core, V3D_GMP_STATUS) &
268 --- a/drivers/gpu/drm/v3d/v3d_irq.c
269 +++ b/drivers/gpu/drm/v3d/v3d_irq.c
270 @@ -20,16 +20,17 @@
271 #include "v3d_regs.h"
272 #include "v3d_trace.h"
273
274 -#define V3D_CORE_IRQS ((u32)(V3D_INT_OUTOMEM | \
275 - V3D_INT_FLDONE | \
276 - V3D_INT_FRDONE | \
277 - V3D_INT_CSDDONE | \
278 - V3D_INT_GMPV))
279 -
280 -#define V3D_HUB_IRQS ((u32)(V3D_HUB_INT_MMU_WRV | \
281 - V3D_HUB_INT_MMU_PTI | \
282 - V3D_HUB_INT_MMU_CAP | \
283 - V3D_HUB_INT_TFUC))
284 +#define V3D_CORE_IRQS(ver) ((u32)(V3D_INT_OUTOMEM | \
285 + V3D_INT_FLDONE | \
286 + V3D_INT_FRDONE | \
287 + (ver < 71 ? V3D_INT_CSDDONE : V3D_V7_INT_CSDDONE) | \
288 + (ver < 71 ? V3D_INT_GMPV : 0)))
289 +
290 +#define V3D_HUB_IRQS(ver) ((u32)(V3D_HUB_INT_MMU_WRV | \
291 + V3D_HUB_INT_MMU_PTI | \
292 + V3D_HUB_INT_MMU_CAP | \
293 + V3D_HUB_INT_TFUC | \
294 + (ver >= 71 ? V3D_V7_HUB_INT_GMPV : 0)))
295
296 static irqreturn_t
297 v3d_hub_irq(int irq, void *arg);
298 @@ -118,7 +119,8 @@ v3d_irq(int irq, void *arg)
299 status = IRQ_HANDLED;
300 }
301
302 - if (intsts & V3D_INT_CSDDONE) {
303 + if ((v3d->ver < 71 && (intsts & V3D_INT_CSDDONE)) ||
304 + (v3d->ver >= 71 && (intsts & V3D_V7_INT_CSDDONE))) {
305 struct v3d_fence *fence =
306 to_v3d_fence(v3d->csd_job->base.irq_fence);
307 v3d->gpu_queue_stats[V3D_CSD].last_exec_end = local_clock();
308 @@ -131,7 +133,7 @@ v3d_irq(int irq, void *arg)
309 /* We shouldn't be triggering these if we have GMP in
310 * always-allowed mode.
311 */
312 - if (intsts & V3D_INT_GMPV)
313 + if (v3d->ver < 71 && (intsts & V3D_INT_GMPV))
314 dev_err(v3d->drm.dev, "GMP violation\n");
315
316 /* V3D 4.2 wires the hub and core IRQs together, so if we &
317 @@ -205,6 +207,11 @@ v3d_hub_irq(int irq, void *arg)
318 status = IRQ_HANDLED;
319 }
320
321 + if (v3d->ver >= 71 && intsts & V3D_V7_HUB_INT_GMPV) {
322 + dev_err(v3d->drm.dev, "GMP Violation\n");
323 + status = IRQ_HANDLED;
324 + }
325 +
326 return status;
327 }
328
329 @@ -219,8 +226,8 @@ v3d_irq_init(struct v3d_dev *v3d)
330 * for us.
331 */
332 for (core = 0; core < v3d->cores; core++)
333 - V3D_CORE_WRITE(core, V3D_CTL_INT_CLR, V3D_CORE_IRQS);
334 - V3D_WRITE(V3D_HUB_INT_CLR, V3D_HUB_IRQS);
335 + V3D_CORE_WRITE(core, V3D_CTL_INT_CLR, V3D_CORE_IRQS(v3d->ver));
336 + V3D_WRITE(V3D_HUB_INT_CLR, V3D_HUB_IRQS(v3d->ver));
337
338 irq1 = platform_get_irq_optional(v3d_to_pdev(v3d), 1);
339 if (irq1 == -EPROBE_DEFER)
340 @@ -264,12 +271,12 @@ v3d_irq_enable(struct v3d_dev *v3d)
341
342 /* Enable our set of interrupts, masking out any others. */
343 for (core = 0; core < v3d->cores; core++) {
344 - V3D_CORE_WRITE(core, V3D_CTL_INT_MSK_SET, ~V3D_CORE_IRQS);
345 - V3D_CORE_WRITE(core, V3D_CTL_INT_MSK_CLR, V3D_CORE_IRQS);
346 + V3D_CORE_WRITE(core, V3D_CTL_INT_MSK_SET, ~V3D_CORE_IRQS(v3d->ver));
347 + V3D_CORE_WRITE(core, V3D_CTL_INT_MSK_CLR, V3D_CORE_IRQS(v3d->ver));
348 }
349
350 - V3D_WRITE(V3D_HUB_INT_MSK_SET, ~V3D_HUB_IRQS);
351 - V3D_WRITE(V3D_HUB_INT_MSK_CLR, V3D_HUB_IRQS);
352 + V3D_WRITE(V3D_HUB_INT_MSK_SET, ~V3D_HUB_IRQS(v3d->ver));
353 + V3D_WRITE(V3D_HUB_INT_MSK_CLR, V3D_HUB_IRQS(v3d->ver));
354 }
355
356 void
357 @@ -284,8 +291,8 @@ v3d_irq_disable(struct v3d_dev *v3d)
358
359 /* Clear any pending interrupts we might have left. */
360 for (core = 0; core < v3d->cores; core++)
361 - V3D_CORE_WRITE(core, V3D_CTL_INT_CLR, V3D_CORE_IRQS);
362 - V3D_WRITE(V3D_HUB_INT_CLR, V3D_HUB_IRQS);
363 + V3D_CORE_WRITE(core, V3D_CTL_INT_CLR, V3D_CORE_IRQS(v3d->ver));
364 + V3D_WRITE(V3D_HUB_INT_CLR, V3D_HUB_IRQS(v3d->ver));
365
366 cancel_work_sync(&v3d->overflow_mem_work);
367 }
368 --- a/drivers/gpu/drm/v3d/v3d_regs.h
369 +++ b/drivers/gpu/drm/v3d/v3d_regs.h
370 @@ -57,6 +57,7 @@
371 #define V3D_HUB_INT_MSK_STS 0x0005c
372 #define V3D_HUB_INT_MSK_SET 0x00060
373 #define V3D_HUB_INT_MSK_CLR 0x00064
374 +# define V3D_V7_HUB_INT_GMPV BIT(6)
375 # define V3D_HUB_INT_MMU_WRV BIT(5)
376 # define V3D_HUB_INT_MMU_PTI BIT(4)
377 # define V3D_HUB_INT_MMU_CAP BIT(3)
378 @@ -64,6 +65,7 @@
379 # define V3D_HUB_INT_TFUC BIT(1)
380 # define V3D_HUB_INT_TFUF BIT(0)
381
382 +/* GCA registers only exist in V3D < 41 */
383 #define V3D_GCA_CACHE_CTRL 0x0000c
384 # define V3D_GCA_CACHE_CTRL_FLUSH BIT(0)
385
386 @@ -87,6 +89,7 @@
387 # define V3D_TOP_GR_BRIDGE_SW_INIT_1_V3D_CLK_108_SW_INIT BIT(0)
388
389 #define V3D_TFU_CS 0x00400
390 +#define V3D_V7_TFU_CS 0x00700
391 /* Stops current job, empties input fifo. */
392 # define V3D_TFU_CS_TFURST BIT(31)
393 # define V3D_TFU_CS_CVTCT_MASK V3D_MASK(23, 16)
394 @@ -96,6 +99,7 @@
395 # define V3D_TFU_CS_BUSY BIT(0)
396
397 #define V3D_TFU_SU 0x00404
398 +#define V3D_V7_TFU_SU 0x00704
399 /* Interrupt when FINTTHR input slots are free (0 = disabled) */
400 # define V3D_TFU_SU_FINTTHR_MASK V3D_MASK(13, 8)
401 # define V3D_TFU_SU_FINTTHR_SHIFT 8
402 @@ -107,38 +111,53 @@
403 # define V3D_TFU_SU_THROTTLE_SHIFT 0
404
405 #define V3D_TFU_ICFG 0x00408
406 +#define V3D_V7_TFU_ICFG 0x00708
407 /* Interrupt when the conversion is complete. */
408 # define V3D_TFU_ICFG_IOC BIT(0)
409
410 /* Input Image Address */
411 #define V3D_TFU_IIA 0x0040c
412 +#define V3D_V7_TFU_IIA 0x0070c
413 /* Input Chroma Address */
414 #define V3D_TFU_ICA 0x00410
415 +#define V3D_V7_TFU_ICA 0x00710
416 /* Input Image Stride */
417 #define V3D_TFU_IIS 0x00414
418 +#define V3D_V7_TFU_IIS 0x00714
419 /* Input Image U-Plane Address */
420 #define V3D_TFU_IUA 0x00418
421 +#define V3D_V7_TFU_IUA 0x00718
422 +/* Image output config (V3D 7.x only) */
423 +#define V3D_V7_TFU_IOC 0x0071c
424 /* Output Image Address */
425 #define V3D_TFU_IOA 0x0041c
426 +#define V3D_V7_TFU_IOA 0x00720
427 /* Image Output Size */
428 #define V3D_TFU_IOS 0x00420
429 +#define V3D_V7_TFU_IOS 0x00724
430 /* TFU YUV Coefficient 0 */
431 #define V3D_TFU_COEF0 0x00424
432 -/* Use these regs instead of the defaults. */
433 +#define V3D_V7_TFU_COEF0 0x00728
434 +/* Use these regs instead of the defaults (V3D 4.x only) */
435 # define V3D_TFU_COEF0_USECOEF BIT(31)
436 /* TFU YUV Coefficient 1 */
437 #define V3D_TFU_COEF1 0x00428
438 +#define V3D_V7_TFU_COEF1 0x0072c
439 /* TFU YUV Coefficient 2 */
440 #define V3D_TFU_COEF2 0x0042c
441 +#define V3D_V7_TFU_COEF2 0x00730
442 /* TFU YUV Coefficient 3 */
443 #define V3D_TFU_COEF3 0x00430
444 +#define V3D_V7_TFU_COEF3 0x00734
445
446 +/* V3D 4.x only */
447 #define V3D_TFU_CRC 0x00434
448
449 /* Per-MMU registers. */
450
451 #define V3D_MMUC_CONTROL 0x01000
452 # define V3D_MMUC_CONTROL_CLEAR BIT(3)
453 +# define V3D_V7_MMUC_CONTROL_CLEAR BIT(11)
454 # define V3D_MMUC_CONTROL_FLUSHING BIT(2)
455 # define V3D_MMUC_CONTROL_FLUSH BIT(1)
456 # define V3D_MMUC_CONTROL_ENABLE BIT(0)
457 @@ -246,7 +265,6 @@
458
459 #define V3D_CTL_L2TCACTL 0x00030
460 # define V3D_L2TCACTL_TMUWCF BIT(8)
461 -# define V3D_L2TCACTL_L2T_NO_WM BIT(4)
462 /* Invalidates cache lines. */
463 # define V3D_L2TCACTL_FLM_FLUSH 0
464 /* Removes cachelines without writing dirty lines back. */
465 @@ -268,7 +286,9 @@
466 # define V3D_INT_QPU_MASK V3D_MASK(27, 16)
467 # define V3D_INT_QPU_SHIFT 16
468 # define V3D_INT_CSDDONE BIT(7)
469 +# define V3D_V7_INT_CSDDONE BIT(6)
470 # define V3D_INT_PCTR BIT(6)
471 +# define V3D_V7_INT_PCTR BIT(5)
472 # define V3D_INT_GMPV BIT(5)
473 # define V3D_INT_TRFB BIT(4)
474 # define V3D_INT_SPILLUSE BIT(3)
475 @@ -350,14 +370,19 @@
476 #define V3D_V4_PCTR_0_SRC_X(x) (V3D_V4_PCTR_0_SRC_0_3 + \
477 4 * (x))
478 # define V3D_PCTR_S0_MASK V3D_MASK(6, 0)
479 +# define V3D_V7_PCTR_S0_MASK V3D_MASK(7, 0)
480 # define V3D_PCTR_S0_SHIFT 0
481 # define V3D_PCTR_S1_MASK V3D_MASK(14, 8)
482 +# define V3D_V7_PCTR_S1_MASK V3D_MASK(15, 8)
483 # define V3D_PCTR_S1_SHIFT 8
484 # define V3D_PCTR_S2_MASK V3D_MASK(22, 16)
485 +# define V3D_V7_PCTR_S2_MASK V3D_MASK(23, 16)
486 # define V3D_PCTR_S2_SHIFT 16
487 # define V3D_PCTR_S3_MASK V3D_MASK(30, 24)
488 +# define V3D_V7_PCTR_S3_MASK V3D_MASK(31, 24)
489 # define V3D_PCTR_S3_SHIFT 24
490 # define V3D_PCTR_CYCLE_COUNT 32
491 +# define V3D_V7_PCTR_CYCLE_COUNT 0
492
493 /* Output values of the counters. */
494 #define V3D_PCTR_0_PCTR0 0x00680
495 @@ -365,6 +390,7 @@
496 #define V3D_PCTR_0_PCTRX(x) (V3D_PCTR_0_PCTR0 + \
497 4 * (x))
498 #define V3D_GMP_STATUS 0x00800
499 +#define V3D_V7_GMP_STATUS 0x00600
500 # define V3D_GMP_STATUS_GMPRST BIT(31)
501 # define V3D_GMP_STATUS_WR_COUNT_MASK V3D_MASK(30, 24)
502 # define V3D_GMP_STATUS_WR_COUNT_SHIFT 24
503 @@ -378,12 +404,14 @@
504 # define V3D_GMP_STATUS_VIO BIT(0)
505
506 #define V3D_GMP_CFG 0x00804
507 +#define V3D_V7_GMP_CFG 0x00604
508 # define V3D_GMP_CFG_LBURSTEN BIT(3)
509 # define V3D_GMP_CFG_PGCRSEN BIT()
510 # define V3D_GMP_CFG_STOP_REQ BIT(1)
511 # define V3D_GMP_CFG_PROT_ENABLE BIT(0)
512
513 #define V3D_GMP_VIO_ADDR 0x00808
514 +#define V3D_V7_GMP_VIO_ADDR 0x00608
515 #define V3D_GMP_VIO_TYPE 0x0080c
516 #define V3D_GMP_TABLE_ADDR 0x00810
517 #define V3D_GMP_CLEAR_LOAD 0x00814
518 @@ -399,24 +427,28 @@
519 # define V3D_CSD_STATUS_HAVE_QUEUED_DISPATCH BIT(0)
520
521 #define V3D_CSD_QUEUED_CFG0 0x00904
522 +#define V3D_V7_CSD_QUEUED_CFG0 0x00930
523 # define V3D_CSD_QUEUED_CFG0_NUM_WGS_X_MASK V3D_MASK(31, 16)
524 # define V3D_CSD_QUEUED_CFG0_NUM_WGS_X_SHIFT 16
525 # define V3D_CSD_QUEUED_CFG0_WG_X_OFFSET_MASK V3D_MASK(15, 0)
526 # define V3D_CSD_QUEUED_CFG0_WG_X_OFFSET_SHIFT 0
527
528 #define V3D_CSD_QUEUED_CFG1 0x00908
529 +#define V3D_V7_CSD_QUEUED_CFG1 0x00934
530 # define V3D_CSD_QUEUED_CFG1_NUM_WGS_Y_MASK V3D_MASK(31, 16)
531 # define V3D_CSD_QUEUED_CFG1_NUM_WGS_Y_SHIFT 16
532 # define V3D_CSD_QUEUED_CFG1_WG_Y_OFFSET_MASK V3D_MASK(15, 0)
533 # define V3D_CSD_QUEUED_CFG1_WG_Y_OFFSET_SHIFT 0
534
535 #define V3D_CSD_QUEUED_CFG2 0x0090c
536 +#define V3D_V7_CSD_QUEUED_CFG2 0x00938
537 # define V3D_CSD_QUEUED_CFG2_NUM_WGS_Z_MASK V3D_MASK(31, 16)
538 # define V3D_CSD_QUEUED_CFG2_NUM_WGS_Z_SHIFT 16
539 # define V3D_CSD_QUEUED_CFG2_WG_Z_OFFSET_MASK V3D_MASK(15, 0)
540 # define V3D_CSD_QUEUED_CFG2_WG_Z_OFFSET_SHIFT 0
541
542 #define V3D_CSD_QUEUED_CFG3 0x00910
543 +#define V3D_V7_CSD_QUEUED_CFG3 0x0093c
544 # define V3D_CSD_QUEUED_CFG3_OVERLAP_WITH_PREV BIT(26)
545 # define V3D_CSD_QUEUED_CFG3_MAX_SG_ID_MASK V3D_MASK(25, 20)
546 # define V3D_CSD_QUEUED_CFG3_MAX_SG_ID_SHIFT 20
547 @@ -429,22 +461,36 @@
548
549 /* Number of batches, minus 1 */
550 #define V3D_CSD_QUEUED_CFG4 0x00914
551 +#define V3D_V7_CSD_QUEUED_CFG4 0x00940
552
553 /* Shader address, pnan, singleseg, threading, like a shader record. */
554 #define V3D_CSD_QUEUED_CFG5 0x00918
555 +#define V3D_V7_CSD_QUEUED_CFG5 0x00944
556
557 /* Uniforms address (4 byte aligned) */
558 #define V3D_CSD_QUEUED_CFG6 0x0091c
559 +#define V3D_V7_CSD_QUEUED_CFG6 0x00948
560 +
561 +#define V3D_V7_CSD_QUEUED_CFG7 0x0094c
562
563 #define V3D_CSD_CURRENT_CFG0 0x00920
564 +#define V3D_V7_CSD_CURRENT_CFG0 0x00958
565 #define V3D_CSD_CURRENT_CFG1 0x00924
566 +#define V3D_V7_CSD_CURRENT_CFG1 0x0095c
567 #define V3D_CSD_CURRENT_CFG2 0x00928
568 +#define V3D_V7_CSD_CURRENT_CFG2 0x00960
569 #define V3D_CSD_CURRENT_CFG3 0x0092c
570 +#define V3D_V7_CSD_CURRENT_CFG3 0x00964
571 #define V3D_CSD_CURRENT_CFG4 0x00930
572 +#define V3D_V7_CSD_CURRENT_CFG4 0x00968
573 #define V3D_CSD_CURRENT_CFG5 0x00934
574 +#define V3D_V7_CSD_CURRENT_CFG5 0x0096c
575 #define V3D_CSD_CURRENT_CFG6 0x00938
576 +#define V3D_V7_CSD_CURRENT_CFG6 0x00970
577 +#define V3D_V7_CSD_CURRENT_CFG7 0x00974
578
579 #define V3D_CSD_CURRENT_ID0 0x0093c
580 +#define V3D_V7_CSD_CURRENT_ID0 0x00978
581 # define V3D_CSD_CURRENT_ID0_WG_X_MASK V3D_MASK(31, 16)
582 # define V3D_CSD_CURRENT_ID0_WG_X_SHIFT 16
583 # define V3D_CSD_CURRENT_ID0_WG_IN_SG_MASK V3D_MASK(11, 8)
584 @@ -453,6 +499,7 @@
585 # define V3D_CSD_CURRENT_ID0_L_IDX_SHIFT 0
586
587 #define V3D_CSD_CURRENT_ID1 0x00940
588 +#define V3D_V7_CSD_CURRENT_ID1 0x0097c
589 # define V3D_CSD_CURRENT_ID0_WG_Z_MASK V3D_MASK(31, 16)
590 # define V3D_CSD_CURRENT_ID0_WG_Z_SHIFT 16
591 # define V3D_CSD_CURRENT_ID0_WG_Y_MASK V3D_MASK(15, 0)
592 --- a/drivers/gpu/drm/v3d/v3d_sched.c
593 +++ b/drivers/gpu/drm/v3d/v3d_sched.c
594 @@ -282,6 +282,8 @@ static struct dma_fence *v3d_render_job_
595 return fence;
596 }
597
598 +#define V3D_TFU_REG(name) ((v3d->ver < 71) ? V3D_TFU_ ## name : V3D_V7_TFU_ ## name)
599 +
600 static struct dma_fence *
601 v3d_tfu_job_run(struct drm_sched_job *sched_job)
602 {
603 @@ -302,20 +304,22 @@ v3d_tfu_job_run(struct drm_sched_job *sc
604 trace_v3d_submit_tfu(dev, to_v3d_fence(fence)->seqno);
605
606 v3d_sched_stats_add_job(&v3d->gpu_queue_stats[V3D_TFU], sched_job);
607 - V3D_WRITE(V3D_TFU_IIA, job->args.iia);
608 - V3D_WRITE(V3D_TFU_IIS, job->args.iis);
609 - V3D_WRITE(V3D_TFU_ICA, job->args.ica);
610 - V3D_WRITE(V3D_TFU_IUA, job->args.iua);
611 - V3D_WRITE(V3D_TFU_IOA, job->args.ioa);
612 - V3D_WRITE(V3D_TFU_IOS, job->args.ios);
613 - V3D_WRITE(V3D_TFU_COEF0, job->args.coef[0]);
614 - if (job->args.coef[0] & V3D_TFU_COEF0_USECOEF) {
615 - V3D_WRITE(V3D_TFU_COEF1, job->args.coef[1]);
616 - V3D_WRITE(V3D_TFU_COEF2, job->args.coef[2]);
617 - V3D_WRITE(V3D_TFU_COEF3, job->args.coef[3]);
618 + V3D_WRITE(V3D_TFU_REG(IIA), job->args.iia);
619 + V3D_WRITE(V3D_TFU_REG(IIS), job->args.iis);
620 + V3D_WRITE(V3D_TFU_REG(ICA), job->args.ica);
621 + V3D_WRITE(V3D_TFU_REG(IUA), job->args.iua);
622 + V3D_WRITE(V3D_TFU_REG(IOA), job->args.ioa);
623 + if (v3d->ver >= 71)
624 + V3D_WRITE(V3D_V7_TFU_IOC, job->args.v71.ioc);
625 + V3D_WRITE(V3D_TFU_REG(IOS), job->args.ios);
626 + V3D_WRITE(V3D_TFU_REG(COEF0), job->args.coef[0]);
627 + if (v3d->ver >= 71 || (job->args.coef[0] & V3D_TFU_COEF0_USECOEF)) {
628 + V3D_WRITE(V3D_TFU_REG(COEF1), job->args.coef[1]);
629 + V3D_WRITE(V3D_TFU_REG(COEF2), job->args.coef[2]);
630 + V3D_WRITE(V3D_TFU_REG(COEF3), job->args.coef[3]);
631 }
632 /* ICFG kicks off the job. */
633 - V3D_WRITE(V3D_TFU_ICFG, job->args.icfg | V3D_TFU_ICFG_IOC);
634 + V3D_WRITE(V3D_TFU_REG(ICFG), job->args.icfg | V3D_TFU_ICFG_IOC);
635
636 return fence;
637 }
638 @@ -327,7 +331,7 @@ v3d_csd_job_run(struct drm_sched_job *sc
639 struct v3d_dev *v3d = job->base.v3d;
640 struct drm_device *dev = &v3d->drm;
641 struct dma_fence *fence;
642 - int i;
643 + int i, csd_cfg0_reg, csd_cfg_reg_count;
644
645 v3d->csd_job = job;
646
647 @@ -346,10 +350,12 @@ v3d_csd_job_run(struct drm_sched_job *sc
648 v3d_sched_stats_add_job(&v3d->gpu_queue_stats[V3D_CSD], sched_job);
649 v3d_switch_perfmon(v3d, &job->base);
650
651 - for (i = 1; i <= 6; i++)
652 - V3D_CORE_WRITE(0, V3D_CSD_QUEUED_CFG0 + 4 * i, job->args.cfg[i]);
653 + csd_cfg0_reg = v3d->ver < 71 ? V3D_CSD_QUEUED_CFG0 : V3D_V7_CSD_QUEUED_CFG0;
654 + csd_cfg_reg_count = v3d->ver < 71 ? 6 : 7;
655 + for (i = 1; i <= csd_cfg_reg_count; i++)
656 + V3D_CORE_WRITE(0, csd_cfg0_reg + 4 * i, job->args.cfg[i]);
657 /* CFG0 write kicks off the job. */
658 - V3D_CORE_WRITE(0, V3D_CSD_QUEUED_CFG0, job->args.cfg[0]);
659 + V3D_CORE_WRITE(0, csd_cfg0_reg, job->args.cfg[0]);
660
661 return fence;
662 }
663 @@ -452,7 +458,8 @@ v3d_csd_job_timedout(struct drm_sched_jo
664 {
665 struct v3d_csd_job *job = to_csd_job(sched_job);
666 struct v3d_dev *v3d = job->base.v3d;
667 - u32 batches = V3D_CORE_READ(0, V3D_CSD_CURRENT_CFG4);
668 + u32 batches = V3D_CORE_READ(0, (v3d->ver < 71 ? V3D_CSD_CURRENT_CFG4 :
669 + V3D_V7_CSD_CURRENT_CFG4));
670
671 /* If we've made progress, skip reset and let the timer get
672 * rearmed.