bcm27xx: update 6.1 patches to latest version
[openwrt/staging/dangole.git] / target / linux / bcm27xx / patches-6.1 / 950-0988-drm-vc4-Assign-LBM-memory-during-atomic_flush.patch
1 From bb0839405b61da6e6ae7141f7433f6a121725e6f Mon Sep 17 00:00:00 2001
2 From: Dave Stevenson <dave.stevenson@raspberrypi.com>
3 Date: Thu, 31 Aug 2023 11:45:38 +0100
4 Subject: [PATCH] drm/vc4: Assign LBM memory during atomic_flush.
5
6 Avoid double-buffering LBM allocations by making a single
7 allocation per CRTC at atomic_flush time.
8
9 Signed-off-by: Dave Stevenson <dave.stevenson@raspberrypi.com>
10 ---
11 drivers/gpu/drm/vc4/tests/vc4_test_lbm_size.c | 2 +-
12 drivers/gpu/drm/vc4/vc4_drv.h | 8 ++--
13 drivers/gpu/drm/vc4/vc4_hvs.c | 47 ++++++++++++++++++-
14 drivers/gpu/drm/vc4/vc4_plane.c | 38 +++------------
15 4 files changed, 58 insertions(+), 37 deletions(-)
16
17 --- a/drivers/gpu/drm/vc4/tests/vc4_test_lbm_size.c
18 +++ b/drivers/gpu/drm/vc4/tests/vc4_test_lbm_size.c
19 @@ -248,7 +248,7 @@ static void drm_vc4_test_vc4_lbm_size(st
20 ret = drm_atomic_check_only(state);
21 KUNIT_ASSERT_EQ(test, ret, 0);
22
23 - KUNIT_EXPECT_EQ(test, vc4_plane_state->lbm.size, params->expected_lbm_size);
24 + KUNIT_EXPECT_EQ(test, vc4_plane_state->lbm_size, params->expected_lbm_size);
25
26 for (i = 0; i < 2; i++) {
27 KUNIT_EXPECT_EQ(test,
28 --- a/drivers/gpu/drm/vc4/vc4_drv.h
29 +++ b/drivers/gpu/drm/vc4/vc4_drv.h
30 @@ -437,6 +437,8 @@ struct vc4_plane_state {
31 u32 dlist_size; /* Number of dwords allocated for the display list */
32 u32 dlist_count; /* Number of used dwords in the display list. */
33
34 + u32 lbm_size; /* LBM requirements for this plane */
35 +
36 /* Offset in the dlist to various words, for pageflip or
37 * cursor updates.
38 */
39 @@ -462,9 +464,6 @@ struct vc4_plane_state {
40 bool is_unity;
41 bool is_yuv;
42
43 - /* Our allocation in LBM for temporary storage during scaling. */
44 - struct drm_mm_node lbm;
45 -
46 /* Our allocation in UPM for prefetching. */
47 struct drm_mm_node upm[DRM_FORMAT_MAX_PLANES];
48
49 @@ -661,6 +660,9 @@ struct vc4_crtc {
50 * access to that value.
51 */
52 unsigned int current_hvs_channel;
53 +
54 + /* @lbm: Our allocation in LBM for temporary storage during scaling. */
55 + struct drm_mm_node lbm;
56 };
57
58 static inline struct vc4_crtc *
59 --- a/drivers/gpu/drm/vc4/vc4_hvs.c
60 +++ b/drivers/gpu/drm/vc4/vc4_hvs.c
61 @@ -1103,6 +1103,7 @@ int vc4_hvs_atomic_check(struct drm_crtc
62 struct drm_plane *plane;
63 const struct drm_plane_state *plane_state;
64 u32 dlist_count = 0;
65 + u32 lbm_count = 0;
66
67 /* The pixelvalve can only feed one encoder (and encoders are
68 * 1:1 with connectors.)
69 @@ -1111,6 +1112,8 @@ int vc4_hvs_atomic_check(struct drm_crtc
70 return -EINVAL;
71
72 drm_atomic_crtc_state_for_each_plane_state(plane, plane_state, crtc_state) {
73 + const struct vc4_plane_state *vc4_plane_state =
74 + to_vc4_plane_state(plane_state);
75 u32 plane_dlist_count = vc4_plane_dlist_size(plane_state);
76
77 drm_dbg_driver(dev, "[CRTC:%d:%s] Found [PLANE:%d:%s] with DLIST size: %u\n",
78 @@ -1119,6 +1122,7 @@ int vc4_hvs_atomic_check(struct drm_crtc
79 plane_dlist_count);
80
81 dlist_count += plane_dlist_count;
82 + lbm_count += vc4_plane_state->lbm_size;
83 }
84
85 dlist_count++; /* Account for SCALER_CTL0_END. */
86 @@ -1132,6 +1136,8 @@ int vc4_hvs_atomic_check(struct drm_crtc
87
88 vc4_state->mm = alloc;
89
90 + /* FIXME: Check total lbm allocation here */
91 +
92 return vc4_hvs_gamma_check(crtc, state);
93 }
94
95 @@ -1246,7 +1252,10 @@ void vc4_hvs_atomic_flush(struct drm_crt
96 bool debug_dump_regs = false;
97 bool enable_bg_fill = false;
98 u32 __iomem *dlist_start, *dlist_next;
99 + unsigned long irqflags;
100 unsigned int zpos = 0;
101 + u32 lbm_offset = 0;
102 + u32 lbm_size = 0;
103 bool found = false;
104 int idx;
105
106 @@ -1265,6 +1274,35 @@ void vc4_hvs_atomic_flush(struct drm_crt
107 vc4_hvs_dump_state(hvs);
108 }
109
110 + drm_atomic_crtc_for_each_plane(plane, crtc) {
111 + vc4_plane_state = to_vc4_plane_state(plane->state);
112 + lbm_size += vc4_plane_state->lbm_size;
113 + }
114 +
115 + if (drm_mm_node_allocated(&vc4_crtc->lbm)) {
116 + spin_lock_irqsave(&vc4_crtc->irq_lock, irqflags);
117 + drm_mm_remove_node(&vc4_crtc->lbm);
118 + spin_unlock_irqrestore(&vc4_crtc->irq_lock, irqflags);
119 + }
120 +
121 + if (lbm_size) {
122 + int ret;
123 +
124 + spin_lock_irqsave(&vc4_crtc->irq_lock, irqflags);
125 + ret = drm_mm_insert_node_generic(&vc4->hvs->lbm_mm,
126 + &vc4_crtc->lbm,
127 + lbm_size, 1,
128 + 0, 0);
129 + spin_unlock_irqrestore(&vc4_crtc->irq_lock, irqflags);
130 +
131 + if (ret) {
132 + pr_err("Failed to allocate LBM ret %d\n", ret);
133 + return;
134 + }
135 + }
136 +
137 + lbm_offset = vc4_crtc->lbm.start;
138 +
139 dlist_start = vc4->hvs->dlist + vc4_state->mm->mm_node.start;
140 dlist_next = dlist_start;
141
142 @@ -1276,6 +1314,8 @@ void vc4_hvs_atomic_flush(struct drm_crt
143 if (plane->state->normalized_zpos != zpos)
144 continue;
145
146 + vc4_plane_state = to_vc4_plane_state(plane->state);
147 +
148 /* Is this the first active plane? */
149 if (dlist_next == dlist_start) {
150 /* We need to enable background fill when a plane
151 @@ -1286,10 +1326,15 @@ void vc4_hvs_atomic_flush(struct drm_crt
152 * already needs it or all planes on top blend from
153 * the first or a lower plane.
154 */
155 - vc4_plane_state = to_vc4_plane_state(plane->state);
156 enable_bg_fill = vc4_plane_state->needs_bg_fill;
157 }
158
159 + if (vc4_plane_state->lbm_size) {
160 + vc4_plane_state->dlist[vc4_plane_state->lbm_offset] =
161 + lbm_offset;
162 + lbm_offset += vc4_plane_state->lbm_size;
163 + }
164 +
165 dlist_next += vc4_plane_write_dlist(plane, dlist_next);
166
167 found = true;
168 --- a/drivers/gpu/drm/vc4/vc4_plane.c
169 +++ b/drivers/gpu/drm/vc4/vc4_plane.c
170 @@ -288,7 +288,6 @@ struct drm_plane_state *vc4_plane_duplic
171 if (!vc4_state)
172 return NULL;
173
174 - memset(&vc4_state->lbm, 0, sizeof(vc4_state->lbm));
175 memset(&vc4_state->upm, 0, sizeof(vc4_state->upm));
176
177 for (i = 0; i < DRM_FORMAT_MAX_PLANES; i++)
178 @@ -320,14 +319,6 @@ void vc4_plane_destroy_state(struct drm_
179 struct vc4_plane_state *vc4_state = to_vc4_plane_state(state);
180 unsigned int i;
181
182 - if (drm_mm_node_allocated(&vc4_state->lbm)) {
183 - unsigned long irqflags;
184 -
185 - spin_lock_irqsave(&hvs->mm_lock, irqflags);
186 - drm_mm_remove_node(&vc4_state->lbm);
187 - spin_unlock_irqrestore(&hvs->mm_lock, irqflags);
188 - }
189 -
190 for (i = 0; i < DRM_FORMAT_MAX_PLANES; i++) {
191 unsigned long irqflags;
192
193 @@ -903,12 +894,13 @@ static int vc4_plane_allocate_lbm(struct
194 struct vc4_dev *vc4 = to_vc4_dev(drm);
195 struct drm_plane *plane = state->plane;
196 struct vc4_plane_state *vc4_state = to_vc4_plane_state(state);
197 - unsigned long irqflags;
198 u32 lbm_size;
199
200 lbm_size = vc4_lbm_size(state);
201 - if (!lbm_size)
202 + if (!lbm_size) {
203 + vc4_state->lbm_size = 0;
204 return 0;
205 + }
206
207 /*
208 * NOTE: BCM2712 doesn't need to be aligned, since the size
209 @@ -925,28 +917,10 @@ static int vc4_plane_allocate_lbm(struct
210 if (WARN_ON(!vc4_state->lbm_offset))
211 return -EINVAL;
212
213 - /* Allocate the LBM memory that the HVS will use for temporary
214 - * storage due to our scaling/format conversion.
215 + /* FIXME: Add loop here that ensures that the total LBM assigned in this
216 + * state is less than the total lbm size
217 */
218 - if (!drm_mm_node_allocated(&vc4_state->lbm)) {
219 - int ret;
220 -
221 - spin_lock_irqsave(&vc4->hvs->mm_lock, irqflags);
222 - ret = drm_mm_insert_node_generic(&vc4->hvs->lbm_mm,
223 - &vc4_state->lbm,
224 - lbm_size, 1,
225 - 0, 0);
226 - spin_unlock_irqrestore(&vc4->hvs->mm_lock, irqflags);
227 -
228 - if (ret) {
229 - drm_err(drm, "Failed to allocate LBM entry: %d\n", ret);
230 - return ret;
231 - }
232 - } else {
233 - WARN_ON_ONCE(lbm_size != vc4_state->lbm.size);
234 - }
235 -
236 - vc4_state->dlist[vc4_state->lbm_offset] = vc4_state->lbm.start;
237 + vc4_state->lbm_size = lbm_size;
238
239 return 0;
240 }