bcm27xx: add support for linux v5.15
[openwrt/openwrt.git] / target / linux / bcm27xx / patches-5.15 / 950-0551-drm-vc4-Add-support-for-gamma-on-BCM2711.patch
1 From fc26e29e257c8d737b78e4581f7ffd9be338a70c Mon Sep 17 00:00:00 2001
2 From: Dave Stevenson <dave.stevenson@raspberrypi.com>
3 Date: Tue, 27 Apr 2021 14:24:21 +0200
4 Subject: [PATCH] drm/vc4: Add support for gamma on BCM2711
5
6 BCM2711 replaces the 256 entry lookup table with a 16 point
7 piecewise linear function, because the increased pipeline bit
8 depth makes a LUT unwieldy.
9
10 Implement a simple conversion from the 256 entry LUT that userspace
11 is likely to expect into 16 evenly spread points in the PWL. This
12 could be improved with curve fitting at a later date.
13
14 Co-developed-by: Juerg Haefliger <juergh@canonical.com>
15 Signed-off-by: Juerg Haefliger <juergh@canonical.com>
16 Signed-off-by: Dave Stevenson <dave.stevenson@raspberrypi.com>
17 Signed-off-by: Maxime Ripard <maxime@cerno.tech>
18 ---
19 drivers/gpu/drm/vc4/vc4_crtc.c | 35 +++++++++++---
20 drivers/gpu/drm/vc4/vc4_drv.h | 28 +++++++++--
21 drivers/gpu/drm/vc4/vc4_hvs.c | 87 ++++++++++++++++++++++++++++++++--
22 drivers/gpu/drm/vc4/vc4_regs.h | 22 +++++++++
23 4 files changed, 159 insertions(+), 13 deletions(-)
24
25 --- a/drivers/gpu/drm/vc4/vc4_crtc.c
26 +++ b/drivers/gpu/drm/vc4/vc4_crtc.c
27 @@ -1164,19 +1164,42 @@ int vc4_crtc_init(struct drm_device *drm
28
29 if (!vc4->hvs->hvs5) {
30 drm_mode_crtc_set_gamma_size(crtc, ARRAY_SIZE(vc4_crtc->lut_r));
31 + } else {
32 + /* This is a lie for hvs5 which uses a 16 point PWL, but it
33 + * allows for something smarter than just 16 linearly spaced
34 + * segments. Conversion is done in vc5_hvs_update_gamma_lut.
35 + */
36 + drm_mode_crtc_set_gamma_size(crtc, 256);
37 + }
38
39 - drm_crtc_enable_color_mgmt(crtc, 0, false, crtc->gamma_size);
40 + drm_crtc_enable_color_mgmt(crtc, 0, false, crtc->gamma_size);
41
42 + if (!vc4->hvs->hvs5) {
43 /* We support CTM, but only for one CRTC at a time. It's therefore
44 * implemented as private driver state in vc4_kms, not here.
45 */
46 drm_crtc_enable_color_mgmt(crtc, 0, true, crtc->gamma_size);
47 - }
48
49 - for (i = 0; i < crtc->gamma_size; i++) {
50 - vc4_crtc->lut_r[i] = i;
51 - vc4_crtc->lut_g[i] = i;
52 - vc4_crtc->lut_b[i] = i;
53 + /* Initialize the VC4 gamma LUTs */
54 + for (i = 0; i < crtc->gamma_size; i++) {
55 + vc4_crtc->lut_r[i] = i;
56 + vc4_crtc->lut_g[i] = i;
57 + vc4_crtc->lut_b[i] = i;
58 + }
59 + } else {
60 + /* Initialize the VC5 gamma PWL entries. Assume 12-bit pipeline,
61 + * evenly spread over full range.
62 + */
63 + for (i = 0; i < SCALER5_DSPGAMMA_NUM_POINTS; i++) {
64 + vc4_crtc->pwl_r[i] =
65 + VC5_HVS_SET_GAMMA_ENTRY(i << 8, i << 12, 1 << 8);
66 + vc4_crtc->pwl_g[i] =
67 + VC5_HVS_SET_GAMMA_ENTRY(i << 8, i << 12, 1 << 8);
68 + vc4_crtc->pwl_b[i] =
69 + VC5_HVS_SET_GAMMA_ENTRY(i << 8, i << 12, 1 << 8);
70 + vc4_crtc->pwl_a[i] =
71 + VC5_HVS_SET_GAMMA_ENTRY(i << 8, i << 12, 1 << 8);
72 + }
73 }
74
75 return 0;
76 --- a/drivers/gpu/drm/vc4/vc4_drv.h
77 +++ b/drivers/gpu/drm/vc4/vc4_drv.h
78 @@ -19,6 +19,7 @@
79 #include <drm/drm_modeset_lock.h>
80
81 #include "uapi/drm/vc4_drm.h"
82 +#include "vc4_regs.h"
83
84 struct drm_device;
85 struct drm_gem_object;
86 @@ -481,6 +482,17 @@ struct vc4_pv_data {
87
88 };
89
90 +struct vc5_gamma_entry {
91 + u32 x_c_terms;
92 + u32 grad_term;
93 +};
94 +
95 +#define VC5_HVS_SET_GAMMA_ENTRY(x, c, g) (struct vc5_gamma_entry){ \
96 + .x_c_terms = VC4_SET_FIELD((x), SCALER5_DSPGAMMA_OFF_X) | \
97 + VC4_SET_FIELD((c), SCALER5_DSPGAMMA_OFF_C), \
98 + .grad_term = (g) \
99 +}
100 +
101 struct vc4_crtc {
102 struct drm_crtc base;
103 struct platform_device *pdev;
104 @@ -490,9 +502,19 @@ struct vc4_crtc {
105 /* Timestamp at start of vblank irq - unaffected by lock delays. */
106 ktime_t t_vblank;
107
108 - u8 lut_r[256];
109 - u8 lut_g[256];
110 - u8 lut_b[256];
111 + union {
112 + struct { /* VC4 gamma LUT */
113 + u8 lut_r[256];
114 + u8 lut_g[256];
115 + u8 lut_b[256];
116 + };
117 + struct { /* VC5 gamma PWL entries */
118 + struct vc5_gamma_entry pwl_r[SCALER5_DSPGAMMA_NUM_POINTS];
119 + struct vc5_gamma_entry pwl_g[SCALER5_DSPGAMMA_NUM_POINTS];
120 + struct vc5_gamma_entry pwl_b[SCALER5_DSPGAMMA_NUM_POINTS];
121 + struct vc5_gamma_entry pwl_a[SCALER5_DSPGAMMA_NUM_POINTS];
122 + };
123 + };
124
125 struct drm_pending_vblank_event *event;
126
127 --- a/drivers/gpu/drm/vc4/vc4_hvs.c
128 +++ b/drivers/gpu/drm/vc4/vc4_hvs.c
129 @@ -236,6 +236,80 @@ static void vc4_hvs_update_gamma_lut(str
130 vc4_hvs_lut_load(crtc);
131 }
132
133 +static void vc5_hvs_write_gamma_entry(struct vc4_dev *vc4,
134 + u32 offset,
135 + struct vc5_gamma_entry *gamma)
136 +{
137 + HVS_WRITE(offset, gamma->x_c_terms);
138 + HVS_WRITE(offset + 4, gamma->grad_term);
139 +}
140 +
141 +static void vc5_hvs_lut_load(struct drm_crtc *crtc)
142 +{
143 + struct drm_device *dev = crtc->dev;
144 + struct vc4_dev *vc4 = to_vc4_dev(dev);
145 + struct vc4_crtc *vc4_crtc = to_vc4_crtc(crtc);
146 + struct vc4_crtc_state *vc4_state = to_vc4_crtc_state(crtc->state);
147 + u32 i;
148 + u32 offset = SCALER5_DSPGAMMA_START +
149 + vc4_state->assigned_channel * SCALER5_DSPGAMMA_CHAN_OFFSET;
150 +
151 + for (i = 0; i < SCALER5_DSPGAMMA_NUM_POINTS; i++, offset += 8)
152 + vc5_hvs_write_gamma_entry(vc4, offset, &vc4_crtc->pwl_r[i]);
153 + for (i = 0; i < SCALER5_DSPGAMMA_NUM_POINTS; i++, offset += 8)
154 + vc5_hvs_write_gamma_entry(vc4, offset, &vc4_crtc->pwl_g[i]);
155 + for (i = 0; i < SCALER5_DSPGAMMA_NUM_POINTS; i++, offset += 8)
156 + vc5_hvs_write_gamma_entry(vc4, offset, &vc4_crtc->pwl_b[i]);
157 +
158 + if (vc4_state->assigned_channel == 2) {
159 + /* Alpha only valid on channel 2 */
160 + for (i = 0; i < SCALER5_DSPGAMMA_NUM_POINTS; i++, offset += 8)
161 + vc5_hvs_write_gamma_entry(vc4, offset, &vc4_crtc->pwl_a[i]);
162 + }
163 +}
164 +
165 +static void vc5_hvs_update_gamma_lut(struct drm_crtc *crtc)
166 +{
167 + struct vc4_crtc *vc4_crtc = to_vc4_crtc(crtc);
168 + struct drm_color_lut *lut = crtc->state->gamma_lut->data;
169 + unsigned int step, i;
170 + u32 start, end;
171 +
172 +#define VC5_HVS_UPDATE_GAMMA_ENTRY_FROM_LUT(pwl, chan) \
173 + start = drm_color_lut_extract(lut[i * step].chan, 12); \
174 + end = drm_color_lut_extract(lut[(i + 1) * step - 1].chan, 12); \
175 + \
176 + /* Negative gradients not permitted by the hardware, so \
177 + * flatten such points out. \
178 + */ \
179 + if (end < start) \
180 + end = start; \
181 + \
182 + /* Assume 12bit pipeline. \
183 + * X evenly spread over full range (12 bit). \
184 + * C as U12.4 format. \
185 + * Gradient as U4.8 format. \
186 + */ \
187 + vc4_crtc->pwl[i] = \
188 + VC5_HVS_SET_GAMMA_ENTRY(i << 8, start << 4, \
189 + ((end - start) << 4) / (step - 1))
190 +
191 + /* HVS5 has a 16 point piecewise linear function for each colour
192 + * channel (including alpha on channel 2) on each display channel.
193 + *
194 + * Currently take a crude subsample of the gamma LUT, but this could
195 + * be improved to implement curve fitting.
196 + */
197 + step = crtc->gamma_size / SCALER5_DSPGAMMA_NUM_POINTS;
198 + for (i = 0; i < SCALER5_DSPGAMMA_NUM_POINTS; i++) {
199 + VC5_HVS_UPDATE_GAMMA_ENTRY_FROM_LUT(pwl_r, red);
200 + VC5_HVS_UPDATE_GAMMA_ENTRY_FROM_LUT(pwl_g, green);
201 + VC5_HVS_UPDATE_GAMMA_ENTRY_FROM_LUT(pwl_b, blue);
202 + }
203 +
204 + vc5_hvs_lut_load(crtc);
205 +}
206 +
207 int vc4_hvs_get_fifo_from_output(struct drm_device *dev, unsigned int output)
208 {
209 struct vc4_dev *vc4 = to_vc4_dev(dev);
210 @@ -329,14 +403,16 @@ static int vc4_hvs_init_channel(struct v
211 dispbkgndx &= ~SCALER_DISPBKGND_INTERLACE;
212
213 HVS_WRITE(SCALER_DISPBKGNDX(chan), dispbkgndx |
214 - SCALER_DISPBKGND_AUTOHS |
215 - ((!vc4->hvs->hvs5) ? SCALER_DISPBKGND_GAMMA : 0) |
216 + SCALER_DISPBKGND_AUTOHS | SCALER_DISPBKGND_GAMMA |
217 (interlace ? SCALER_DISPBKGND_INTERLACE : 0));
218
219 /* Reload the LUT, since the SRAMs would have been disabled if
220 * all CRTCs had SCALER_DISPBKGND_GAMMA unset at once.
221 */
222 - vc4_hvs_lut_load(crtc);
223 + if (!vc4->hvs->hvs5)
224 + vc4_hvs_lut_load(crtc);
225 + else
226 + vc5_hvs_lut_load(crtc);
227
228 return 0;
229 }
230 @@ -534,7 +610,10 @@ void vc4_hvs_atomic_flush(struct drm_crt
231 u32 dispbkgndx = HVS_READ(SCALER_DISPBKGNDX(vc4_state->assigned_channel));
232
233 if (crtc->state->gamma_lut) {
234 - vc4_hvs_update_gamma_lut(crtc);
235 + if (!vc4->hvs->hvs5)
236 + vc4_hvs_update_gamma_lut(crtc);
237 + else
238 + vc5_hvs_update_gamma_lut(crtc);
239 dispbkgndx |= SCALER_DISPBKGND_GAMMA;
240 } else {
241 /* Unsetting DISPBKGND_GAMMA skips the gamma lut step
242 --- a/drivers/gpu/drm/vc4/vc4_regs.h
243 +++ b/drivers/gpu/drm/vc4/vc4_regs.h
244 @@ -491,6 +491,28 @@
245 #define SCALER_DLIST_START 0x00002000
246 #define SCALER_DLIST_SIZE 0x00004000
247
248 +/* Gamma PWL for each channel. 16 points for each of 4 colour channels (alpha
249 + * only on channel 2). 8 bytes per entry, offsets first, then gradient:
250 + * Y = GRAD * X + C
251 + *
252 + * Values for X and C are left justified, and vary depending on the width of
253 + * the HVS channel:
254 + * 8-bit pipeline: X uses [31:24], C is U8.8 format, and GRAD is U4.8.
255 + * 12-bit pipeline: X uses [31:20], C is U12.4 format, and GRAD is U4.8.
256 + *
257 + * The 3 HVS channels start at 0x400 offsets (ie chan 1 starts at 0x2400, and
258 + * chan 2 at 0x2800).
259 + */
260 +#define SCALER5_DSPGAMMA_NUM_POINTS 16
261 +#define SCALER5_DSPGAMMA_START 0x00002000
262 +#define SCALER5_DSPGAMMA_CHAN_OFFSET 0x400
263 +# define SCALER5_DSPGAMMA_OFF_X_MASK VC4_MASK(31, 20)
264 +# define SCALER5_DSPGAMMA_OFF_X_SHIFT 20
265 +# define SCALER5_DSPGAMMA_OFF_C_MASK VC4_MASK(15, 0)
266 +# define SCALER5_DSPGAMMA_OFF_C_SHIFT 0
267 +# define SCALER5_DSPGAMMA_GRAD_MASK VC4_MASK(11, 0)
268 +# define SCALER5_DSPGAMMA_GRAD_SHIFT 0
269 +
270 #define SCALER5_DLIST_START 0x00004000
271
272 # define VC4_HDMI_SW_RESET_FORMAT_DETECT BIT(1)