1 From fc26e29e257c8d737b78e4581f7ffd9be338a70c Mon Sep 17 00:00:00 2001
2 From: Dave Stevenson <dave.stevenson@raspberrypi.com>
3 Date: Tue, 27 Apr 2021 14:24:21 +0200
4 Subject: [PATCH] drm/vc4: Add support for gamma on BCM2711
6 BCM2711 replaces the 256-entry gamma lookup table with a 16-point
7 piecewise linear (PWL) function, because the increased pipeline bit
8 depth would make a full LUT unwieldy.
10 Implement a simple conversion from the 256-entry LUT that userspace
11 is likely to expect into 16 evenly spaced points of the PWL. This
12 could be improved with curve fitting at a later date.
14 Co-developed-by: Juerg Haefliger <juergh@canonical.com>
15 Signed-off-by: Juerg Haefliger <juergh@canonical.com>
16 Signed-off-by: Dave Stevenson <dave.stevenson@raspberrypi.com>
17 Signed-off-by: Maxime Ripard <maxime@cerno.tech>
19 drivers/gpu/drm/vc4/vc4_crtc.c | 35 +++++++++++---
20 drivers/gpu/drm/vc4/vc4_drv.h | 28 +++++++++--
21 drivers/gpu/drm/vc4/vc4_hvs.c | 87 ++++++++++++++++++++++++++++++++--
22 drivers/gpu/drm/vc4/vc4_regs.h | 22 +++++++++
23 4 files changed, 159 insertions(+), 13 deletions(-)
25 --- a/drivers/gpu/drm/vc4/vc4_crtc.c
26 +++ b/drivers/gpu/drm/vc4/vc4_crtc.c
27 @@ -1164,19 +1164,42 @@ int vc4_crtc_init(struct drm_device *drm
29 if (!vc4->hvs->hvs5) {
30 drm_mode_crtc_set_gamma_size(crtc, ARRAY_SIZE(vc4_crtc->lut_r));
32 + /* This is a lie for hvs5 which uses a 16 point PWL, but it
33 + * allows for something smarter than just 16 linearly spaced
34 + * segments. Conversion is done in vc5_hvs_update_gamma_lut.
36 + drm_mode_crtc_set_gamma_size(crtc, 256);
39 - drm_crtc_enable_color_mgmt(crtc, 0, false, crtc->gamma_size);
40 + drm_crtc_enable_color_mgmt(crtc, 0, false, crtc->gamma_size);
42 + if (!vc4->hvs->hvs5) {
43 /* We support CTM, but only for one CRTC at a time. It's therefore
44 * implemented as private driver state in vc4_kms, not here.
46 drm_crtc_enable_color_mgmt(crtc, 0, true, crtc->gamma_size);
49 - for (i = 0; i < crtc->gamma_size; i++) {
50 - vc4_crtc->lut_r[i] = i;
51 - vc4_crtc->lut_g[i] = i;
52 - vc4_crtc->lut_b[i] = i;
53 + /* Initialize the VC4 gamma LUTs */
54 + for (i = 0; i < crtc->gamma_size; i++) {
55 + vc4_crtc->lut_r[i] = i;
56 + vc4_crtc->lut_g[i] = i;
57 + vc4_crtc->lut_b[i] = i;
60 + /* Initialize the VC5 gamma PWL entries. Assume 12-bit pipeline,
61 + * evenly spread over full range.
63 + for (i = 0; i < SCALER5_DSPGAMMA_NUM_POINTS; i++) {
64 + vc4_crtc->pwl_r[i] =
65 + VC5_HVS_SET_GAMMA_ENTRY(i << 8, i << 12, 1 << 8);
66 + vc4_crtc->pwl_g[i] =
67 + VC5_HVS_SET_GAMMA_ENTRY(i << 8, i << 12, 1 << 8);
68 + vc4_crtc->pwl_b[i] =
69 + VC5_HVS_SET_GAMMA_ENTRY(i << 8, i << 12, 1 << 8);
70 + vc4_crtc->pwl_a[i] =
71 + VC5_HVS_SET_GAMMA_ENTRY(i << 8, i << 12, 1 << 8);
76 --- a/drivers/gpu/drm/vc4/vc4_drv.h
77 +++ b/drivers/gpu/drm/vc4/vc4_drv.h
79 #include <drm/drm_modeset_lock.h>
81 #include "uapi/drm/vc4_drm.h"
82 +#include "vc4_regs.h"
85 struct drm_gem_object;
86 @@ -481,6 +482,17 @@ struct vc4_pv_data {
90 +struct vc5_gamma_entry {
95 +#define VC5_HVS_SET_GAMMA_ENTRY(x, c, g) (struct vc5_gamma_entry){ \
96 + .x_c_terms = VC4_SET_FIELD((x), SCALER5_DSPGAMMA_OFF_X) | \
97 + VC4_SET_FIELD((c), SCALER5_DSPGAMMA_OFF_C), \
102 struct drm_crtc base;
103 struct platform_device *pdev;
104 @@ -490,9 +502,19 @@ struct vc4_crtc {
105 /* Timestamp at start of vblank irq - unaffected by lock delays. */
112 + struct { /* VC4 gamma LUT */
117 + struct { /* VC5 gamma PWL entries */
118 + struct vc5_gamma_entry pwl_r[SCALER5_DSPGAMMA_NUM_POINTS];
119 + struct vc5_gamma_entry pwl_g[SCALER5_DSPGAMMA_NUM_POINTS];
120 + struct vc5_gamma_entry pwl_b[SCALER5_DSPGAMMA_NUM_POINTS];
121 + struct vc5_gamma_entry pwl_a[SCALER5_DSPGAMMA_NUM_POINTS];
125 struct drm_pending_vblank_event *event;
127 --- a/drivers/gpu/drm/vc4/vc4_hvs.c
128 +++ b/drivers/gpu/drm/vc4/vc4_hvs.c
129 @@ -236,6 +236,80 @@ static void vc4_hvs_update_gamma_lut(str
130 vc4_hvs_lut_load(crtc);
133 +static void vc5_hvs_write_gamma_entry(struct vc4_dev *vc4,
135 + struct vc5_gamma_entry *gamma)
137 + HVS_WRITE(offset, gamma->x_c_terms);
138 + HVS_WRITE(offset + 4, gamma->grad_term);
141 +static void vc5_hvs_lut_load(struct drm_crtc *crtc)
143 + struct drm_device *dev = crtc->dev;
144 + struct vc4_dev *vc4 = to_vc4_dev(dev);
145 + struct vc4_crtc *vc4_crtc = to_vc4_crtc(crtc);
146 + struct vc4_crtc_state *vc4_state = to_vc4_crtc_state(crtc->state);
148 + u32 offset = SCALER5_DSPGAMMA_START +
149 + vc4_state->assigned_channel * SCALER5_DSPGAMMA_CHAN_OFFSET;
151 + for (i = 0; i < SCALER5_DSPGAMMA_NUM_POINTS; i++, offset += 8)
152 + vc5_hvs_write_gamma_entry(vc4, offset, &vc4_crtc->pwl_r[i]);
153 + for (i = 0; i < SCALER5_DSPGAMMA_NUM_POINTS; i++, offset += 8)
154 + vc5_hvs_write_gamma_entry(vc4, offset, &vc4_crtc->pwl_g[i]);
155 + for (i = 0; i < SCALER5_DSPGAMMA_NUM_POINTS; i++, offset += 8)
156 + vc5_hvs_write_gamma_entry(vc4, offset, &vc4_crtc->pwl_b[i]);
158 + if (vc4_state->assigned_channel == 2) {
159 + /* Alpha only valid on channel 2 */
160 + for (i = 0; i < SCALER5_DSPGAMMA_NUM_POINTS; i++, offset += 8)
161 + vc5_hvs_write_gamma_entry(vc4, offset, &vc4_crtc->pwl_a[i]);
165 +static void vc5_hvs_update_gamma_lut(struct drm_crtc *crtc)
167 + struct vc4_crtc *vc4_crtc = to_vc4_crtc(crtc);
168 + struct drm_color_lut *lut = crtc->state->gamma_lut->data;
169 + unsigned int step, i;
172 +#define VC5_HVS_UPDATE_GAMMA_ENTRY_FROM_LUT(pwl, chan) \
173 + start = drm_color_lut_extract(lut[i * step].chan, 12); \
174 + end = drm_color_lut_extract(lut[(i + 1) * step - 1].chan, 12); \
176 + /* Negative gradients not permitted by the hardware, so \
177 + * flatten such points out. \
182 + /* Assume 12-bit pipeline. \
183 + * X evenly spread over full range (12 bit). \
184 + * C as U12.4 format. \
185 + * Gradient as U4.8 format. \
187 + vc4_crtc->pwl[i] = \
188 + VC5_HVS_SET_GAMMA_ENTRY(i << 8, start << 4, \
189 + ((end - start) << 4) / (step - 1))
191 + /* HVS5 has a 16 point piecewise linear function for each colour
192 + * channel (including alpha on channel 2) on each display channel.
194 + * Currently take a crude subsample of the gamma LUT, but this could
195 + * be improved to implement curve fitting.
197 + step = crtc->gamma_size / SCALER5_DSPGAMMA_NUM_POINTS;
198 + for (i = 0; i < SCALER5_DSPGAMMA_NUM_POINTS; i++) {
199 + VC5_HVS_UPDATE_GAMMA_ENTRY_FROM_LUT(pwl_r, red);
200 + VC5_HVS_UPDATE_GAMMA_ENTRY_FROM_LUT(pwl_g, green);
201 + VC5_HVS_UPDATE_GAMMA_ENTRY_FROM_LUT(pwl_b, blue);
204 + vc5_hvs_lut_load(crtc);
207 int vc4_hvs_get_fifo_from_output(struct drm_device *dev, unsigned int output)
209 struct vc4_dev *vc4 = to_vc4_dev(dev);
210 @@ -329,14 +403,16 @@ static int vc4_hvs_init_channel(struct v
211 dispbkgndx &= ~SCALER_DISPBKGND_INTERLACE;
213 HVS_WRITE(SCALER_DISPBKGNDX(chan), dispbkgndx |
214 - SCALER_DISPBKGND_AUTOHS |
215 - ((!vc4->hvs->hvs5) ? SCALER_DISPBKGND_GAMMA : 0) |
216 + SCALER_DISPBKGND_AUTOHS | SCALER_DISPBKGND_GAMMA |
217 (interlace ? SCALER_DISPBKGND_INTERLACE : 0));
219 /* Reload the LUT, since the SRAMs would have been disabled if
220 * all CRTCs had SCALER_DISPBKGND_GAMMA unset at once.
222 - vc4_hvs_lut_load(crtc);
223 + if (!vc4->hvs->hvs5)
224 + vc4_hvs_lut_load(crtc);
226 + vc5_hvs_lut_load(crtc);
230 @@ -534,7 +610,10 @@ void vc4_hvs_atomic_flush(struct drm_crt
231 u32 dispbkgndx = HVS_READ(SCALER_DISPBKGNDX(vc4_state->assigned_channel));
233 if (crtc->state->gamma_lut) {
234 - vc4_hvs_update_gamma_lut(crtc);
235 + if (!vc4->hvs->hvs5)
236 + vc4_hvs_update_gamma_lut(crtc);
238 + vc5_hvs_update_gamma_lut(crtc);
239 dispbkgndx |= SCALER_DISPBKGND_GAMMA;
241 /* Unsetting DISPBKGND_GAMMA skips the gamma lut step
242 --- a/drivers/gpu/drm/vc4/vc4_regs.h
243 +++ b/drivers/gpu/drm/vc4/vc4_regs.h
245 #define SCALER_DLIST_START 0x00002000
246 #define SCALER_DLIST_SIZE 0x00004000
248 +/* Gamma PWL for each channel. 16 points for each of 4 colour channels (alpha
249 + * only on channel 2). 8 bytes per entry, offsets first, then gradient:
252 + * Values for X and C are left justified, and vary depending on the width of
254 + * 8-bit pipeline: X uses [31:24], C is U8.8 format, and GRAD is U4.8.
255 + * 12-bit pipeline: X uses [31:20], C is U12.4 format, and GRAD is U4.8.
257 + * The 3 HVS channels start at 0x400 offsets (ie chan 1 starts at 0x2400, and
258 + * chan 2 at 0x2800).
260 +#define SCALER5_DSPGAMMA_NUM_POINTS 16
261 +#define SCALER5_DSPGAMMA_START 0x00002000
262 +#define SCALER5_DSPGAMMA_CHAN_OFFSET 0x400
263 +# define SCALER5_DSPGAMMA_OFF_X_MASK VC4_MASK(31, 20)
264 +# define SCALER5_DSPGAMMA_OFF_X_SHIFT 20
265 +# define SCALER5_DSPGAMMA_OFF_C_MASK VC4_MASK(15, 0)
266 +# define SCALER5_DSPGAMMA_OFF_C_SHIFT 0
267 +# define SCALER5_DSPGAMMA_GRAD_MASK VC4_MASK(11, 0)
268 +# define SCALER5_DSPGAMMA_GRAD_SHIFT 0
270 #define SCALER5_DLIST_START 0x00004000
272 # define VC4_HDMI_SW_RESET_FORMAT_DETECT BIT(1)