brcm2708: update linux 4.4 patches to latest version
[openwrt/openwrt.git] / target / linux / brcm2708 / patches-4.4 / 0456-drm-vc4-Implement-precise-vblank-timestamping.patch
1 From d1a9a03cbe1110756a63d4a3747e22eb8417f75e Mon Sep 17 00:00:00 2001
2 From: Mario Kleiner <mario.kleiner.de@gmail.com>
3 Date: Thu, 23 Jun 2016 08:17:50 +0200
4 Subject: [PATCH] drm/vc4: Implement precise vblank timestamping.
5
6 Precise vblank timestamping is implemented via the
7 usual scanout position based method. On VC4 the
8 pixelvalves PV do not have a scanout position
9 register. Only the hardware video scaler HVS has a
10 similar register which describes which scanline for
11 the output is currently composited and stored in the
12 HVS fifo for later consumption by the PV.
13
14 This causes a problem in that the HVS runs at a much
15 faster clock (system clock / audio gate) than the PV
16 which runs at video mode dot clock, so unless the
17 fifo between HVS and PV is full, the HVS will progress
18 faster in its observable read line position than video
19 scan rate, so the HVS position reading can't be directly
20 translated into a scanout position for timestamp correction.
21
22 Additionally when the PV is in vblank, it doesn't consume
23 from the fifo, so the fifo gets full very quickly and then
24 the HVS stops compositing until the PV enters active scanout
25 and starts consuming scanlines from the fifo again, making
26 new space for the HVS to composite.
27
28 Therefore a simple translation of HVS read position into
29 elapsed time since (or to) start of active scanout does
30 not work, but for the most interesting cases we can still
31 get useful and sufficiently accurate results:
32
33 1. The PV enters active scanout of a new frame with the
34 fifo of the HVS completely full, and the HVS can refill
35 any fifo line which gets consumed and thereby freed up by
36 the PV during active scanout very quickly. Therefore the
37 PV and HVS work effectively in lock-step during active
38 scanout with the fifo never having more than 1 scanline
39 freed up by the PV before it gets refilled. The PV's
40 real scanout position is therefore trailing the HVS
41 compositing position as scanoutpos = hvspos - fifosize
42 and we can get the true scanoutpos as HVS readpos minus
43 fifo size, so precise timestamping works while in active
44 scanout, except for the last few scanlines of the frame,
45 when the HVS reaches end of frame, stops compositing and
46 the PV catches up and drains the fifo. This special case
47 would only introduce minor errors though.
48
49 2. If we are in vblank, then we can only guess something
50 reasonable. If called from vblank irq, we assume the irq is
51 usually dispatched with minimum delay, so we can take a
52 timestamp taken at entry into the vblank irq handler as a
53 baseline and then add a full vblank duration until the
54 guessed start of active scanout. As irq dispatch is usually
55 pretty low latency this works with relatively low jitter and
56 good results.
57
58 If we aren't called from vblank then we could be anywhere
59 within the vblank interval, so we return a neutral result,
60 simply the current system timestamp, and hope for the best.
61
62 Measurement shows the generated timestamps to be rather precise,
63 and at least never off by more than 1 vblank duration worst-case.
64
65 Limitations: Doesn't work well yet for interlaced video modes,
66 therefore disabled in interlaced mode for now.
67
68 v2: Use the DISPBASE registers to determine the FIFO size (changes
69 by anholt)
70
71 Signed-off-by: Mario Kleiner <mario.kleiner.de@gmail.com>
72 Signed-off-by: Eric Anholt <eric@anholt.net>
73 Reviewed-and-tested-by: Mario Kleiner <mario.kleiner.de@gmail.com> (v2)
74 (cherry picked from commit 1bf59f1dcbe25272f6b5d870054647e58a8a9c55)
75 ---
76 drivers/gpu/drm/vc4/vc4_crtc.c | 162 +++++++++++++++++++++++++++++++++++++++++
77 drivers/gpu/drm/vc4/vc4_drv.c | 2 +
78 drivers/gpu/drm/vc4/vc4_drv.h | 7 ++
79 drivers/gpu/drm/vc4/vc4_regs.h | 22 +++++-
80 4 files changed, 192 insertions(+), 1 deletion(-)
81
82 --- a/drivers/gpu/drm/vc4/vc4_crtc.c
83 +++ b/drivers/gpu/drm/vc4/vc4_crtc.c
84 @@ -47,12 +47,17 @@ struct vc4_crtc {
85 const struct vc4_crtc_data *data;
86 void __iomem *regs;
87
88 + /* Timestamp at start of vblank irq - unaffected by lock delays. */
89 + ktime_t t_vblank;
90 +
91 /* Which HVS channel we're using for our CRTC. */
92 int channel;
93
94 u8 lut_r[256];
95 u8 lut_g[256];
96 u8 lut_b[256];
97 + /* Size in pixels of the COB memory allocated to this CRTC. */
98 + u32 cob_size;
99
100 struct drm_pending_vblank_event *event;
101 };
102 @@ -134,6 +139,144 @@ int vc4_crtc_debugfs_regs(struct seq_fil
103 }
104 #endif
105
106 +int vc4_crtc_get_scanoutpos(struct drm_device *dev, unsigned int crtc_id,
107 + unsigned int flags, int *vpos, int *hpos,
108 + ktime_t *stime, ktime_t *etime,
109 + const struct drm_display_mode *mode)
110 +{
111 + struct vc4_dev *vc4 = to_vc4_dev(dev);
112 + struct vc4_crtc *vc4_crtc = vc4->crtc[crtc_id];
113 + u32 val;
114 + int fifo_lines;
115 + int vblank_lines;
116 + int ret = 0;
117 +
118 + /*
119 + * XXX Doesn't work well in interlaced mode yet, partially due
120 + * to problems in vc4 kms or drm core interlaced mode handling,
121 + * so disable for now in interlaced mode.
122 + */
123 + if (mode->flags & DRM_MODE_FLAG_INTERLACE)
124 + return ret;
125 +
126 + /* preempt_disable_rt() should go right here in PREEMPT_RT patchset. */
127 +
128 + /* Get optional system timestamp before query. */
129 + if (stime)
130 + *stime = ktime_get();
131 +
132 + /*
133 + * Read vertical scanline which is currently composed for our
134 + * pixelvalve by the HVS, and also the scaler status.
135 + */
136 + val = HVS_READ(SCALER_DISPSTATX(vc4_crtc->channel));
137 +
138 + /* Get optional system timestamp after query. */
139 + if (etime)
140 + *etime = ktime_get();
141 +
142 + /* preempt_enable_rt() should go right here in PREEMPT_RT patchset. */
143 +
144 + /* Vertical position of hvs composed scanline. */
145 + *vpos = VC4_GET_FIELD(val, SCALER_DISPSTATX_LINE);
146 +
147 + /* No hpos info available. */
148 + if (hpos)
149 + *hpos = 0;
150 +
151 + /* This is the offset we need for translating hvs -> pv scanout pos. */
152 + fifo_lines = vc4_crtc->cob_size / mode->crtc_hdisplay;
153 +
154 + if (fifo_lines > 0)
155 + ret |= DRM_SCANOUTPOS_VALID;
156 +
157 + /* HVS more than fifo_lines into frame for compositing? */
158 + if (*vpos > fifo_lines) {
159 + /*
160 + * We are in active scanout and can get some meaningful results
161 + * from HVS. The actual PV scanout can not trail behind more
162 + * than fifo_lines as that is the fifo's capacity. Assume that
163 + * in active scanout the HVS and PV work in lockstep wrt. HVS
164 + * refilling the fifo and PV consuming from the fifo, ie.
165 + * whenever the PV consumes and frees up a scanline in the
166 + * fifo, the HVS will immediately refill it, therefore
167 + * incrementing vpos. Therefore we choose HVS read position -
168 + * fifo size in scanlines as an estimate of the real scanout
169 + * position of the PV.
170 + */
171 + *vpos -= fifo_lines + 1;
172 + if (mode->flags & DRM_MODE_FLAG_INTERLACE)
173 + *vpos /= 2;
174 +
175 + ret |= DRM_SCANOUTPOS_ACCURATE;
176 + return ret;
177 + }
178 +
179 + /*
180 + * Less: This happens when we are in vblank and the HVS, after getting
181 + * the VSTART restart signal from the PV, just started refilling its
182 + * fifo with new lines from the top-most lines of the new framebuffers.
183 + * The PV does not scan out in vblank, so does not remove lines from
184 + * the fifo, so the fifo will be full quickly and the HVS has to pause.
185 + * We can't get meaningful readings wrt. scanline position of the PV
186 + * and need to make things up in an approximate but consistent way.
187 + */
188 + ret |= DRM_SCANOUTPOS_IN_VBLANK;
189 + vblank_lines = mode->crtc_vtotal - mode->crtc_vdisplay;
190 +
191 + if (flags & DRM_CALLED_FROM_VBLIRQ) {
192 + /*
193 + * Assume the irq handler got called close to first
194 + * line of vblank, so PV has about a full vblank's worth of
195 + * scanlines to go, and as a base timestamp use the
196 + * one taken at entry into vblank irq handler, so it
197 + * is not affected by random delays due to lock
198 + * contention on event_lock or vblank_time lock in
199 + * the core.
200 + */
201 + *vpos = -vblank_lines;
202 +
203 + if (stime)
204 + *stime = vc4_crtc->t_vblank;
205 + if (etime)
206 + *etime = vc4_crtc->t_vblank;
207 +
208 + /*
209 + * If the HVS fifo is not yet full then we know for certain
210 + * we are at the very beginning of vblank, as the hvs just
211 + * started refilling, and the stime and etime timestamps
212 + * truly correspond to start of vblank.
213 + */
214 + if ((val & SCALER_DISPSTATX_FULL) != SCALER_DISPSTATX_FULL)
215 + ret |= DRM_SCANOUTPOS_ACCURATE;
216 + } else {
217 + /*
218 + * No clue where we are inside vblank. Return a vpos of zero,
219 + * which will cause calling code to just return the etime
220 + * timestamp uncorrected. At least this is no worse than the
221 + * standard fallback.
222 + */
223 + *vpos = 0;
224 + }
225 +
226 + return ret;
227 +}
228 +
229 +int vc4_crtc_get_vblank_timestamp(struct drm_device *dev, unsigned int crtc_id,
230 + int *max_error, struct timeval *vblank_time,
231 + unsigned flags)
232 +{
233 + struct vc4_dev *vc4 = to_vc4_dev(dev);
234 + struct vc4_crtc *vc4_crtc = vc4->crtc[crtc_id];
235 + struct drm_crtc *crtc = &vc4_crtc->base;
236 + struct drm_crtc_state *state = crtc->state;
237 +
238 + /* Helper routine in DRM core does all the work: */
239 + return drm_calc_vbltimestamp_from_scanoutpos(dev, crtc_id, max_error,
240 + vblank_time, flags,
241 + &state->adjusted_mode);
242 +}
243 +
244 static void vc4_crtc_destroy(struct drm_crtc *crtc)
245 {
246 drm_crtc_cleanup(crtc);
247 @@ -535,6 +678,7 @@ static irqreturn_t vc4_crtc_irq_handler(
248 irqreturn_t ret = IRQ_NONE;
249
250 if (stat & PV_INT_VFP_START) {
251 + vc4_crtc->t_vblank = ktime_get();
252 CRTC_WRITE(PV_INTSTAT, PV_INT_VFP_START);
253 drm_crtc_handle_vblank(&vc4_crtc->base);
254 vc4_crtc_handle_page_flip(vc4_crtc);
255 @@ -759,6 +903,22 @@ static void vc4_set_crtc_possible_masks(
256 }
257 }
258
259 +static void
260 +vc4_crtc_get_cob_allocation(struct vc4_crtc *vc4_crtc)
261 +{
262 + struct drm_device *drm = vc4_crtc->base.dev;
263 + struct vc4_dev *vc4 = to_vc4_dev(drm);
264 + u32 dispbase = HVS_READ(SCALER_DISPBASEX(vc4_crtc->channel));
265 + /* Top/base are supposed to be 4-pixel aligned, but the
266 + * Raspberry Pi firmware fills the low bits (which are
267 + * presumably ignored).
268 + */
269 + u32 top = VC4_GET_FIELD(dispbase, SCALER_DISPBASEX_TOP) & ~3;
270 + u32 base = VC4_GET_FIELD(dispbase, SCALER_DISPBASEX_BASE) & ~3;
271 +
272 + vc4_crtc->cob_size = top - base + 4;
273 +}
274 +
275 static int vc4_crtc_bind(struct device *dev, struct device *master, void *data)
276 {
277 struct platform_device *pdev = to_platform_device(dev);
278 @@ -835,6 +995,8 @@ static int vc4_crtc_bind(struct device *
279 crtc->cursor = cursor_plane;
280 }
281
282 + vc4_crtc_get_cob_allocation(vc4_crtc);
283 +
284 CRTC_WRITE(PV_INTEN, 0);
285 CRTC_WRITE(PV_INTSTAT, PV_INT_VFP_START);
286 ret = devm_request_irq(dev, platform_get_irq(pdev, 0),
287 --- a/drivers/gpu/drm/vc4/vc4_drv.c
288 +++ b/drivers/gpu/drm/vc4/vc4_drv.c
289 @@ -116,6 +116,8 @@ static struct drm_driver vc4_drm_driver
290 .enable_vblank = vc4_enable_vblank,
291 .disable_vblank = vc4_disable_vblank,
292 .get_vblank_counter = drm_vblank_no_hw_counter,
293 + .get_scanout_position = vc4_crtc_get_scanoutpos,
294 + .get_vblank_timestamp = vc4_crtc_get_vblank_timestamp,
295
296 #if defined(CONFIG_DEBUG_FS)
297 .debugfs_init = vc4_debugfs_init,
298 --- a/drivers/gpu/drm/vc4/vc4_drv.h
299 +++ b/drivers/gpu/drm/vc4/vc4_drv.h
300 @@ -419,6 +419,13 @@ int vc4_enable_vblank(struct drm_device
301 void vc4_disable_vblank(struct drm_device *dev, unsigned int crtc_id);
302 void vc4_cancel_page_flip(struct drm_crtc *crtc, struct drm_file *file);
303 int vc4_crtc_debugfs_regs(struct seq_file *m, void *arg);
304 +int vc4_crtc_get_scanoutpos(struct drm_device *dev, unsigned int crtc_id,
305 + unsigned int flags, int *vpos, int *hpos,
306 + ktime_t *stime, ktime_t *etime,
307 + const struct drm_display_mode *mode);
308 +int vc4_crtc_get_vblank_timestamp(struct drm_device *dev, unsigned int crtc_id,
309 + int *max_error, struct timeval *vblank_time,
310 + unsigned flags);
311
312 /* vc4_debugfs.c */
313 int vc4_debugfs_init(struct drm_minor *minor);
314 --- a/drivers/gpu/drm/vc4/vc4_regs.h
315 +++ b/drivers/gpu/drm/vc4/vc4_regs.h
316 @@ -368,7 +368,6 @@
317 # define SCALER_DISPBKGND_FILL BIT(24)
318
319 #define SCALER_DISPSTAT0 0x00000048
320 -#define SCALER_DISPBASE0 0x0000004c
321 # define SCALER_DISPSTATX_MODE_MASK VC4_MASK(31, 30)
322 # define SCALER_DISPSTATX_MODE_SHIFT 30
323 # define SCALER_DISPSTATX_MODE_DISABLED 0
324 @@ -377,6 +376,24 @@
325 # define SCALER_DISPSTATX_MODE_EOF 3
326 # define SCALER_DISPSTATX_FULL BIT(29)
327 # define SCALER_DISPSTATX_EMPTY BIT(28)
328 +# define SCALER_DISPSTATX_FRAME_COUNT_MASK VC4_MASK(17, 12)
329 +# define SCALER_DISPSTATX_FRAME_COUNT_SHIFT 12
330 +# define SCALER_DISPSTATX_LINE_MASK VC4_MASK(11, 0)
331 +# define SCALER_DISPSTATX_LINE_SHIFT 0
332 +
333 +#define SCALER_DISPBASE0 0x0000004c
334 +/* Last pixel in the COB (display FIFO memory) allocated to this HVS
335 + * channel. Must be 4-pixel aligned (and thus 4 pixels less than the
336 + * next COB base).
337 + */
338 +# define SCALER_DISPBASEX_TOP_MASK VC4_MASK(31, 16)
339 +# define SCALER_DISPBASEX_TOP_SHIFT 16
340 +/* First pixel in the COB (display FIFO memory) allocated to this HVS
341 + * channel. Must be 4-pixel aligned.
342 + */
343 +# define SCALER_DISPBASEX_BASE_MASK VC4_MASK(15, 0)
344 +# define SCALER_DISPBASEX_BASE_SHIFT 0
345 +
346 #define SCALER_DISPCTRL1 0x00000050
347 #define SCALER_DISPBKGND1 0x00000054
348 #define SCALER_DISPBKGNDX(x) (SCALER_DISPBKGND0 + \
349 @@ -387,6 +404,9 @@
350 (x) * (SCALER_DISPSTAT1 - \
351 SCALER_DISPSTAT0))
352 #define SCALER_DISPBASE1 0x0000005c
353 +#define SCALER_DISPBASEX(x) (SCALER_DISPBASE0 + \
354 + (x) * (SCALER_DISPBASE1 - \
355 + SCALER_DISPBASE0))
356 #define SCALER_DISPCTRL2 0x00000060
357 #define SCALER_DISPCTRLX(x) (SCALER_DISPCTRL0 + \
358 (x) * (SCALER_DISPCTRL1 - \