c9d039097256c371656cf7909809e79db116a578
[openwrt/openwrt.git] / target / linux / bcm27xx / patches-5.15 / 950-0161-drm-v3d-Clock-V3D-down-when-not-in-use.patch
1 From e796ee16390e63ed5d8713d301d609b32843342d Mon Sep 17 00:00:00 2001
2 From: Eric Anholt <eric@anholt.net>
3 Date: Thu, 2 May 2019 13:22:53 -0700
4 Subject: [PATCH] drm/v3d: Clock V3D down when not in use.
5
6 My various attempts at re-enabling runtime PM have failed, so just
7 crank the clock down when V3D is idle to reduce power consumption.
8
9 Signed-off-by: Eric Anholt <eric@anholt.net>
10 ---
11 drivers/gpu/drm/v3d/v3d_drv.c | 18 ++++++++++++
12 drivers/gpu/drm/v3d/v3d_drv.h | 6 ++++
13 drivers/gpu/drm/v3d/v3d_gem.c | 53 +++++++++++++++++++++++++++++++----
14 3 files changed, 72 insertions(+), 5 deletions(-)
15
16 --- a/drivers/gpu/drm/v3d/v3d_drv.c
17 +++ b/drivers/gpu/drm/v3d/v3d_drv.c
18 @@ -301,6 +301,21 @@ static int v3d_platform_drm_probe(struct
19 }
20 }
21
22 + v3d->clk = devm_clk_get(dev, NULL);
23 + if (IS_ERR(v3d->clk)) {
24 + if (ret != -EPROBE_DEFER)
25 + dev_err(dev, "Failed to get clock\n");
26 + goto dev_free;
27 + }
28 + v3d->clk_up_rate = clk_get_rate(v3d->clk);
29 + /* For downclocking, drop it to the minimum frequency we can get from
30 + * the CPRMAN clock generator dividing off our parent. The divider is
31 + * 4 bits, but ask for just higher than that so that rounding doesn't
32 + * make cprman reject our rate.
33 + */
34 + v3d->clk_down_rate =
35 + (clk_get_rate(clk_get_parent(v3d->clk)) / (1 << 4)) + 10000;
36 +
37 if (v3d->ver < 41) {
38 ret = map_regs(v3d, &v3d->gca_regs, "gca");
39 if (ret)
40 @@ -327,6 +342,9 @@ static int v3d_platform_drm_probe(struct
41 if (ret)
42 goto irq_disable;
43
44 + ret = clk_set_rate(v3d->clk, v3d->clk_down_rate);
45 + WARN_ON_ONCE(ret != 0);
46 +
47 return 0;
48
49 irq_disable:
50 --- a/drivers/gpu/drm/v3d/v3d_drv.h
51 +++ b/drivers/gpu/drm/v3d/v3d_drv.h
52 @@ -85,6 +85,12 @@ struct v3d_dev {
53 void __iomem *bridge_regs;
54 void __iomem *gca_regs;
55 struct clk *clk;
56 + struct delayed_work clk_down_work;
57 + unsigned long clk_up_rate, clk_down_rate;
58 + struct mutex clk_lock;
59 + u32 clk_refcount;
60 + bool clk_up;
61 +
62 struct reset_control *reset;
63
64 /* Virtual and DMA addresses of the single shared page table. */
65 --- a/drivers/gpu/drm/v3d/v3d_gem.c
66 +++ b/drivers/gpu/drm/v3d/v3d_gem.c
67 @@ -4,6 +4,7 @@
68 #include <linux/device.h>
69 #include <linux/dma-mapping.h>
70 #include <linux/io.h>
71 +#include <linux/clk.h>
72 #include <linux/module.h>
73 #include <linux/platform_device.h>
74 #include <linux/pm_runtime.h>
75 @@ -19,6 +20,47 @@
76 #include "v3d_trace.h"
77
78 static void
79 +v3d_clock_down_work(struct work_struct *work) /* Delayed-work handler: requests clk_down_rate on v3d->clk. */
80 +{
81 + struct v3d_dev *v3d =
82 + container_of(work, struct v3d_dev, clk_down_work.work); /* recover the v3d_dev that embeds clk_down_work */
83 + int ret;
84 +
85 + ret = clk_set_rate(v3d->clk, v3d->clk_down_rate);
86 + v3d->clk_up = false; /* cleared unconditionally, i.e. even when clk_set_rate() returned an error */
87 + WARN_ON_ONCE(ret != 0); /* a failure is only warned about (once); nothing is retried or propagated */
88 +}
89 +
90 +static void
91 +v3d_clock_up_get(struct v3d_dev *v3d) /* Takes one clock reference; the first reference raises the clock to clk_up_rate if it is not already up. */
92 +{
93 + mutex_lock(&v3d->clk_lock); /* clk_refcount is only modified with clk_lock held (here and in v3d_clock_up_put) */
94 + if (v3d->clk_refcount++ == 0) { /* post-increment: branch is taken only on the 0 -> 1 transition */
95 + cancel_delayed_work_sync(&v3d->clk_down_work); /* drop a clock-down request queued by an earlier put; the handler itself never takes clk_lock */
96 + if (!v3d->clk_up) { /* clk_up is still true when the queued clock-down work was cancelled before it ran */
97 + int ret;
98 +
99 + ret = clk_set_rate(v3d->clk, v3d->clk_up_rate);
100 + WARN_ON_ONCE(ret != 0); /* a failure is only warned about (once) */
101 + v3d->clk_up = true; /* set even when clk_set_rate() returned an error */
102 + }
103 + }
104 + mutex_unlock(&v3d->clk_lock);
105 +}
106 +
107 +static void
108 +v3d_clock_up_put(struct v3d_dev *v3d) /* Drops one clock reference; dropping the last one queues clk_down_work. */
109 +{
110 + mutex_lock(&v3d->clk_lock);
111 + if (--v3d->clk_refcount == 0) { /* pre-decrement: 1 -> 0 transition; clk_refcount is u32, so an unbalanced put would wrap rather than reach 0 */
112 + schedule_delayed_work(&v3d->clk_down_work,
113 + msecs_to_jiffies(100)); /* clock down after a 100 ms delay; v3d_clock_up_get() cancels this work if a new reference arrives first */
114 + }
115 + mutex_unlock(&v3d->clk_lock);
116 +}
117 +
118 +
119 +static void
120 v3d_init_core(struct v3d_dev *v3d, int core)
121 {
122 /* Set OVRTMUOUT, which means that the texture sampler uniform
123 @@ -358,6 +400,7 @@ v3d_job_free(struct kref *ref)
124 struct v3d_job *job = container_of(ref, struct v3d_job, refcount);
125 unsigned long index;
126 struct dma_fence *fence;
127 + struct v3d_dev *v3d = job->v3d;
128 int i;
129
130 for (i = 0; i < job->bo_count; i++) {
131 @@ -371,11 +414,7 @@ v3d_job_free(struct kref *ref)
132 }
133 xa_destroy(&job->deps);
134
135 - dma_fence_put(job->irq_fence);
136 - dma_fence_put(job->done_fence);
137 -
138 - pm_runtime_mark_last_busy(job->v3d->drm.dev);
139 - pm_runtime_put_autosuspend(job->v3d->drm.dev);
140 + v3d_clock_up_put(v3d);
141
142 if (job->perfmon)
143 v3d_perfmon_put(job->perfmon);
144 @@ -460,6 +499,7 @@ v3d_job_init(struct v3d_dev *v3d, struct
145 if (ret)
146 goto fail;
147
148 + v3d_clock_up_get(v3d);
149 kref_init(&job->refcount);
150
151 return 0;
152 @@ -914,6 +954,9 @@ v3d_gem_init(struct drm_device *dev)
153 mutex_init(&v3d->sched_lock);
154 mutex_init(&v3d->cache_clean_lock);
155
156 + mutex_init(&v3d->clk_lock);
157 + INIT_DELAYED_WORK(&v3d->clk_down_work, v3d_clock_down_work);
158 +
159 /* Note: We don't allocate address 0. Various bits of HW
160 * treat 0 as special, such as the occlusion query counters
161 * where 0 means "disabled".