brcm2708: update linux 4.4 patches to latest version
[openwrt/openwrt.git] / target / linux / brcm2708 / patches-4.4 / 0478-drm-vc4-Add-a-bitmap-of-branch-targets-during-shader.patch
1 From ba3aa9ce57cb933203cb0ebaa7c00ef756e5f84e Mon Sep 17 00:00:00 2001
2 From: Eric Anholt <eric@anholt.net>
3 Date: Sat, 2 Jul 2016 10:10:24 -0700
4 Subject: [PATCH] drm/vc4: Add a bitmap of branch targets during shader
5 validation.
6
7 This isn't used yet; it's just a first step toward loop validation.
8 During the main parsing of instructions, we need to know when we hit a
9 new basic block so that we can reset validated state.
10
11 v2: Fix a stray semicolon after an if block. (caught by kbuild test).
12
13 Signed-off-by: Eric Anholt <eric@anholt.net>
14 (cherry picked from commit 93aa9ae3e5523e49e4e5abacd4dbee0e4ab2d931)
15 ---
16 drivers/gpu/drm/vc4/vc4_qpu_defines.h | 12 +++
17 drivers/gpu/drm/vc4/vc4_validate_shaders.c | 114 ++++++++++++++++++++++++++++-
18 2 files changed, 124 insertions(+), 2 deletions(-)
19
20 --- a/drivers/gpu/drm/vc4/vc4_qpu_defines.h
21 +++ b/drivers/gpu/drm/vc4/vc4_qpu_defines.h
22 @@ -230,6 +230,15 @@ enum qpu_unpack_r4 {
23 #define QPU_COND_MUL_SHIFT 46
24 #define QPU_COND_MUL_MASK QPU_MASK(48, 46)
25
26 +#define QPU_BRANCH_COND_SHIFT 52
27 +#define QPU_BRANCH_COND_MASK QPU_MASK(55, 52)
28 +
29 +#define QPU_BRANCH_REL ((uint64_t)1 << 51)
30 +#define QPU_BRANCH_REG ((uint64_t)1 << 50)
31 +
32 +#define QPU_BRANCH_RADDR_A_SHIFT 45
33 +#define QPU_BRANCH_RADDR_A_MASK QPU_MASK(49, 45)
34 +
35 #define QPU_SF ((uint64_t)1 << 45)
36
37 #define QPU_WADDR_ADD_SHIFT 38
38 @@ -261,4 +270,7 @@ enum qpu_unpack_r4 {
39 #define QPU_OP_ADD_SHIFT 24
40 #define QPU_OP_ADD_MASK QPU_MASK(28, 24)
41
42 +#define QPU_BRANCH_TARGET_SHIFT 0
43 +#define QPU_BRANCH_TARGET_MASK QPU_MASK(31, 0)
44 +
45 #endif /* VC4_QPU_DEFINES_H */
46 --- a/drivers/gpu/drm/vc4/vc4_validate_shaders.c
47 +++ b/drivers/gpu/drm/vc4/vc4_validate_shaders.c
48 @@ -59,6 +59,13 @@ struct vc4_shader_validation_state {
49 */
50 uint32_t live_min_clamp_offsets[32 + 32 + 4];
51 bool live_max_clamp_regs[32 + 32 + 4];
52 +
53 + /* Bitfield of which IPs are used as branch targets.
54 + *
55 + * Used to validate that the uniform stream is updated at the right
56 + * points and to clear the texturing/clamping state.
57 + */
58 + unsigned long *branch_targets;
59 };
60
61 static uint32_t
62 @@ -418,13 +425,104 @@ check_instruction_reads(uint64_t inst,
63 return true;
64 }
65
66 +/* Make sure that all branches are relative and point within the shader, and
67 + * note their targets for later.
68 + */
69 +static bool
70 +vc4_validate_branches(struct vc4_shader_validation_state *validation_state)
71 +{
72 + uint32_t max_branch_target = 0;
73 + bool found_shader_end = false;
74 + int ip;
75 + int shader_end_ip = 0;
76 + int last_branch = -2;
77 +
78 + for (ip = 0; ip < validation_state->max_ip; ip++) {
79 + uint64_t inst = validation_state->shader[ip];
80 + int32_t branch_imm = QPU_GET_FIELD(inst, QPU_BRANCH_TARGET);
81 + uint32_t sig = QPU_GET_FIELD(inst, QPU_SIG);
82 + uint32_t after_delay_ip = ip + 4;
83 + uint32_t branch_target_ip;
84 +
85 + if (sig == QPU_SIG_PROG_END) {
86 + shader_end_ip = ip;
87 + found_shader_end = true;
88 + continue;
89 + }
90 +
91 + if (sig != QPU_SIG_BRANCH)
92 + continue;
93 +
94 + if (ip - last_branch < 4) {
95 + DRM_ERROR("Branch at %d during delay slots\n", ip);
96 + return false;
97 + }
98 + last_branch = ip;
99 +
100 + if (inst & QPU_BRANCH_REG) {
101 + DRM_ERROR("branching from register relative "
102 + "not supported\n");
103 + return false;
104 + }
105 +
106 + if (!(inst & QPU_BRANCH_REL)) {
107 + DRM_ERROR("relative branching required\n");
108 + return false;
109 + }
110 +
111 + /* The actual branch target is the instruction after the delay
112 + * slots, plus whatever byte offset is in the low 32 bits of
113 + * the instruction. Make sure we're not branching beyond the
114 + * end of the shader object.
115 + */
116 + if (branch_imm % sizeof(inst) != 0) {
117 + DRM_ERROR("branch target not aligned\n");
118 + return false;
119 + }
120 +
121 + branch_target_ip = after_delay_ip + (branch_imm >> 3);
122 + if (branch_target_ip >= validation_state->max_ip) {
123 + DRM_ERROR("Branch at %d outside of shader (ip %d/%d)\n",
124 + ip, branch_target_ip,
125 + validation_state->max_ip);
126 + return false;
127 + }
128 + set_bit(branch_target_ip, validation_state->branch_targets);
129 +
130 + /* Make sure that the non-branching path is also not outside
131 + * the shader.
132 + */
133 + if (after_delay_ip >= validation_state->max_ip) {
134 + DRM_ERROR("Branch at %d continues past shader end "
135 + "(%d/%d)\n",
136 + ip, after_delay_ip, validation_state->max_ip);
137 + return false;
138 + }
139 + set_bit(after_delay_ip, validation_state->branch_targets);
140 + max_branch_target = max(max_branch_target, after_delay_ip);
141 +
142 + /* There are two delay slots after program end is signaled
143 + * that are still executed, then we're finished.
144 + */
145 + if (found_shader_end && ip == shader_end_ip + 2)
146 + break;
147 + }
148 +
149 + if (max_branch_target > shader_end_ip) {
150 + DRM_ERROR("Branch landed after QPU_SIG_PROG_END");
151 + return false;
152 + }
153 +
154 + return true;
155 +}
156 +
157 struct vc4_validated_shader_info *
158 vc4_validate_shader(struct drm_gem_cma_object *shader_obj)
159 {
160 bool found_shader_end = false;
161 int shader_end_ip = 0;
162 uint32_t ip;
163 - struct vc4_validated_shader_info *validated_shader;
164 + struct vc4_validated_shader_info *validated_shader = NULL;
165 struct vc4_shader_validation_state validation_state;
166 int i;
167
168 @@ -437,9 +535,18 @@ vc4_validate_shader(struct drm_gem_cma_o
169 for (i = 0; i < ARRAY_SIZE(validation_state.live_min_clamp_offsets); i++)
170 validation_state.live_min_clamp_offsets[i] = ~0;
171
172 + validation_state.branch_targets =
173 + kcalloc(BITS_TO_LONGS(validation_state.max_ip),
174 + sizeof(unsigned long), GFP_KERNEL);
175 + if (!validation_state.branch_targets)
176 + goto fail;
177 +
178 validated_shader = kcalloc(1, sizeof(*validated_shader), GFP_KERNEL);
179 if (!validated_shader)
180 - return NULL;
181 + goto fail;
182 +
183 + if (!vc4_validate_branches(&validation_state))
184 + goto fail;
185
186 for (ip = 0; ip < validation_state.max_ip; ip++) {
187 uint64_t inst = validation_state.shader[ip];
188 @@ -508,9 +615,12 @@ vc4_validate_shader(struct drm_gem_cma_o
189 (validated_shader->uniforms_size +
190 4 * validated_shader->num_texture_samples);
191
192 + kfree(validation_state.branch_targets);
193 +
194 return validated_shader;
195
196 fail:
197 + kfree(validation_state.branch_targets);
198 if (validated_shader) {
199 kfree(validated_shader->texture_samples);
200 kfree(validated_shader);