musl: backport various post-1.1.15 fixes
[openwrt/staging/blogic.git] / toolchain / musl / patches / 038-fix-regexec-with-haystack-strings-longer-than-int_max.patch
1 From aee6abb2400b9a955c2b41166db1c22f63ad42ef Mon Sep 17 00:00:00 2001
2 From: Rich Felker <dalias@aerifal.cx>
3 Date: Thu, 6 Oct 2016 12:15:47 -0400
4 Subject: fix regexec with haystack strings longer than INT_MAX
5
6 we inherited from TRE regexec code that's utterly wrong with respect
7 to the integer types it's using. while it doesn't appear that
8 compilers are producing unsafe output, signed integer overflows seem
9 to happen, and regexec fails to find matches past offset INT_MAX.
10
11 this patch fixes the type of all variables/fields used to store
12 offsets in the string from int to regoff_t. after the changes, basic
13 testing showed that regexec can now find matches past 2GB (INT_MAX)
14 and past 4GB on x86_64, and code generation is unchanged on i386.
15 ---
16 src/regex/regexec.c | 54 +++++++++++++++++++++++++++--------------------------
17 1 file changed, 28 insertions(+), 26 deletions(-)
18
19 diff --git a/src/regex/regexec.c b/src/regex/regexec.c
20 index dd52319..5c4cb92 100644
21 --- a/src/regex/regexec.c
22 +++ b/src/regex/regexec.c
23 @@ -44,7 +44,7 @@
24
25 static void
26 tre_fill_pmatch(size_t nmatch, regmatch_t pmatch[], int cflags,
27 - const tre_tnfa_t *tnfa, int *tags, int match_eo);
28 + const tre_tnfa_t *tnfa, regoff_t *tags, regoff_t match_eo);
29
30 /***********************************************************************
31 from tre-match-utils.h
32 @@ -97,7 +97,7 @@ tre_fill_pmatch(size_t nmatch, regmatch_t pmatch[], int cflags,
33 /* Returns 1 if `t1' wins `t2', 0 otherwise. */
34 static int
35 tre_tag_order(int num_tags, tre_tag_direction_t *tag_directions,
36 - int *t1, int *t2)
37 + regoff_t *t1, regoff_t *t2)
38 {
39 int i;
40 for (i = 0; i < num_tags; i++)
41 @@ -157,25 +157,25 @@ tre_neg_char_classes_match(tre_ctype_t *classes, tre_cint_t wc, int icase)
42
43 typedef struct {
44 tre_tnfa_transition_t *state;
45 - int *tags;
46 + regoff_t *tags;
47 } tre_tnfa_reach_t;
48
49 typedef struct {
50 - int pos;
51 - int **tags;
52 + regoff_t pos;
53 + regoff_t **tags;
54 } tre_reach_pos_t;
55
56
57 static reg_errcode_t
58 tre_tnfa_run_parallel(const tre_tnfa_t *tnfa, const void *string,
59 - int *match_tags, int eflags,
60 - int *match_end_ofs)
61 + regoff_t *match_tags, int eflags,
62 + regoff_t *match_end_ofs)
63 {
64 /* State variables required by GET_NEXT_WCHAR. */
65 tre_char_t prev_c = 0, next_c = 0;
66 const char *str_byte = string;
67 - int pos = -1;
68 - int pos_add_next = 1;
69 + regoff_t pos = -1;
70 + regoff_t pos_add_next = 1;
71 #ifdef TRE_MBSTATE
72 mbstate_t mbstate;
73 #endif /* TRE_MBSTATE */
74 @@ -191,10 +191,10 @@ tre_tnfa_run_parallel(const tre_tnfa_t *tnfa, const void *string,
75 int *tag_i;
76 int num_tags, i;
77
78 - int match_eo = -1; /* end offset of match (-1 if no match found yet) */
79 + regoff_t match_eo = -1; /* end offset of match (-1 if no match found yet) */
80 int new_match = 0;
81 - int *tmp_tags = NULL;
82 - int *tmp_iptr;
83 + regoff_t *tmp_tags = NULL;
84 + regoff_t *tmp_iptr;
85
86 #ifdef TRE_MBSTATE
87 memset(&mbstate, '\0', sizeof(mbstate));
88 @@ -214,7 +214,7 @@ tre_tnfa_run_parallel(const tre_tnfa_t *tnfa, const void *string,
89
90 /* Ensure that tbytes and xbytes*num_states cannot overflow, and that
91 * they don't contribute more than 1/8 of SIZE_MAX to total_bytes. */
92 - if (num_tags > SIZE_MAX/(8 * sizeof(int) * tnfa->num_states))
93 + if (num_tags > SIZE_MAX/(8 * sizeof(regoff_t) * tnfa->num_states))
94 goto error_exit;
95
96 /* Likewise check rbytes. */
97 @@ -229,7 +229,7 @@ tre_tnfa_run_parallel(const tre_tnfa_t *tnfa, const void *string,
98 tbytes = sizeof(*tmp_tags) * num_tags;
99 rbytes = sizeof(*reach_next) * (tnfa->num_states + 1);
100 pbytes = sizeof(*reach_pos) * tnfa->num_states;
101 - xbytes = sizeof(int) * num_tags;
102 + xbytes = sizeof(regoff_t) * num_tags;
103 total_bytes =
104 (sizeof(long) - 1) * 4 /* for alignment paddings */
105 + (rbytes + xbytes * tnfa->num_states) * 2 + tbytes + pbytes;
106 @@ -490,12 +490,12 @@ error_exit:
107 */
108
109 typedef struct {
110 - int pos;
111 + regoff_t pos;
112 const char *str_byte;
113 tre_tnfa_transition_t *state;
114 int state_id;
115 int next_c;
116 - int *tags;
117 + regoff_t *tags;
118 #ifdef TRE_MBSTATE
119 mbstate_t mbstate;
120 #endif /* TRE_MBSTATE */
121 @@ -591,13 +591,13 @@ typedef struct tre_backtrack_struct {
122
123 static reg_errcode_t
124 tre_tnfa_run_backtrack(const tre_tnfa_t *tnfa, const void *string,
125 - int *match_tags, int eflags, int *match_end_ofs)
126 + regoff_t *match_tags, int eflags, regoff_t *match_end_ofs)
127 {
128 /* State variables required by GET_NEXT_WCHAR. */
129 tre_char_t prev_c = 0, next_c = 0;
130 const char *str_byte = string;
131 - int pos = 0;
132 - int pos_add_next = 1;
133 + regoff_t pos = 0;
134 + regoff_t pos_add_next = 1;
135 #ifdef TRE_MBSTATE
136 mbstate_t mbstate;
137 #endif /* TRE_MBSTATE */
138 @@ -610,15 +610,16 @@ tre_tnfa_run_backtrack(const tre_tnfa_t *tnfa, const void *string,
139 started from. */
140 int next_c_start;
141 const char *str_byte_start;
142 - int pos_start = -1;
143 + regoff_t pos_start = -1;
144 #ifdef TRE_MBSTATE
145 mbstate_t mbstate_start;
146 #endif /* TRE_MBSTATE */
147
148 /* End offset of best match so far, or -1 if no match found yet. */
149 - int match_eo = -1;
150 + regoff_t match_eo = -1;
151 /* Tag arrays. */
152 - int *next_tags, *tags = NULL;
153 + int *next_tags;
154 + regoff_t *tags = NULL;
155 /* Current TNFA state. */
156 tre_tnfa_transition_t *state;
157 int *states_seen = NULL;
158 @@ -768,8 +769,9 @@ tre_tnfa_run_backtrack(const tre_tnfa_t *tnfa, const void *string,
159 /* This is a back reference state. All transitions leaving from
160 this state have the same back reference "assertion". Instead
161 of reading the next character, we match the back reference. */
162 - int so, eo, bt = trans_i->u.backref;
163 - int bt_len;
164 + regoff_t so, eo;
165 + int bt = trans_i->u.backref;
166 + regoff_t bt_len;
167 int result;
168
169 /* Get the substring we need to match against. Remember to
170 @@ -926,7 +928,7 @@ tre_tnfa_run_backtrack(const tre_tnfa_t *tnfa, const void *string,
171 endpoint values. */
172 static void
173 tre_fill_pmatch(size_t nmatch, regmatch_t pmatch[], int cflags,
174 - const tre_tnfa_t *tnfa, int *tags, int match_eo)
175 + const tre_tnfa_t *tnfa, regoff_t *tags, regoff_t match_eo)
176 {
177 tre_submatch_data_t *submatch_data;
178 unsigned int i, j;
179 @@ -996,7 +998,7 @@ regexec(const regex_t *restrict preg, const char *restrict string,
180 {
181 tre_tnfa_t *tnfa = (void *)preg->TRE_REGEX_T_FIELD;
182 reg_errcode_t status;
183 - int *tags = NULL, eo;
184 + regoff_t *tags = NULL, eo;
185 if (tnfa->cflags & REG_NOSUB) nmatch = 0;
186 if (tnfa->num_tags > 0 && nmatch > 0)
187 {
188 --
189 cgit v0.11.2