build: i18n-sync script refinements
[project/luci.git] / build / i18n-scan.pl
1 #!/usr/bin/perl
2
3 use strict;
4 use warnings;
5 use IPC::Open2;
6 use POSIX;
7 use Text::Balanced qw(gen_extract_tagged);
8
9 $ENV{'LC_ALL'} = 'C';
10 POSIX::setlocale(POSIX::LC_ALL, 'C');
11
12 @ARGV >= 1 || die "Usage: $0 <source directory>\n";
13
14
15 my %keywords = (
16 '.js' => [ '_:1', '_:1,2c', 'N_:2,3', 'N_:2,3,4c' ],
17 '.ut' => [ '_:1', '_:1,2c', 'N_:2,3', 'N_:2,3,4c' ],
18 '.uc' => [ '_:1', '_:1,2c', 'translate:1', 'translate:1,2c', 'N_:2,3', 'N_:2,3,4c', 'ntranslate:2,3', 'ntranslate:2,3,4c' ],
19 '.lua' => [ '_:1', '_:1,2c', 'translate:1', 'translate:1,2c', 'translatef:1', 'N_:2,3', 'N_:2,3,4c', 'ntranslate:2,3', 'ntranslate:2,3,4c' ],
20 '.htm' => [ '_:1', '_:1,2c', 'translate:1', 'translate:1,2c', 'translatef:1', 'N_:2,3', 'N_:2,3,4c', 'ntranslate:2,3', 'ntranslate:2,3,4c' ],
21 '.json' => [ '_:1', '_:1,2c' ]
22 );
23
24 sub xgettext($@) {
25 my $path = shift;
26 my @keywords = @_;
27 my ($ext) = $path =~ m!(\.\w+)$!;
28 my @cmd = qw(xgettext --from-code=UTF-8 --no-wrap);
29
30 if ($ext eq '.htm' || $ext eq '.lua') {
31 push @cmd, '--language=Lua';
32 }
33 elsif ($ext eq '.ut' || $ext eq '.uc' || $ext eq '.js' || $ext eq '.json') {
34 push @cmd, '--language=JavaScript';
35 }
36
37 push @cmd, map { "--keyword=$_" } (@{$keywords{$ext}}, @keywords);
38 push @cmd, '-o', '-';
39
40 return @cmd;
41 }
42
43 sub whitespace_collapse($) {
44 my $s = shift;
45 my %r = ('n' => ' ', 't' => ' ');
46
47 # Translate \t and \n to plain spaces, leave all other escape
48 # sequences alone. Finally replace all consecutive spaces by
49 # single ones and trim leading and trailing space.
50 $s =~ s/\\(.)/$r{$1} || "\\$1"/eg;
51 $s =~ s/ {2,}/ /g;
52 $s =~ s/^ //;
53 $s =~ s/ $//;
54
55 return $s;
56 }
57
58 sub postprocess_pot($$) {
59 my ($path, $source) = @_;
60 my (@res, $msgid);
61 my $skip = 1;
62
63 $source =~ s/^#: (.+?)\n/join("\n", map { "#: $path:$_" } $1 =~ m!:(\d+)!g) . "\n"/emg;
64
65 my @lines = split /\n/, $source;
66
67 # Remove all header lines up to the first location comment
68 while (@lines > 0 && $lines[0] !~ m!^#: !) {
69 shift @lines;
70 }
71
72 while (@lines > 0) {
73 my $line = shift @lines;
74
75 # Concat multiline msgids and collapse whitespaces
76 if ($line =~ m!^(msg\w+) "(.*)"$!) {
77 my $kw = $1;
78 my $kv = $2;
79
80 while (@lines > 0 && $lines[0] =~ m!^"(.*)"$!) {
81 $kv .= ' '. $1;
82 shift @lines;
83 }
84
85 $kv = whitespace_collapse($kv);
86
87 # Filter invalid empty msgids by popping all lines in @res
88 # leading to this point and skip all subsequent lines in
89 # @lines belonging to this faulty id.
90 if ($kw ne 'msgstr' && $kv eq '') {
91 while (@res > 0 && $res[-1] !~ m!^$!) {
92 pop @res;
93 }
94
95 while (@lines > 0 && $lines[0] =~ m!^(?:msg\w+ )?"(.*)"$!) {
96 shift @lines;
97 }
98
99 next;
100 }
101
102 push @res, sprintf '%s "%s"', $kw, $kv;
103 }
104
105 # Ignore any flags added by xgettext
106 elsif ($line =~ m!^#, !) {
107 next;
108 }
109
110 # Pass through other lines unmodified
111 else {
112 push @res, $line;
113 }
114 }
115
116 return @res ? join("\n", '', @res, '') : '';
117 }
118
119 sub uniq(@) {
120 my %h = map { $_, 1 } @_;
121 return sort keys %h;
122 }
123
124 sub preprocess_htm($$) {
125 my ($path, $source) = @_;
126 my $sub = {
127 '=' => '(%s)',
128 '_' => 'translate([==[%s]==])',
129 ':' => 'translate([==[%s]==])',
130 '+' => 'include([==[%s]==])',
131 '#' => '--[==[%s]==]',
132 '' => '%s'
133 };
134
135 # Translate the .htm source into a valid Lua source using bracket quotes
136 # to avoid the need for complex escaping.
137 $source =~ s!<%-?([=_:+#]?)(.*?)-?%>!
138 my $t = $1;
139 my $s = $2;
140
141 # Split translation expressions on first non-escaped pipe.
142 if ($t eq ':' || $t eq '_') {
143 $s =~ s/^((?:[^\|\\]|\\.)*)\|(.*)$/$1]==],[==[$2/;
144 }
145
146 sprintf "]==]; $sub->{$t}; [==[", $s
147 !sge;
148
149 # Discover expressions like "lng.translate(...)" or "luci.i18n.translate(...)"
150 # and return them as extra keyword so that xgettext recognizes such expressions
151 # as translate(...) calls.
152 my @extra_function_keywords =
153 map { ("$_:1", "$_:1,2c") }
154 uniq($source =~ m!((?:\w+\.)+translatef?)[ \t\n]*\(!g);
155
156 return ("[==[$source]==]", @extra_function_keywords);
157 }
158
159 sub preprocess_ut($$) {
160 my ($path, $source) = @_;
161
162 # Translate the .ut source into valid JavaScript code by enclosing template text
163 # in multiline comments and extracting blocks as plain code.
164 my $comt = gen_extract_tagged('{#', '#}', '(?s).*?(?=\{[#{%])');
165 my $expr = gen_extract_tagged('{{', '}}', '(?s).*?(?=\{[#{%])');
166 my $stmt = gen_extract_tagged('{%', '%}', '(?s).*?(?=\{[#{%])');
167
168 my $res = '';
169
170 while (length($source)) {
171 my ($block, $remain, $prefix);
172
173 ($block, $remain, $prefix) = $comt->($source);
174 ($block, $remain, $prefix) = $expr->($source) unless defined $block;
175 ($block, $remain, $prefix) = $stmt->($source) unless defined $block;
176
177 last unless defined $block;
178
179 $source = $remain;
180
181 $prefix =~ s!\*/!*\\/!g;
182 $res .= '/*' . $prefix . '*/';
183
184 if ($block =~ s!^\{#(.*)#}$!$1!s) {
185 $block =~ s!\*/!*\\/!g;
186 $res .= '/*' . $block . '*/';
187 }
188 elsif ($block =~ s!^\{\{(.*)}}$!$1!s) {
189 $block =~ s!^[+-]!!;
190 $block =~ s![+-]$!!;
191 $res .= '(' . $block . ')';
192 }
193 elsif ($block =~ s!^\{%(.*)%}$!$1!s) {
194 $block =~ s!^[+-]!!;
195 $block =~ s![+-]$!!;
196 $res .= '{' . $block . '}';
197 }
198 }
199
200 if ($source =~ m!^(.*)\{%[+-]?(.*)$!s) {
201 my $prefix = $1;
202 my $block = $2;
203
204 $prefix =~ s!\*/!*\\/!g;
205 $res .= '/*' . $prefix . '*/';
206 $res .= '{' . $block . '}';
207 }
208
209 return ($res);
210 }
211
212 sub preprocess_lua($$) {
213 my ($path, $source) = @_;
214
215 # Discover expressions like "lng.translate(...)" or "luci.i18n.translate(...)"
216 # and return them as extra keyword so that xgettext recognizes such expressions
217 # as translate(...) calls.
218 my @extra_function_keywords =
219 map { ("$_:1", "$_:1,2c") }
220 uniq($source =~ m!((?:\w+\.)+translatef?)[ \t\n]*\(!g);
221
222 return ($source, @extra_function_keywords);
223 }
224
225 sub preprocess_json($$) {
226 my ($path, $source) = @_;
227 my ($file) = $path =~ m!([^/]+)$!;
228
229 $source =~ s/("(?:title|description)")\s*:\s*("(?:[^"\\]|\\.)*")/$1: _($2)/sg;
230
231 return ($source);
232 }
233
234
235 my ($msguniq_in, $msguniq_out);
236 my $msguniq_pid = open2($msguniq_out, $msguniq_in, 'msguniq', '-s');
237
238 print $msguniq_in "msgid \"\"\nmsgstr \"Content-Type: text/plain; charset=UTF-8\"\n";
239
240 if (open F, "find @ARGV -type f '(' -name '*.htm' -o -name '*.lua' -o -name '*.js' -o -name '*.uc' -o -name '*.ut' -o -path '*/menu.d/*.json' -o -path '*/acl.d/*.json' -o -path '*/statistics/plugins/*.json' -o -path '*/https-dns-proxy/providers/*.json' ')' |")
241 {
242 while (defined( my $file = readline F))
243 {
244 chomp $file;
245
246 if (open S, '<', $file)
247 {
248 local $/ = undef;
249 my $source = <S>;
250 my @extra_function_keywords;
251
252 if ($file =~ m!\.htm$!)
253 {
254 ($source, @extra_function_keywords) = preprocess_htm($file, $source);
255 }
256 elsif ($file =~ m!\.ut$!)
257 {
258 ($source, @extra_function_keywords) = preprocess_ut($file, $source);
259 }
260 elsif ($file =~ m!\.lua$!)
261 {
262 ($source, @extra_function_keywords) = preprocess_lua($file, $source);
263 }
264 elsif ($file =~ m!\.json$!)
265 {
266 ($source, @extra_function_keywords) = preprocess_json($file, $source);
267 }
268
269 my ($xgettext_in, $xgettext_out);
270 my $pid = open2($xgettext_out, $xgettext_in, xgettext($file, @extra_function_keywords), '-');
271
272 print $xgettext_in $source;
273 close $xgettext_in;
274
275 my $pot = readline $xgettext_out;
276 close $xgettext_out;
277
278 waitpid $pid, 0;
279
280 print $msguniq_in postprocess_pot($file, $pot);
281 }
282 }
283
284 close F;
285 }
286
287 close $msguniq_in;
288
289 my @pot = <$msguniq_out>;
290
291 close $msguniq_out;
292 waitpid $msguniq_pid, 0;
293
294 while (@pot > 0) {
295 my $line = shift @pot;
296
297 # Reorder the location comments in a deterministic way to
298 # reduce SCM noise when frequently updating templates.
299 if ($line =~ m!^#: !) {
300 my @locs = ($line);
301
302 while (@pot > 0 && $pot[0] =~ m!^#: !) {
303 push @locs, shift @pot;
304 }
305
306 print
307 map { join(':', @$_) . "\n" }
308 sort { ($a->[0] cmp $b->[0]) || ($a->[1] <=> $b->[1]) }
309 map { [ /^(.+):(\d+)$/ ] }
310 @locs
311 ;
312
313 next;
314 }
315
316 print $line;
317 }