#!/usr/bin/env python3
"""
Tool to create overview.json files and update the config.js.
"""

from pathlib import Path
import urllib.request
import tempfile
import datetime
import argparse
import email
import time
import json
import glob
import sys
import os
import re

SUPPORTED_METADATA_VERSION = 1
BUILD_DATE_FORMAT = "%Y-%m-%d %H:%M:%S"
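# BUILD_DATE_FORMAT yields strings like "2020-05-12 13:18:07"; they end up in
# the "build_date" field of the overview.json entries.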

assert sys.version_info >= (3, 5), "Python version too old. Python >=3.5.0 needed."


def add_profile(output, path, id, target, profile, code=None, build_date=None):
    def get_title(title):
        if "title" in title:
            return title["title"]
        else:
            return "{} {} {}".format(
                title.get("vendor", ""), title["model"], title.get("variant", "")
            ).strip()
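    # e.g. {"vendor": "TP-Link", "model": "Archer C7", "variant": "v5"}
    # yields the title "TP-Link Archer C7 v5"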

    images = []
    for image in profile["images"]:
        images.append({"name": image["name"], "type": image["type"]})

    if target is None:
        target = profile["target"]

    for entry in profile["titles"]:
        title = get_title(entry)

        if len(title) == 0:
            sys.stderr.write("Empty title. Skipping title for {} in {}\n".format(id, path))
            continue

        """
        Some devices appear in both ar71xx and ath79, but with different
        vendor spellings: "TP-LINK" vs. "TP-Link".
        E.g: `TP-LINK Archer C7 v5` and `TP-Link Archer C7 v5`
        To detect such duplicates, normalize the titles first.
        """
        if title.startswith("TP-LINK "):
            title = "TP-Link {}".format(title[8:])

        # device is a duplicate, try to differentiate by target
        if title in output["models"]:
            title = "{} ({})".format(title, target)

        output["models"][title] = {"id": id, "target": target, "images": images}

        if build_date is not None:
            output["models"][title]["build_date"] = build_date

        if code is not None:
            output["models"][title]["code"] = code


# accepts a list of dicts of the form
# {"file_path": ..., "file_content": ..., "last_modified": ...}
def merge_profiles(profiles, download_url):
    # json output data
    output = {}
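    # filled below; the final shape is
    #   {"version_code": ..., "download_url": ..., "models": {<title>: {...}}}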

    for profile in profiles:
        try:
            obj = json.loads(profile["file_content"])
        except json.decoder.JSONDecodeError as e:
            sys.stderr.write("Skip {}\n {}\n".format(profile["file_path"], e))
            continue

        if obj["metadata_version"] != SUPPORTED_METADATA_VERSION:
            sys.stderr.write(
                "{} has unsupported metadata version: {} => skip\n".format(
                    profile["file_path"], obj["metadata_version"]
                )
            )
            continue

        code = obj.get("version_code", obj.get("version_commit"))
        file_path = profile["file_path"]
        build_date = profile["last_modified"]

        if "version_code" not in output:
            output = {"version_code": code, "download_url": download_url, "models": {}}

        # if we have mixed codes/commits, store in device object
        if output["version_code"] == code:
            code = None

        try:
            if "profiles" in obj:
                for id in obj["profiles"]:
                    add_profile(
                        output,
                        file_path,
                        id,
                        obj.get("target"),
                        obj["profiles"][id],
                        code,
                        build_date,
                    )
            else:
                add_profile(
                    output, file_path, obj["id"], obj["target"], obj, code, build_date
                )
        except KeyError as e:
            sys.stderr.write("Abort on {}\n Missing key {}\n".format(file_path, e))
            exit(1)

    return output


def update_config(www_path, versions):
    config_path = "{}/config.js".format(www_path)

    if os.path.isfile(config_path):
        content = ""
        with open(str(config_path), "r", encoding="utf-8") as file:
            content = file.read()

        # replace the `versions: {...}` object in config.js with the new
        # release => overview.json path mapping
        content = re.sub(
            r"versions:\s*{[^}]*}", "versions: {}".format(versions), content
        )
        with open(str(config_path), "w", encoding="utf-8") as file:
            file.write(content)
    else:
        sys.stderr.write("Warning: File not found: {}\n".format(config_path))


138 """
139 Scrape profiles.json using links like https://downloads.openwrt.org/releases/19.07.3/targets/?json
140 Merge into overview.json files.
141 Update config.json.
142 """


def scrape(args):
    url = args.domain
    data_path = "{}/data".format(args.www_path)
    versions = {}

    def handle_release(target):
        profiles = []
        with urllib.request.urlopen("{}/?json".format(target)) as file:
            array = json.loads(file.read().decode("utf-8"))
            for profile in filter(lambda x: x.endswith("/profiles.json"), array):
                with urllib.request.urlopen(
                    "{}/{}".format(target, profile)
                ) as response:
                    last_modified = datetime.datetime(
                        *email.utils.parsedate(response.headers.get("last-modified"))[:6]
                    ).strftime(BUILD_DATE_FORMAT)
                    profiles.append(
                        {
                            "file_path": "{}/{}".format(target, profile),
                            "file_content": response.read().decode("utf-8"),
                            "last_modified": last_modified,
                        }
                    )
        return profiles

    # fetch release URLs
    with urllib.request.urlopen(url) as infile:
        html = infile.read().decode("utf-8")
        for path in re.findall(r"href=[\"']?([^'\" >]+)", html):
            if not path.startswith("/") and path.endswith("targets/"):
                release = path.strip("/").split("/")[-2]
                download_url = "{}/{}/{{target}}".format(url, path)

                profiles = handle_release("{}/{}".format(url, path))
                output = merge_profiles(profiles, download_url)
                if len(output) > 0:
                    os.makedirs("{}/{}".format(data_path, release), exist_ok=True)
                    # write overview.json
                    with open(
                        "{}/{}/overview.json".format(data_path, release), "w"
                    ) as outfile:
                        if args.formatted:
                            json.dump(output, outfile, indent="  ", sort_keys=True)
                        else:
                            json.dump(output, outfile, sort_keys=True)

                    versions[release] = "data/{}/overview.json".format(release)

    update_config(args.www_path, versions)


193 """
194 Scrape profiles.json using wget (slower but more generic).
195 Merge into overview.json files.
196 Update config.json.
197 """


def scrape_wget(args):
    url = args.domain
    data_path = "{}/data".format(args.www_path)
    versions = {}

    with tempfile.TemporaryDirectory() as tmp_dir:
        # download all profiles.json files
        os.system(
            "wget -c -r -P {} -A 'profiles.json' --reject-regex 'kmods|packages' --no-parent {}".format(
                tmp_dir, url
            )
        )

        # delete empty folders
        os.system("find {}/* -type d -empty -delete".format(tmp_dir))

        # create overview.json files
        for path in glob.glob("{}/*/snapshots".format(tmp_dir)) + glob.glob(
            "{}/*/releases/*".format(tmp_dir)
        ):
            release = os.path.basename(path)
            base = path[len(tmp_dir) + 1 :]

            profiles = []
            for ppath in Path(path).rglob("profiles.json"):
                with open(str(ppath), "r", encoding="utf-8") as file:
                    # interpret mtimes as UTC, as the other subcommands do
                    last_modified = time.strftime(
                        BUILD_DATE_FORMAT, time.gmtime(os.path.getmtime(str(ppath)))
                    )
                    profiles.append(
                        {
                            "file_path": str(ppath),
                            "file_content": file.read(),
                            "last_modified": last_modified,
                        }
                    )

            if len(profiles) == 0:
                continue

            versions[release] = "data/{}/overview.json".format(release)

            output = merge_profiles(
                profiles, "https://{}/targets/{{target}}".format(base)
            )
            os.makedirs("{}/{}".format(data_path, release), exist_ok=True)

            # write overview.json
            with open("{}/{}/overview.json".format(data_path, release), "w") as outfile:
                if args.formatted:
                    json.dump(output, outfile, indent="  ", sort_keys=True)
                else:
                    json.dump(output, outfile, sort_keys=True)

    update_config(args.www_path, versions)


258 """
259 Find and merge json files for a single release.
260 """


def merge(args):
    input_paths = args.input_path
    # OpenWrt JSON device files
    profiles = []

    def add_path(path):
        with open(str(path), "r", encoding="utf-8") as file:
            last_modified = time.strftime(
                BUILD_DATE_FORMAT, time.gmtime(os.path.getmtime(str(path)))
            )
            profiles.append(
                {
                    "file_path": str(path),
                    "file_content": file.read(),
                    "last_modified": last_modified,
                }
            )

    for path in input_paths:
        if os.path.isdir(path):
            for filepath in Path(path).rglob("*.json"):
                add_path(filepath)
        else:
            if not path.endswith(".json"):
                sys.stderr.write(
                    "Path is neither a folder nor a .json file: {}\n".format(path)
                )
                exit(1)
            add_path(path)

    output = merge_profiles(profiles, args.download_url)

    if args.formatted:
        json.dump(output, sys.stdout, indent="  ", sort_keys=True)
    else:
        json.dump(output, sys.stdout, sort_keys=True)


299 """
300 Scan local directory for releases with profiles.json.
301 Merge into overview.json files.
302 Update config.json.
303 """


def scan(args):
    # the overview.json files are placed here
    data_path = "{}/data".format(args.www_path)
    versions = {}

    # group the profiles.json files found under args.images_path by release
    releases = {}
    for path in Path(args.images_path).rglob("profiles.json"):
        with open(str(path), "r", encoding="utf-8") as file:
            content = file.read()
            obj = json.loads(content)
            release = obj["version_number"]
            last_modified = time.strftime(
                BUILD_DATE_FORMAT, time.gmtime(os.path.getmtime(str(path)))
            )
            releases.setdefault(release, []).append(
                {
                    "file_path": str(path),
                    "file_content": content,
                    "last_modified": last_modified,
                }
            )

329 """
330 Replace {base} variable in download URL with the intersection
331 of all profile.json paths. E.g.:
332 ../tmp/releases/18.06.8/targets => base is releases/18.06.8/targets
333 ../tmp/snapshots/targets => base in snapshots/targets
334 """

    def replace_base(releases, target_release, download_url):
        if "{base}" in download_url:
            # release => base path (of profiles.json locations)
            paths = {}
            for release, profiles in releases.items():
                profile_paths = [profile["file_path"] for profile in profiles]
                paths[release] = os.path.commonpath(profile_paths)
            # base path of all releases
            release_path_base = os.path.commonpath(paths.values())
            # get path intersection
            base = str(paths[target_release])[len(release_path_base) + 1 :]
            return download_url.replace("{base}", base)
        else:
            return download_url

    for release, profiles in releases.items():
        download_url = replace_base(releases, release, args.download_url)
        output = merge_profiles(profiles, download_url)

        versions[release] = "data/{}/overview.json".format(release)
        os.makedirs("{}/{}".format(data_path, release), exist_ok=True)

        # write overview.json
        with open("{}/{}/overview.json".format(data_path, release), "w") as outfile:
            if args.formatted:
                json.dump(output, outfile, indent="  ", sort_keys=True)
            else:
                json.dump(output, outfile, sort_keys=True)

    update_config(args.www_path, versions)


def main():
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "--formatted", action="store_true", help="Output formatted JSON data."
    )
    subparsers = parser.add_subparsers(dest="action")
    subparsers.required = True

    parser_merge = subparsers.add_parser(
        "merge", help="Search for profiles.json files and output an overview.json."
    )
    parser_merge.add_argument(
        "input_path",
        nargs="+",
        help="Input folder that is traversed for OpenWrt JSON device files.",
    )
    parser_merge.add_argument(
        "--download-url",
        action="store",
        default="",
        help="Link to get the image from. May contain {target} (replaced by e.g. ath79/generic), {version} (replaced by the version key from config.js) and {commit} (git commit in hex notation).",
    )

    parser_scrape = subparsers.add_parser("scrape", help="Scrape webpage for releases.")
    parser_scrape.add_argument(
        "domain", help="Domain to scrape. E.g. https://downloads.openwrt.org"
    )
    parser_scrape.add_argument("www_path", help="Path the config.js file is in.")
    parser_scrape.add_argument(
        "--use-wget", action="store_true", help="Use wget to scrape the site."
    )

    parser_scan = subparsers.add_parser("scan", help="Scan directory for releases.")
    parser_scan.add_argument(
402 "download_url", help="Download for images. E.g. https://downloads.openwrt.org"
    )
    parser_scan.add_argument("images_path", help="Directory to scan for releases.")
    parser_scan.add_argument("www_path", help="Path the config.js file is in.")

    args = parser.parse_args()

    if args.action == "merge":
        merge(args)

    if args.action == "scan":
        scan(args)

    if args.action == "scrape":
        if args.use_wget:
            scrape_wget(args)
        else:
            scrape(args)


if __name__ == "__main__":
    main()