misc/collect.py: use list to collect profile.json data
[web/firmware-selector-openwrt-org.git] / misc / collect.py
1 #!/usr/bin/env python3
2 """
3 Tool to create overview.json files and update the config.js.
4 """
5
6 from pathlib import Path
7 import urllib.request
8 import tempfile
9 import argparse
10 import json
11 import glob
12 import sys
13 import os
14 import re
15
16 SUPPORTED_METADATA_VERSION = 1
17
18 assert sys.version_info >= (3, 5), "Python version too old. Python >=3.5.0 needed."
19
20
def add_profile(output, path, id, target, profile, code=None):
    """
    Register one device profile in output["models"], once per title.

    output  -- overview dict; its "models" mapping is updated in place
    path    -- origin of the profile, only used in the warning message
    id      -- profile identifier stored with every model entry
    target  -- target name; when None, profile["target"] is used instead
    profile -- profile dict providing "images" and "titles"
    code    -- version code stored with the entry when it is not None
    """

    def title_of(entry):
        # an explicit "title" wins over the vendor/model/variant triple
        if "title" not in entry:
            parts = (
                entry.get("vendor", ""),
                entry["model"],
                entry.get("variant", ""),
            )
            return "{} {} {}".format(*parts).strip()
        return entry["title"]

    # only the name and type of each image end up in the overview
    images = [{"name": img["name"], "type": img["type"]} for img in profile["images"]]

    if target is None:
        target = profile["target"]

    for entry in profile["titles"]:
        name = title_of(entry)

        if not len(name):
            sys.stderr.write("Empty title. Skip title for {} in {}\n".format(id, path))
            continue

        # Some devices are in ar71xx and ath79, but spelled "TP-LINK" in one
        # and "TP-Link" in the other, e.g. `TP-LINK Archer C7 v5` and
        # `TP-Link Archer C7 v5`. Normalize the spelling so that such
        # duplicates can be detected below.
        if name.startswith("TP-LINK "):
            name = "TP-Link " + name[8:]

        # device is a duplicate, try to differentiate by target
        models = output["models"]
        if name in models:
            name = "{} ({})".format(name, target)

        record = {"id": id, "target": target, "images": images}
        if code is not None:
            record["code"] = code
        models[name] = record
60
61
def merge_profiles(profiles, download_url):
    """
    Merge the content of several profiles.json files into one overview dict.

    profiles     -- list of {"file_path": <path>, "file_content": <JSON text>}
    download_url -- stored unchanged as "download_url" in the result

    Returns {"version_code": ..., "download_url": ..., "models": {...}}, or
    an empty dict when no file was accepted.

    Files that are not valid JSON, or that carry an unsupported metadata
    version, are skipped with a note on stderr. A file that lacks a required
    key terminates the program with exit status 1.
    """
    # json output data
    output = {}

    for profile in profiles:
        file_path = profile["file_path"]

        # Parse under its own handler: one malformed file must not take the
        # whole run down, it is reported and skipped.
        try:
            obj = json.loads(profile["file_content"])
        except json.decoder.JSONDecodeError as e:
            sys.stderr.write("Skip {}\n {}\n".format(file_path, e))
            continue

        try:
            if obj["metadata_version"] != SUPPORTED_METADATA_VERSION:
                sys.stderr.write(
                    "{} has unsupported metadata version: {} => skip\n".format(
                        file_path, obj["metadata_version"]
                    )
                )
                continue

            code = obj.get("version_code", obj.get("version_commit"))

            # the first accepted file defines the overview-wide version code
            if "version_code" not in output:
                output = {
                    "version_code": code,
                    "download_url": download_url,
                    "models": {},
                }

            # if we have mixed codes/commits, store in device object
            if output["version_code"] == code:
                code = None

            if "profiles" in obj:
                # one file describing several profiles
                for id in obj["profiles"]:
                    add_profile(
                        output,
                        file_path,
                        id,
                        obj.get("target"),
                        obj["profiles"][id],
                        code,
                    )
            else:
                # one file describing a single profile
                add_profile(output, file_path, obj["id"], obj["target"], obj, code)
        except KeyError as e:
            sys.stderr.write("Abort on {}\n Missing key {}\n".format(file_path, e))
            sys.exit(1)

    return output
108
109
def update_config(config_path, versions):
    """
    Replace the `versions: {...}` object inside config.js.

    config_path -- path of the config.js file, rewritten in place
    versions    -- dict mapping a release name to its overview.json path;
                   it is inserted using Python's dict formatting
    """
    with open(str(config_path), "r", encoding="utf-8") as file:
        content = file.read()

    replacement = "versions: {}".format(versions)
    # A function replacement is inserted literally. A plain string would be
    # treated as a template and have its backslashes interpreted by re.sub.
    content = re.sub(
        r"versions:[\s]*{[^}]*}", lambda _match: replacement, content
    )

    # write with the same encoding the file was read with
    with open(str(config_path), "w", encoding="utf-8") as file:
        file.write(content)
118
119
120 """
121 Scrape profiles.json using links like https://downloads.openwrt.org/releases/19.07.3/targets/?json
122 Merge into overview.json files.
123 Update config.js.
124 """
125
126
def scrape(args):
    """
    Scrape profiles.json files through JSON directory listings (links like
    https://downloads.openwrt.org/releases/19.07.3/targets/?json), merge
    them into one overview.json per release and update config.js.

    args -- parsed arguments providing `domain`, `www_path` and `formatted`

    Exits with status 1 when config.js is not found in `www_path`.
    """
    url = args.domain
    www_path = args.www_path
    config_path = "{}/config.js".format(www_path)
    data_path = "{}/data".format(www_path)
    versions = {}

    def handle_release(target):
        """Download every profiles.json listed by <target>/?json."""
        profiles = []
        with urllib.request.urlopen("{}/?json".format(target)) as file:
            array = json.loads(file.read().decode("utf-8"))

        for profile in array:
            if not profile.endswith("/profiles.json"):
                continue
            file_path = "{}/{}".format(target, profile)
            with urllib.request.urlopen(file_path) as file:
                profiles.append(
                    {
                        "file_path": file_path,
                        "file_content": file.read().decode("utf-8"),
                    }
                )
        return profiles

    if not os.path.isfile(config_path):
        print("file not found: {}".format(config_path))
        sys.exit(1)

    # fetch release URLs
    with urllib.request.urlopen(url) as infile:
        # Decode the page instead of parsing the repr() of the bytes object:
        # the repr escapes quotes, which corrupts quoted href values.
        index = infile.read().decode("utf-8", errors="replace")

    for path in re.findall(r"href=[\"']?([^'\" >]+)", index):
        if path.startswith("/") or not path.endswith("targets/"):
            continue

        # the release name is the path component in front of "targets"
        parts = path.strip("/").split("/")
        if len(parts) < 2:
            sys.stderr.write(
                "Cannot determine release name from {} => skip\n".format(path)
            )
            continue
        release = parts[-2]
        download_url = "{}/{}/{{target}}".format(url, path)

        profiles = handle_release("{}/{}".format(url, path))
        output = merge_profiles(profiles, download_url)
        if output:
            os.makedirs("{}/{}".format(data_path, release), exist_ok=True)
            # write overview.json
            with open(
                "{}/{}/overview.json".format(data_path, release), "w"
            ) as outfile:
                if args.formatted:
                    json.dump(output, outfile, indent=" ", sort_keys=True)
                else:
                    json.dump(output, outfile, sort_keys=True)

            versions[release] = "data/{}/overview.json".format(release)

    update_config(config_path, versions)
175
176
177 """
178 Scrape profiles.json using wget (slower but more generic).
179 Merge into overview.json files.
180 Update config.js.
181 """
182
183
def scrape_wget(args):
    """
    Scrape profiles.json files using wget (slower but more generic), merge
    them into one overview.json per release and update config.js.

    args -- parsed arguments providing `domain`, `www_path` and `formatted`

    Exits with status 1 when config.js is not found in `www_path` or when
    the wget executable cannot be started.
    """
    import subprocess

    url = args.domain
    www_path = args.www_path
    config_path = "{}/config.js".format(www_path)
    data_path = "{}/data".format(www_path)
    versions = {}

    # same upfront check as scrape(): fail before the long download starts
    if not os.path.isfile(config_path):
        print("file not found: {}".format(config_path))
        sys.exit(1)

    with tempfile.TemporaryDirectory() as tmp_dir:
        # Download all profiles.json files. The command is passed as an
        # argument list so that no shell interprets the URL.
        try:
            subprocess.run(
                [
                    "wget",
                    "-c",
                    "-r",
                    "-P",
                    tmp_dir,
                    "-A",
                    "profiles.json",
                    "--reject-regex",
                    "kmods|packages",
                    "--no-parent",
                    "--",
                    url,
                ]
            )
        except FileNotFoundError:
            # without wget nothing was downloaded; do not rewrite config.js
            sys.stderr.write("wget not found\n")
            sys.exit(1)

        # Empty folders need no cleanup: a release folder without any
        # profiles.json is skipped below.

        # create overview.json files
        release_dirs = glob.glob("{}/*/snapshots".format(tmp_dir)) + glob.glob(
            "{}/*/releases/*".format(tmp_dir)
        )
        for path in release_dirs:
            release = os.path.basename(path)
            # path relative to the download folder: <host>/...
            base = path[len(tmp_dir) + 1 :]

            profiles = []
            # sorted for a reproducible merge order
            for ppath in sorted(Path(path).rglob("profiles.json")):
                with open(str(ppath), "r", encoding="utf-8") as file:
                    profiles.append(
                        {"file_path": str(ppath), "file_content": file.read()}
                    )

            if not profiles:
                continue

            versions[release] = "data/{}/overview.json".format(release)

            output = merge_profiles(
                profiles, "https://{}/targets/{{target}}".format(base)
            )
            os.makedirs("{}/{}".format(data_path, release), exist_ok=True)

            # write overview.json
            with open("{}/{}/overview.json".format(data_path, release), "w") as outfile:
                if args.formatted:
                    json.dump(output, outfile, indent=" ", sort_keys=True)
                else:
                    json.dump(output, outfile, sort_keys=True)

    update_config(config_path, versions)
234
235
236 """
237 Find and merge json files for a single release.
238 """
239
240
def merge(args):
    """
    Find and merge JSON files for a single release; print the overview to
    stdout.

    args -- parsed arguments providing `input_path` (list of folders and
            .json files), `download_url` and `formatted`

    Exits with status 1 when an input path is neither a folder nor a path
    ending in .json.
    """
    # OpenWrt JSON device files
    profiles = []

    def add_path(path):
        with open(str(path), "r", encoding="utf-8") as file:
            profiles.append({"file_path": str(path), "file_content": file.read()})

    for path in args.input_path:
        if os.path.isdir(path):
            # sorted for a reproducible merge order
            for filepath in sorted(Path(path).rglob("*.json")):
                add_path(filepath)
        elif path.endswith(".json"):
            add_path(path)
        else:
            sys.stderr.write("Folder does not exist: {}\n".format(path))
            sys.exit(1)

    output = merge_profiles(profiles, args.download_url)

    if args.formatted:
        json.dump(output, sys.stdout, indent=" ", sort_keys=True)
    else:
        json.dump(output, sys.stdout, sort_keys=True)
266
267
268 """
269 Scan local directory for releases with profiles.json.
270 Merge into overview.json files.
271 Update config.js.
272 """
273
274
def scan(args):
    """
    Scan a local directory for releases with profiles.json files, merge
    them into one overview.json per release and update config.js.

    args -- parsed arguments providing `images_path`, `www_path`,
            `download_url` and `formatted`

    Files are grouped by their "version_number" value. A file that is not
    valid JSON is skipped with a note on stderr. The program exits with
    status 1 when a file lacks "version_number" or when config.js is not
    found in `www_path`.
    """
    # firmware selector config
    config_path = "{}/config.js".format(args.www_path)
    # the overview.json files are placed here
    data_path = "{}/data".format(args.www_path)
    versions = {}

    # same upfront check as scrape(): fail before any file is written
    if not os.path.isfile(config_path):
        print("file not found: {}".format(config_path))
        sys.exit(1)

    # args.images_path => args.releases_path
    releases = {}
    # sorted for a reproducible merge order
    for path in sorted(Path(args.images_path).rglob("profiles.json")):
        with open(str(path), "r", encoding="utf-8") as file:
            content = file.read()

        # same policy as merge_profiles(): skip broken JSON, abort on a
        # missing key
        try:
            release = json.loads(content)["version_number"]
        except json.decoder.JSONDecodeError as e:
            sys.stderr.write("Skip {}\n {}\n".format(path, e))
            continue
        except KeyError as e:
            sys.stderr.write("Abort on {}\n Missing key {}\n".format(path, e))
            sys.exit(1)

        releases.setdefault(release, []).append(
            {"file_path": str(path), "file_content": content}
        )

    for release, profiles in releases.items():
        output = merge_profiles(profiles, args.download_url)

        versions[release] = "data/{}/overview.json".format(release)
        os.makedirs("{}/{}".format(data_path, release), exist_ok=True)

        # write overview.json
        with open("{}/{}/overview.json".format(data_path, release), "w") as outfile:
            if args.formatted:
                json.dump(output, outfile, indent=" ", sort_keys=True)
            else:
                json.dump(output, outfile, sort_keys=True)

    update_config(config_path, versions)
307
308
def main():
    """
    Command line entry point: parse the arguments and run the chosen action
    (merge, scrape or scan).
    """
    cli = argparse.ArgumentParser()
    cli.add_argument(
        "--formatted", action="store_true", help="Output formatted JSON data."
    )
    actions = cli.add_subparsers(dest="action")
    # a sub-command is mandatory
    actions.required = True

    # merge: combine JSON files and print one overview
    merge_cmd = actions.add_parser(
        "merge", help="Search for profiles.json files and output an overview.json."
    )
    merge_cmd.add_argument(
        "input_path",
        nargs="+",
        help="Input folder that is traversed for OpenWrt JSON device files.",
    )
    merge_cmd.add_argument(
        "--download-url",
        action="store",
        default="",
        help="Link to get the image from. May contain {target} (replaced by e.g. ath79/generic), {version} (replace by the version key from config.js) and {commit} (git commit in hex notation).",
    )

    # scrape: collect releases from a web server
    scrape_cmd = actions.add_parser("scrape", help="Scrape webpage for releases.")
    scrape_cmd.add_argument(
        "domain", help="Domain to scrape. E.g. https://downloads.openwrt.org"
    )
    scrape_cmd.add_argument("www_path", help="Path the config.js file is in.")
    scrape_cmd.add_argument(
        "--use-wget", action="store_true", help="Use wget to scrape the site."
    )

    # scan: collect releases from a local directory
    scan_cmd = actions.add_parser("scan", help="Scan directory for releases.")
    scan_cmd.add_argument(
        "download_url", help="Download for images. E.g. https://downloads.openwrt.org"
    )
    scan_cmd.add_argument("images_path", help="Directory to scan for releases.")
    scan_cmd.add_argument("www_path", help="Path the config.js file is in.")

    options = cli.parse_args()

    if options.action == "merge":
        merge(options)
    elif options.action == "scan":
        scan(options)
    elif options.action == "scrape":
        if options.use_wget:
            scrape_wget(options)
        else:
            scrape(options)
362
363 if __name__ == "__main__":
364 main()