misc/collect.py: rename TP-LINK to TP-Link
#!/usr/bin/env python3
"""
Tool to create overview.json files and update the config.js.
"""

from pathlib import Path
import urllib.request
import tempfile
import argparse
import json
import glob
import sys
import os
import re

SUPPORTED_METADATA_VERSION = 1

assert sys.version_info >= (3, 5), "Python version too old. Python >=3.5.0 needed."


# accepts {<file-path>: <file-content>}
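# A hypothetical sketch of the expected input and of the returned overview
# structure, inferred from the code below; all concrete values are made up:
#
#   profiles = {
#       "ath79/generic/profiles.json": '{"metadata_version": 1, ...}',
#   }
#
#   output = {
#       "version_code": "r12345-abcdef",
#       "download_url": "https://example.org/targets/{target}",
#       "models": {
#           "TP-Link Archer C7 v5": {
#               "id": "tplink_archer-c7-v5",
#               "target": "ath79/generic",
#               "images": [{"name": "...sysupgrade.bin", "type": "sysupgrade"}],
#           },
#       },
#   }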
def merge_profiles(profiles, download_url):
    # json output data
    output = {}

    def get_title(title):
        if "title" in title:
            return title["title"]
        else:
            return "{} {} {}".format(
                title.get("vendor", ""), title["model"], title.get("variant", "")
            ).strip()

    def add_profile(path, id, target, profile, code=None):
        images = []
        for image in profile["images"]:
            images.append({"name": image["name"], "type": image["type"]})

        if target is None:
            target = profile["target"]

        for entry in profile["titles"]:
            title = get_title(entry)

            if len(title) == 0:
                sys.stderr.write(
                    "Empty title. Skip title for {} in {}\n".format(id, path)
                )
                continue

51 """
52 Some devices are in ar71xx and ath79. But use TP-LINK" and "TP-Link".
53 E.g: `TP-LINK Archer C7 v5` and `TP-Link Archer C7 v5`
54 To be able to detect this, we need to make "same" titles identical.
55 """
            if title.startswith("TP-LINK "):
                title = "TP-Link {}".format(title[8:])

            # device is a duplicate, try to differentiate by target
            if title in output["models"]:
                title = "{} ({})".format(title, target)

            output["models"][title] = {"id": id, "target": target, "images": images}

            if code is not None:
                output["models"][title]["code"] = code

    for path, content in profiles.items():
        obj = json.loads(content)

        if obj["metadata_version"] != SUPPORTED_METADATA_VERSION:
            sys.stderr.write(
                "{} has unsupported metadata version: {} => skip\n".format(
                    path, obj["metadata_version"]
                )
            )
            continue

        code = obj.get("version_code", obj.get("version_commit"))

        if "version_code" not in output:
            output = {"version_code": code, "download_url": download_url, "models": {}}

        # if we have mixed codes/commits, store in device object
        if output["version_code"] == code:
            code = None

        try:
            if "profiles" in obj:
                for id in obj["profiles"]:
                    add_profile(path, id, obj.get("target"), obj["profiles"][id], code)
            else:
                add_profile(path, obj["id"], obj["target"], obj, code)
        except json.decoder.JSONDecodeError as e:
            sys.stderr.write("Skip {}\n {}\n".format(path, e))
        except KeyError as e:
            sys.stderr.write("Abort on {}\n Missing key {}\n".format(path, e))
            exit(1)

    return output


def update_config(config_path, versions):
    content = ""
    with open(str(config_path), "r", encoding="utf-8") as file:
        content = file.read()

    content = re.sub("versions:[\\s]*{[^}]*}", "versions: {}".format(versions), content)
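    # Illustrative (assumed) shape of the config.js fragment matched above:
    #   versions: {'19.07.3': 'data/19.07.3/overview.json'},
    # The replacement text is a Python dict repr, which is still valid
    # JavaScript object notation (single-quoted strings).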
    with open(str(config_path), "w+") as file:
        file.write(content)


113 """
114 Scrape profiles.json using links like https://downloads.openwrt.org/releases/19.07.3/targets/?json
115 Merge into overview.json files.
116 Update config.json.
117 """


def scrape(args):
    url = args.domain
    www_path = args.www_path
    config_path = "{}/config.js".format(www_path)
    data_path = "{}/data".format(www_path)
    versions = {}

    def handle_release(target):
        profiles = {}
        with urllib.request.urlopen("{}/?json".format(target)) as file:
            array = json.loads(file.read().decode("utf-8"))
            for profile in filter(lambda x: x.endswith("/profiles.json"), array):
                with urllib.request.urlopen("{}/{}".format(target, profile)) as file:
                    profiles["{}/{}".format(target, profile)] = file.read().decode(
                        "utf-8"
                    )
        return profiles

    if not os.path.isfile(config_path):
        print("file not found: {}".format(config_path))
        exit(1)

    # fetch release URLs
    with urllib.request.urlopen(url) as infile:
        for path in re.findall(r"href=[\"']?([^'\" >]+)", str(infile.read())):
            if not path.startswith("/") and path.endswith("targets/"):
                release = path.strip("/").split("/")[-2]
                download_url = "{}/{}/{{target}}".format(url, path)

                profiles = handle_release("{}/{}".format(url, path))
                output = merge_profiles(profiles, download_url)
                if len(output) > 0:
                    os.makedirs("{}/{}".format(data_path, release), exist_ok=True)
                    # write overview.json
                    with open(
                        "{}/{}/overview.json".format(data_path, release), "w"
                    ) as outfile:
                        if args.formatted:
                            json.dump(output, outfile, indent=" ", sort_keys=True)
                        else:
                            json.dump(output, outfile, sort_keys=True)

                    versions[release] = "data/{}/overview.json".format(release)

    update_config(config_path, versions)


167 """
168 Scrape profiles.json using wget (slower but more generic).
169 Merge into overview.json files.
170 Update config.json.
171 """


def scrape_wget(args):
    url = args.domain
    www_path = args.www_path
    config_path = "{}/config.js".format(www_path)
    data_path = "{}/data".format(www_path)
    versions = {}

    with tempfile.TemporaryDirectory() as tmp_dir:
        # download all profiles.json files
        os.system(
            "wget -c -r -P {} -A 'profiles.json' --reject-regex 'kmods|packages' --no-parent {}".format(
                tmp_dir, url
            )
        )

        # delete empty folders
        os.system("find {}/* -type d -empty -delete".format(tmp_dir))

        # create overview.json files
        for path in glob.glob("{}/*/snapshots".format(tmp_dir)) + glob.glob(
            "{}/*/releases/*".format(tmp_dir)
        ):
            release = os.path.basename(path)
            base = path[len(tmp_dir) + 1 :]

            profiles = {}
            for ppath in Path(path).rglob("profiles.json"):
                with open(str(ppath), "r", encoding="utf-8") as file:
                    profiles[ppath] = file.read()

            if len(profiles) == 0:
                continue

            versions[release] = "data/{}/overview.json".format(release)

            output = merge_profiles(
                profiles, "https://{}/targets/{{target}}".format(base)
            )
            os.makedirs("{}/{}".format(data_path, release), exist_ok=True)

            # write overview.json
            with open("{}/{}/overview.json".format(data_path, release), "w") as outfile:
                if args.formatted:
                    json.dump(output, outfile, indent=" ", sort_keys=True)
                else:
                    json.dump(output, outfile, sort_keys=True)

    update_config(config_path, versions)


"""
Find and merge json files for a single release.
"""


def merge(args):
    input_paths = args.input_path
    # OpenWrt JSON device files
    profiles = {}

    def add_path(path):
        with open(str(path), "r", encoding="utf-8") as file:
            profiles[path] = file.read()

    for path in input_paths:
        if os.path.isdir(path):
            for filepath in Path(path).rglob("*.json"):
                add_path(filepath)
        else:
            if not path.endswith(".json"):
                sys.stderr.write("Folder does not exist: {}\n".format(path))
                exit(1)
            add_path(path)

    output = merge_profiles(profiles, args.download_url)

    if args.formatted:
        json.dump(output, sys.stdout, indent=" ", sort_keys=True)
    else:
        json.dump(output, sys.stdout, sort_keys=True)


256 """
257 Scan local directory for releases with profiles.json.
258 Merge into overview.json files.
259 Update config.json.
260 """


def scan(args):
    # firmware selector config
    config_path = "{}/config.js".format(args.www_path)
    # the overview.json files are placed here
    data_path = "{}/data".format(args.www_path)
    versions = {}

    # args.images_path => args.releases_path
    releases = {}
    for path in Path(args.images_path).rglob("profiles.json"):
        with open(str(path), "r", encoding="utf-8") as file:
            content = file.read()
            obj = json.loads(content)
            release = obj["version_number"]
            releases.setdefault(release, {})[path] = content

    for release, profiles in releases.items():
        output = merge_profiles(profiles, args.download_url)

        versions[release] = "data/{}/overview.json".format(release)
        os.makedirs("{}/{}".format(data_path, release), exist_ok=True)

        # write overview.json
        with open("{}/{}/overview.json".format(data_path, release), "w") as outfile:
            if args.formatted:
                json.dump(output, outfile, indent=" ", sort_keys=True)
            else:
                json.dump(output, outfile, sort_keys=True)

    update_config(config_path, versions)


def main():
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "--formatted", action="store_true", help="Output formatted JSON data."
    )
    subparsers = parser.add_subparsers(dest="action")
    subparsers.required = True

    parser_merge = subparsers.add_parser(
        "merge", help="Search for profiles.json files and output an overview.json."
    )
    parser_merge.add_argument(
        "input_path",
        nargs="+",
        help="Input folder that is traversed for OpenWrt JSON device files.",
    )
    parser_merge.add_argument(
        "--download-url",
        action="store",
        default="",
        help="Link to get the image from. May contain {target} (replaced by e.g. ath79/generic), {version} (replaced by the version key from config.js) and {commit} (git commit in hex notation).",
    )

    parser_scrape = subparsers.add_parser("scrape", help="Scrape webpage for releases.")
    parser_scrape.add_argument(
        "domain", help="Domain to scrape. E.g. https://downloads.openwrt.org"
    )
    parser_scrape.add_argument("www_path", help="Path the config.js file is in.")
    parser_scrape.add_argument(
        "--use-wget", action="store_true", help="Use wget to scrape the site."
    )

    parser_scan = subparsers.add_parser("scan", help="Scan directory for releases.")
    parser_scan.add_argument(
329 "download_url", help="Download for images. E.g. https://downloads.openwrt.org"
    )
    parser_scan.add_argument("images_path", help="Directory to scan for releases.")
    parser_scan.add_argument("www_path", help="Path the config.js file is in.")

    args = parser.parse_args()

    if args.action == "merge":
        merge(args)

    if args.action == "scan":
        scan(args)

    if args.action == "scrape":
        if args.use_wget:
            scrape_wget(args)
        else:
            scrape(args)


if __name__ == "__main__":
    main()
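
# Hypothetical example invocations, based on the argument parser above
# (paths and URLs are placeholders, not taken from this repository):
#
#   python3 misc/collect.py --formatted merge ./bin \
#       --download-url "https://downloads.openwrt.org/releases/{version}/targets/{target}" > overview.json
#   python3 misc/collect.py scan https://downloads.openwrt.org ./images ./www
#   python3 misc/collect.py scrape https://downloads.openwrt.org ./www --use-wget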