misc/collect.py: differentiate duplicate models by target
[web/firmware-selector-openwrt-org.git] / misc / collect.py
1 #!/usr/bin/env python3
2 """
3 Tool to create overview.json files and update the config.js.
4 """
5
6 from pathlib import Path
7 import urllib.request
8 import tempfile
9 import argparse
10 import json
11 import glob
12 import sys
13 import os
14 import re
15
SUPPORTED_METADATA_VERSION = 1

assert sys.version_info >= (3, 5), "Python version too old. Python >=3.5.0 needed."


def merge_profiles(profiles, download_url):
    """
    Merge the content of several profiles.json files into one overview object.

    profiles:     {<file-path>: <file-content>}; the content is JSON text.
    download_url: stored unchanged under the "download_url" key of the result.

    Returns a dict with the keys "version_code", "download_url" and "models"
    ({<title>: {"id", "target", "images"[, "code"]}}), or an empty dict if no
    input file could be used.

    Files that are not valid JSON or that have an unsupported metadata
    version are reported on stderr and skipped. A file that lacks a required
    key is reported on stderr and terminates the program with exit status 1.
    """
    # json output data
    output = {}

    def get_title(title):
        """Return the display name for one entry of a profile's "titles"."""
        if "title" in title:
            return title["title"]
        return "{} {} {}".format(
            title.get("vendor", ""), title["model"], title.get("variant", "")
        ).strip()

    def add_profile(path, profile_id, target, profile, code=None):
        """Add one model entry to the output for every title of the profile."""
        images = [
            {"name": image["name"], "type": image["type"]}
            for image in profile["images"]
        ]

        if target is None:
            target = profile["target"]

        for entry in profile["titles"]:
            title = get_title(entry)

            if not title:
                sys.stderr.write(
                    "Empty title. Skip title for {} in {}\n".format(profile_id, path)
                )
                continue

            # device is a duplicate, try to differentiate by target
            if title in output["models"]:
                title = "{} ({})".format(title, target)

            output["models"][title] = {
                "id": profile_id,
                "target": target,
                "images": images,
            }

            if code is not None:
                output["models"][title]["code"] = code

    for path, content in profiles.items():
        # Parsing happens inside its own try so that a broken file is skipped
        # instead of aborting the whole run with a traceback.
        try:
            obj = json.loads(content)
        except json.JSONDecodeError as e:
            sys.stderr.write("Skip {}\n {}\n".format(path, e))
            continue

        try:
            if obj["metadata_version"] != SUPPORTED_METADATA_VERSION:
                sys.stderr.write(
                    "{} has unsupported metadata version: {} => skip\n".format(
                        path, obj["metadata_version"]
                    )
                )
                continue

            code = obj.get("version_code", obj.get("version_commit"))

            # the first usable file defines the release-wide version code
            if "version_code" not in output:
                output = {
                    "version_code": code,
                    "download_url": download_url,
                    "models": {},
                }

            # if we have mixed codes/commits, store in device object
            if output["version_code"] == code:
                code = None

            if "profiles" in obj:
                for profile_id, profile in obj["profiles"].items():
                    add_profile(path, profile_id, obj.get("target"), profile, code)
            else:
                add_profile(path, obj["id"], obj["target"], obj, code)
        except KeyError as e:
            sys.stderr.write("Abort on {}\n Missing key {}\n".format(path, e))
            sys.exit(1)

    return output
93
94
def update_config(config_path, versions):
    """
    Replace the "versions: {...}" object inside config.js.

    config_path: path of the config.js file; it is rewritten in place.
    versions:    object whose str() form is inserted after "versions: "
                 (callers in this file pass a {<release>: <overview path>} dict).

    Every match of "versions:" followed by a brace-delimited block without
    nested braces is replaced. The file is read and written as UTF-8.
    """
    with open(str(config_path), "r", encoding="utf-8") as file:
        content = file.read()

    replacement = "versions: {}".format(versions)
    # A callable replacement is inserted verbatim. A plain replacement string
    # would have its backslashes interpreted as escapes by re.sub.
    content = re.sub(r"versions:[\s]*{[^}]*}", lambda _match: replacement, content)

    # Write with the same encoding used for reading, independent of the locale.
    with open(str(config_path), "w", encoding="utf-8") as file:
        file.write(content)
103
104
def scrape(args):
    """
    Scrape profiles.json using links like https://downloads.openwrt.org/releases/19.07.3/targets/?json
    Merge into overview.json files.
    Update config.js.

    args: parsed command line; this function reads args.domain (page that
          links to the releases), args.www_path (folder that contains
          config.js and receives data/<release>/overview.json) and
          args.formatted (write indented JSON).

    Terminates the program with exit status 1 if config.js does not exist.
    """
    url = args.domain
    www_path = args.www_path
    config_path = "{}/config.js".format(www_path)
    data_path = "{}/data".format(www_path)
    versions = {}

    def handle_release(target):
        """Download all profiles.json files listed by <target>/?json."""
        profiles = {}
        with urllib.request.urlopen("{}/?json".format(target)) as file:
            array = json.loads(file.read().decode("utf-8"))
        for profile in array:
            if not profile.endswith("/profiles.json"):
                continue
            profile_url = "{}/{}".format(target, profile)
            with urllib.request.urlopen(profile_url) as file:
                profiles[profile_url] = file.read().decode("utf-8")
        return profiles

    if not os.path.isfile(config_path):
        print("file not found: {}".format(config_path))
        sys.exit(1)

    # fetch release URLs
    with urllib.request.urlopen(url) as infile:
        # Decode the page. str() on the raw bytes would yield the "b'...'"
        # repr with escaped characters instead of the page text.
        page = infile.read().decode("utf-8", errors="replace")

    for path in re.findall(r"href=[\"']?([^'\" >]+)", page):
        if path.startswith("/") or not path.endswith("targets/"):
            continue

        parts = path.strip("/").split("/")
        if len(parts) < 2:
            # a bare "targets/" link has no release component in front of it
            continue
        release = parts[-2]

        # Drop the trailing slash of the link so that the joins below do not
        # produce ".../targets//?json" or ".../targets//{target}".
        targets_url = "{}/{}".format(url, path.rstrip("/"))
        download_url = "{}/{{target}}".format(targets_url)

        profiles = handle_release(targets_url)
        output = merge_profiles(profiles, download_url)
        if not output:
            # nothing usable found for this release
            continue

        os.makedirs("{}/{}".format(data_path, release), exist_ok=True)
        # write overview.json
        with open("{}/{}/overview.json".format(data_path, release), "w") as outfile:
            if args.formatted:
                json.dump(output, outfile, indent=" ", sort_keys=True)
            else:
                json.dump(output, outfile, sort_keys=True)

        versions[release] = "data/{}/overview.json".format(release)

    update_config(config_path, versions)
157
158
def scrape_wget(args):
    """
    Scrape profiles.json using wget (slower but more generic).
    Merge into overview.json files.
    Update config.js.

    args: parsed command line; this function reads args.domain (URL handed
          to wget), args.www_path (folder that contains config.js and
          receives data/<release>/overview.json) and args.formatted (write
          indented JSON).

    Terminates the program with exit status 1 if wget cannot be started.
    """
    # Imported here because the module's import block does not provide it.
    import subprocess

    url = args.domain
    www_path = args.www_path
    config_path = "{}/config.js".format(www_path)
    data_path = "{}/data".format(www_path)
    versions = {}

    with tempfile.TemporaryDirectory() as tmp_dir:
        # download all profiles.json files
        # The arguments are passed as a list, so no shell ever parses the
        # user-supplied URL. "--" keeps a URL that starts with "-" from being
        # read as an option. The exit status of wget is deliberately ignored,
        # as it was with os.system.
        try:
            subprocess.run(
                [
                    "wget",
                    "-c",
                    "-r",
                    "-P",
                    tmp_dir,
                    "-A",
                    "profiles.json",
                    "--reject-regex",
                    "kmods|packages",
                    "--no-parent",
                    "--",
                    url,
                ]
            )
        except FileNotFoundError:
            # Continuing without any download would reset the version list
            # in config.js to an empty object.
            sys.stderr.write("wget not found, cannot scrape {}\n".format(url))
            sys.exit(1)

        # delete empty folders (bottom-up, so that folders that only
        # contained empty folders are removed as well)
        for root, _dirs, _files in os.walk(tmp_dir, topdown=False):
            if root != tmp_dir and not os.listdir(root):
                os.rmdir(root)

        # create overview.json files
        release_paths = glob.glob("{}/*/snapshots".format(tmp_dir)) + glob.glob(
            "{}/*/releases/*".format(tmp_dir)
        )
        for path in release_paths:
            release = os.path.basename(path)
            # path relative to the download folder: <host>/.../<release>
            base = path[len(tmp_dir) + 1 :]

            profiles = {}
            for ppath in Path(path).rglob("profiles.json"):
                with open(str(ppath), "r", encoding="utf-8") as file:
                    profiles[ppath] = file.read()

            if not profiles:
                continue

            versions[release] = "data/{}/overview.json".format(release)

            output = merge_profiles(
                profiles, "https://{}/targets/{{target}}".format(base)
            )
            os.makedirs("{}/{}".format(data_path, release), exist_ok=True)

            # write overview.json
            with open("{}/{}/overview.json".format(data_path, release), "w") as outfile:
                if args.formatted:
                    json.dump(output, outfile, indent=" ", sort_keys=True)
                else:
                    json.dump(output, outfile, sort_keys=True)

        update_config(config_path, versions)
214
215
def merge(args):
    """
    Find and merge json files for a single release.

    args: parsed command line; this function reads args.input_path (list of
          folders and/or .json files), args.download_url and args.formatted.

    Folders are searched recursively for *.json files. The merged overview
    is written to stdout. Any other path that does not end in ".json" is
    reported on stderr and ends the program with exit status 1.
    """
    # OpenWrt JSON device files, keyed by the path they were read from
    profiles = {}

    for entry in args.input_path:
        if os.path.isdir(entry):
            candidates = Path(entry).rglob("*.json")
        elif entry.endswith(".json"):
            candidates = [entry]
        else:
            sys.stderr.write("Folder does not exists: {}\n".format(entry))
            exit(1)

        for candidate in candidates:
            with open(str(candidate), "r", encoding="utf-8") as handle:
                profiles[candidate] = handle.read()

    output = merge_profiles(profiles, args.download_url)

    dump_options = {"sort_keys": True}
    if args.formatted:
        dump_options["indent"] = " "
    json.dump(output, sys.stdout, **dump_options)
246
247
def scan(args):
    """
    Scan local directory for releases with profiles.json.
    Merge into overview.json files.
    Update config.js.

    args: parsed command line; this function reads args.images_path (folder
          searched recursively for profiles.json), args.www_path (folder
          that contains config.js and receives data/<release>/overview.json),
          args.download_url and args.formatted.
    """
    # firmware selector config
    config_path = "{}/config.js".format(args.www_path)
    # the overview.json files are placed here
    data_path = "{}/data".format(args.www_path)

    # args.images_path => args.releases_path
    # group the raw file contents by the "version_number" they declare
    releases = {}
    for profiles_file in Path(args.images_path).rglob("profiles.json"):
        with open(str(profiles_file), "r", encoding="utf-8") as handle:
            raw = handle.read()
        version = json.loads(raw)["version_number"]
        if version not in releases:
            releases[version] = {}
        releases[version][profiles_file] = raw

    versions = {}
    for release, profiles in releases.items():
        output = merge_profiles(profiles, args.download_url)

        versions[release] = "data/{}/overview.json".format(release)
        release_dir = "{}/{}".format(data_path, release)
        os.makedirs(release_dir, exist_ok=True)

        # write overview.json
        dump_options = {"sort_keys": True}
        if args.formatted:
            dump_options["indent"] = " "
        with open("{}/overview.json".format(release_dir), "w") as outfile:
            json.dump(output, outfile, **dump_options)

    update_config(config_path, versions)
285
286
def main():
    """
    Parse the command line and run the selected action.

    The sub-commands are "merge", "scrape" and "scan"; one of them is
    required. The global --formatted switch requests indented JSON output.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "--formatted", action="store_true", help="Output formatted JSON data."
    )
    subparsers = parser.add_subparsers(dest="action")
    subparsers.required = True

    # merge: local files -> overview.json on stdout
    merge_cmd = subparsers.add_parser(
        "merge", help="Search for profiles.json files and output an overview.json."
    )
    merge_cmd.add_argument(
        "input_path",
        nargs="+",
        help="Input folder that is traversed for OpenWrt JSON device files.",
    )
    merge_cmd.add_argument(
        "--download-url",
        action="store",
        default="",
        help="Link to get the image from. May contain {target} (replaced by e.g. ath79/generic), {version} (replace by the version key from config.js) and {commit} (git commit in hex notation).",
    )

    # scrape: remote download server -> data folder and config.js
    scrape_cmd = subparsers.add_parser("scrape", help="Scrape webpage for releases.")
    scrape_cmd.add_argument(
        "domain", help="Domain to scrape. E.g. https://downloads.openwrt.org"
    )
    scrape_cmd.add_argument("www_path", help="Path the config.js file is in.")
    scrape_cmd.add_argument(
        "--use-wget", action="store_true", help="Use wget to scrape the site."
    )

    # scan: local image folder -> data folder and config.js
    scan_cmd = subparsers.add_parser("scan", help="Scan directory for releases.")
    scan_cmd.add_argument(
        "download_url", help="Download for images. E.g. https://downloads.openwrt.org"
    )
    scan_cmd.add_argument("images_path", help="Directory to scan for releases.")
    scan_cmd.add_argument("www_path", help="Path the config.js file is in.")

    args = parser.parse_args()

    # the actions are mutually exclusive, so at most one branch runs
    if args.action == "merge":
        merge(args)
    elif args.action == "scan":
        scan(args)
    elif args.action == "scrape":
        if args.use_wget:
            scrape_wget(args)
        else:
            scrape(args)


if __name__ == "__main__":
    main()