9608279da67bcccebd5728227d31b3a276097920
[web/firmware-selector-openwrt-org.git] / misc / collect.py
1 #!/usr/bin/env python3
2 """
3 Tool to create overview.json files and update the config.js.
4 """
5
6 from pathlib import Path
7 import urllib.request
8 import tempfile
9 import argparse
10 import json
11 import glob
12 import sys
13 import os
14 import re
15
# Only profiles.json files declaring this metadata_version are merged.
SUPPORTED_METADATA_VERSION = 1

# Explicit check instead of `assert`: asserts are removed when Python runs
# with -O, which would silently disable the version guard.
if sys.version_info < (3, 5):
    raise RuntimeError("Python version too old. Python >=3.5.0 needed.")
19
20
# accepts {<file-path>: <file-content>}
def merge_profiles(profiles, download_url):
    """
    Merge the content of several profiles.json files into one overview object.

    profiles: dict mapping a file path (only used in messages) to the JSON
        text of that file.
    download_url: stored unchanged as "download_url" in the result.

    Returns {"version_code": ..., "download_url": ..., "models": {...}} with
    one "models" entry per device title, or {} if no file was usable.

    Files that are not valid JSON, or that declare an unsupported
    metadata_version, are skipped with a message on stderr. A missing
    mandatory key aborts the program with exit status 1.
    """
    # json output data
    output = {}

    def get_title(title):
        # an explicit title wins over the vendor/model/variant triple
        if "title" in title:
            return title["title"]
        return "{} {} {}".format(
            title.get("vendor", ""), title["model"], title.get("variant", "")
        ).strip()

    def add_profile(profile_id, target, profile, code=None):
        # only name and type of each image are kept in the overview
        images = [
            {"name": image["name"], "type": image["type"]}
            for image in profile["images"]
        ]

        if target is None:
            target = profile["target"]

        for entry in profile["titles"]:
            title = get_title(entry)

            if not title:
                sys.stderr.write(
                    "Empty title. Skip title for {}\n".format(profile_id)
                )
                continue

            model = {"id": profile_id, "target": target, "images": images}
            if code is not None:
                model["code"] = code
            output["models"][title] = model

    for path, content in profiles.items():
        # Parsing must happen inside the try, otherwise one malformed file
        # crashes the whole run instead of being skipped.
        try:
            obj = json.loads(content)
        except json.JSONDecodeError as e:
            sys.stderr.write("Skip {}\n {}\n".format(path, e))
            continue

        try:
            if obj["metadata_version"] != SUPPORTED_METADATA_VERSION:
                sys.stderr.write(
                    "{} has unsupported metadata version: {} => skip\n".format(
                        path, obj["metadata_version"]
                    )
                )
                continue

            code = obj.get("version_code", obj.get("version_commit"))

            # the first usable file defines the header of the overview
            if "version_code" not in output:
                output.update(
                    {"version_code": code, "download_url": download_url, "models": {}}
                )

            # if we have mixed codes/commits, store in device object
            if output["version_code"] == code:
                code = None

            if "profiles" in obj:
                for profile_id, profile in obj["profiles"].items():
                    add_profile(profile_id, obj.get("target"), profile, code)
            else:
                add_profile(obj["id"], obj["target"], obj, code)
        except KeyError as e:
            sys.stderr.write("Abort on {}\n Missing key {}\n".format(path, e))
            sys.exit(1)

    return output
87
88
def update_config(config_path, versions):
    """
    Replace every `versions: {...}` object in the config file with `versions`.

    config_path: path of the config.js file; it is rewritten in place.
    versions: dict mapping a release name to the path of its overview.json.
        It is inserted using Python's dict formatting.
    """
    with open(str(config_path), "r", encoding="utf-8") as file:
        content = file.read()

    replacement = "versions: {}".format(versions)
    # A callable makes re.sub insert the text literally. A plain replacement
    # string has its backslash escapes processed, which would corrupt any
    # backslash contained in the formatted dict.
    content = re.sub(r"versions:[\s]*{[^}]*}", lambda _match: replacement, content)

    with open(str(config_path), "w", encoding="utf-8") as file:
        file.write(content)
97
98
"""
Scrape profiles.json using links like https://downloads.openwrt.org/releases/19.07.3/targets/?json
Merge into overview.json files.
Update config.js.
"""
104
105
def scrape(args):
    """
    Collect profiles.json files over HTTP and write one overview.json per release.

    args: parsed command line arguments. Uses args.domain (URL of the page
        that links to the releases), args.selector (directory containing
        config.js; output goes to its data/ sub folder) and args.formatted
        (indent the JSON output).

    Exits with status 1 if config.js is not found in args.selector.
    Finally the versions object in config.js is updated.
    """
    url = args.domain
    selector_path = args.selector
    config_path = "{}/config.js".format(selector_path)
    data_path = "{}/data".format(selector_path)
    versions = {}

    def fetch_text(address, errors="strict"):
        with urllib.request.urlopen(address) as response:
            return response.read().decode("utf-8", errors)

    def handle_release(target):
        # presumably the ?json listing is an array of paths relative to target
        listing = json.loads(fetch_text("{}/?json".format(target)))
        profiles = {}
        for name in listing:
            if name.endswith("/profiles.json"):
                address = "{}/{}".format(target, name)
                profiles[address] = fetch_text(address)
        return profiles

    if not os.path.isfile(config_path):
        print("file not found: {}".format(config_path))
        sys.exit(1)

    # fetch release URLs
    # The page is decoded before matching: applying str() to the raw bytes
    # yields their repr, in which quotes may be backslash-escaped so that
    # quoted hrefs are no longer recognised.
    page = fetch_text(url, errors="replace")
    for path in re.findall(r"href=[\"']?([^'\" >]+)", page):
        if path.startswith("/") or not path.endswith("targets/"):
            continue

        # the release name is the path component in front of "targets"
        parts = path.strip("/").split("/")
        if len(parts) < 2:
            sys.stderr.write(
                "Cannot derive release name from {} => skip\n".format(path)
            )
            continue
        release = parts[-2]
        download_url = "{}/{}/{{target}}".format(url, path)

        profiles = handle_release("{}/{}".format(url, path))
        output = merge_profiles(profiles, download_url)
        if not output:
            continue

        release_dir = "{}/{}".format(data_path, release)
        os.makedirs(release_dir, exist_ok=True)

        # write overview.json
        with open("{}/overview.json".format(release_dir), "w") as outfile:
            json.dump(
                output,
                outfile,
                indent=" " if args.formatted else None,
                sort_keys=True,
            )

        versions[release.upper()] = "data/{}/overview.json".format(release)

    update_config(config_path, versions)
151
152
"""
Scrape profiles.json using wget (slower but more generic).
Merge into overview.json files.
Update config.js.
"""
158
159
def scrape_wget(args):
    """
    Mirror all profiles.json files with wget and write one overview.json per release.

    args: parsed command line arguments. Uses args.domain (URL handed to
        wget), args.selector (directory containing config.js; output goes to
        its data/ sub folder) and args.formatted (indent the JSON output).

    Exits with status 1 if the wget executable cannot be started.
    Finally the versions object in config.js is updated.
    """
    import subprocess  # only needed by this code path

    url = args.domain
    selector_path = args.selector
    config_path = "{}/config.js".format(selector_path)
    data_path = "{}/data".format(selector_path)
    versions = {}

    with tempfile.TemporaryDirectory() as tmp_dir:
        # download all profiles.json files
        # An argument list is used instead of a shell command line, so
        # characters like '&', ';' or spaces in the URL are passed to wget
        # verbatim and never interpreted by a shell.
        # The exit status of wget is not checked (same as before).
        try:
            subprocess.run(
                [
                    "wget",
                    "-c",
                    "-r",
                    "-P",
                    tmp_dir,
                    "-A",
                    "profiles.json",
                    "--reject-regex",
                    "kmods|packages",
                    "--no-parent",
                    url,
                ]
            )
        except FileNotFoundError:
            # abort before config.js gets rewritten with an empty version list
            sys.stderr.write("wget not found\n")
            sys.exit(1)

        # delete empty folders (bottom-up, so emptied parents are removed too)
        for folder, _subfolders, _files in os.walk(tmp_dir, topdown=False):
            if folder != tmp_dir and not os.listdir(folder):
                os.rmdir(folder)

        # create overview.json files
        release_paths = glob.glob("{}/*/snapshots".format(tmp_dir)) + glob.glob(
            "{}/*/releases/*".format(tmp_dir)
        )
        for path in release_paths:
            release = os.path.basename(path)
            # path relative to tmp_dir; it starts with the folder wget
            # created directly below tmp_dir
            base = path[len(tmp_dir) + 1 :]

            profiles = {}
            for ppath in Path(path).rglob("profiles.json"):
                with open(str(ppath), "r", encoding="utf-8") as file:
                    profiles[ppath] = file.read()

            if not profiles:
                continue

            versions[release.upper()] = "data/{}/overview.json".format(release)

            output = merge_profiles(
                profiles, "https://{}/targets/{{target}}".format(base)
            )
            release_dir = "{}/{}".format(data_path, release)
            os.makedirs(release_dir, exist_ok=True)

            # write overview.json
            with open("{}/overview.json".format(release_dir), "w") as outfile:
                json.dump(
                    output,
                    outfile,
                    indent=" " if args.formatted else None,
                    sort_keys=True,
                )

    update_config(config_path, versions)
208
209
210 """
211 Find and merge json files for a single release.
212 """
213
214
def merge(args):
    """
    Merge the given JSON device files and print the result to stdout.

    args: parsed command line arguments. Uses args.input_path (list of
        folders, searched recursively for *.json, and/or single .json
        files), args.download_url (passed on to merge_profiles) and
        args.formatted (indent the JSON output).

    Exits with status 1 if an input path is neither a directory nor a path
    ending in ".json".
    """
    # OpenWrt JSON device files
    profiles = {}

    def add_path(path):
        # read as UTF-8 regardless of the locale, like the scan command does
        with open(str(path), "r", encoding="utf-8") as file:
            profiles[path] = file.read()

    for path in args.input_path:
        if os.path.isdir(path):
            for filepath in Path(path).rglob("*.json"):
                add_path(filepath)
        elif path.endswith(".json"):
            add_path(path)
        else:
            sys.stderr.write("Folder does not exists: {}\n".format(path))
            sys.exit(1)

    output = merge_profiles(profiles, args.download_url)

    json.dump(
        output,
        sys.stdout,
        indent=" " if args.formatted else None,
        sort_keys=True,
    )
240
241
"""
Scan local directory for releases with profiles.json.
Merge into overview.json files.
Update config.js.
"""
247
248
def scan(args):
    """
    Scan a local directory for releases and write one overview.json per release.

    args: parsed command line arguments. Uses args.directory (folder that
        contains snapshots/ and/or releases/*), args.domain (host name or
        base URL used to build the download_url), args.selector (directory
        containing config.js; output goes to its data/ sub folder) and
        args.formatted (indent the JSON output).

    Finally the versions object in config.js is updated.
    """
    selector_path = args.selector
    config_path = "{}/config.js".format(selector_path)
    data_path = "{}/data".format(selector_path)
    versions = {}

    # The command line help gives "https://downloads.openwrt.org" as example
    # for domain. Prepend the scheme only when it is missing, otherwise the
    # download_url would start with "https://https://".
    if "://" in args.domain:
        base_url = args.domain
    else:
        base_url = "https://{}".format(args.domain)

    # create overview.json files
    release_paths = glob.glob("{}/snapshots".format(args.directory)) + glob.glob(
        "{}/releases/*".format(args.directory)
    )
    for path in release_paths:
        release = os.path.basename(path)
        # path relative to the scanned directory
        base_dir = path[len(args.directory) + 1 :]

        profiles = {}
        for ppath in Path(path).rglob("profiles.json"):
            with open(str(ppath), "r", encoding="utf-8") as file:
                profiles[ppath] = file.read()

        if not profiles:
            continue

        versions[release.upper()] = "data/{}/overview.json".format(release)

        output = merge_profiles(
            profiles, "{}/{}/targets/{{target}}".format(base_url, base_dir)
        )
        release_dir = "{}/{}".format(data_path, release)
        os.makedirs(release_dir, exist_ok=True)

        # write overview.json
        with open("{}/overview.json".format(release_dir), "w") as outfile:
            json.dump(
                output,
                outfile,
                indent=" " if args.formatted else None,
                sort_keys=True,
            )

    update_config(config_path, versions)
285
286
def main():
    """
    Parse the command line and run the selected action.

    Actions: "merge" (merge JSON device files to stdout), "scrape" (collect
    releases from a web page, optionally using wget) and "scan" (collect
    releases from a local directory). An action is mandatory.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "--formatted", action="store_true", help="Output formatted JSON data."
    )
    subparsers = parser.add_subparsers(dest="action")
    # set as attribute: the required= keyword needs a newer Python version
    subparsers.required = True

    parser_merge = subparsers.add_parser(
        "merge",
        help="Merge OpenWrt JSON device files into a single overview and print it to stdout.",
    )
    parser_merge.add_argument(
        "input_path",
        nargs="+",
        help="Input folder that is traversed for OpenWrt JSON device files.",
    )
    parser_merge.add_argument(
        "--download-url",
        action="store",
        default="",
        help="Link to get the image from. May contain {target}, {version} and {commit}",
    )

    parser_scrape = subparsers.add_parser("scrape", help="Scrape webpage for releases.")
    parser_scrape.add_argument(
        "domain", help="Domain to scrape. E.g. https://downloads.openwrt.org"
    )
    parser_scrape.add_argument("selector", help="Path the config.js file is in.")
    parser_scrape.add_argument(
        "--use-wget", action="store_true", help="Use wget to scrape the site."
    )

    parser_scan = subparsers.add_parser("scan", help="Scan directory for releases.")
    parser_scan.add_argument(
        "domain",
        help="Domain for download_url attribute in overview.json. E.g. https://downloads.openwrt.org",
    )
    parser_scan.add_argument("directory", help="Directory to scan for releases.")
    parser_scan.add_argument("selector", help="Path the config.js file is in.")

    args = parser.parse_args()

    # the actions are mutually exclusive
    if args.action == "merge":
        merge(args)
    elif args.action == "scan":
        scan(args)
    elif args.action == "scrape":
        if args.use_wget:
            scrape_wget(args)
        else:
            scrape(args)


# run only when executed as a script, not when imported
if __name__ == "__main__":
    main()