d3c1f5a5b5ece382cead24a1b106ebfedddcc85e
[web/firmware-selector-openwrt-org.git] / misc / collect.py
1 #!/usr/bin/env python3
2 """
3 Tool to create overview.json files and update the config.js.
4 """
5
6 from pathlib import Path
7 import urllib.request
8 import tempfile
9 import argparse
10 import json
11 import glob
12 import sys
13 import os
14 import re
15
# Only profile documents declaring this "metadata_version" are merged.
SUPPORTED_METADATA_VERSION = 1


def merge_profiles(profiles, download_url):
    """Merge OpenWrt profile JSON documents into one overview dictionary.

    Args:
        profiles: Mapping ``{<file-path>: <file-content>}``. The content is
            JSON text; the path is only used in diagnostic messages.
        download_url: Stored unchanged under the ``"download_url"`` key.

    Returns:
        ``{}`` when no document was accepted. Otherwise a dict with the keys
        ``"version_code"`` (taken from the first accepted document),
        ``"download_url"`` and ``"models"``. ``"models"`` maps every device
        title to ``{"id", "target", "images"}``, plus ``"code"`` when the
        document's version differs from the top-level ``"version_code"``.

    Raises:
        SystemExit: With status 1 when a parsed document lacks a required key.
    """
    # json output data
    output = {}

    def get_title(title):
        """Return the explicit title, else join vendor, model and variant."""
        if "title" in title:
            return title["title"]
        return "{} {} {}".format(
            title.get("vendor", ""), title["model"], title.get("variant", "")
        ).strip()

    def add_profile(profile_id, target, profile, code=None):
        """Register one device profile under each of its titles."""
        images = [
            {"name": image["name"], "type": image["type"]}
            for image in profile["images"]
        ]

        if target is None:
            target = profile["target"]

        for entry in profile["titles"]:
            title = get_title(entry)

            if not title:
                sys.stderr.write(
                    "Empty title. Skip title for {}\n".format(profile_id)
                )
                continue

            model = {"id": profile_id, "target": target, "images": images}
            if code is not None:
                model["code"] = code
            output["models"][title] = model

    for path, content in profiles.items():
        # Parsing happens inside its own try so that one malformed file is
        # reported and skipped instead of aborting the whole run.
        try:
            obj = json.loads(content)
        except json.decoder.JSONDecodeError as e:
            sys.stderr.write("Skip {}\n {}\n".format(path, e))
            continue

        try:
            if obj["metadata_version"] != SUPPORTED_METADATA_VERSION:
                sys.stderr.write(
                    "{} has unsupported metadata version: {} => skip\n".format(
                        path, obj["metadata_version"]
                    )
                )
                continue

            code = obj.get("version_code", obj.get("version_commit"))

            if "version_code" not in output:
                output = {
                    "version_code": code,
                    "download_url": download_url,
                    "models": {},
                }

            # if we have mixed codes/commits, store in device object
            if output["version_code"] == code:
                code = None

            if "profiles" in obj:
                for profile_id, profile in obj["profiles"].items():
                    add_profile(profile_id, obj.get("target"), profile, code)
            else:
                add_profile(obj["id"], obj["target"], obj, code)
        except KeyError as e:
            sys.stderr.write("Abort on {}\n Missing key {}\n".format(path, e))
            sys.exit(1)

    return output
85
86
def update_config(config_path, versions):
    """Rewrite the ``versions: {...}`` entry of a config file in place.

    The file at ``config_path`` is read completely, every
    ``versions: { ... }`` occurrence is replaced by ``versions: `` followed
    by the string form of ``versions``, and the result is written back to
    the same path.
    """
    target = str(config_path)
    replacement = "versions: {}".format(versions)

    with open(target, "r") as source:
        original = source.read()

    updated = re.sub("versions:[\\s]*{[^}]*}", replacement, original)

    with open(target, "w+") as sink:
        sink.write(updated)
95
96
97 """
98 Scrape profiles.json using links like https://downloads.openwrt.org/releases/19.07.3/targets/?json
99 Merge into overview.json files.
100 Update config.js.
101 """
102
103
def scrape(args):
    """Scrape a download server for releases and write overview.json files.

    The index page at ``args.domain`` is searched for relative links ending
    in ``targets/``. For each such link the ``?json`` listing is fetched,
    every listed ``profiles.json`` is downloaded and merged, and the result
    is written to ``<selector>/data/<release>/overview.json``. Finally the
    ``versions`` entry of ``<selector>/config.js`` is updated.

    Args:
        args: Parsed command line with ``domain``, ``selector`` and
            ``formatted`` attributes.

    Raises:
        SystemExit: With status 1 when ``<selector>/config.js`` is missing.
    """
    url = args.domain
    selector_path = args.selector
    config_path = "{}/config.js".format(selector_path)
    data_path = "{}/data".format(selector_path)
    versions = {}

    def handle_release(target):
        """Download all profiles.json files listed by ``<target>/?json``."""
        profiles = {}
        with urllib.request.urlopen("{}/?json".format(target)) as file:
            array = json.loads(file.read().decode("utf-8"))
        for profile in array:
            if not profile.endswith("/profiles.json"):
                continue
            profile_url = "{}/{}".format(target, profile)
            with urllib.request.urlopen(profile_url) as file:
                profiles[profile_url] = file.read().decode("utf-8")
        return profiles

    if not os.path.isfile(config_path):
        print("file not found: {}".format(config_path))
        sys.exit(1)

    # fetch release URLs
    with urllib.request.urlopen(url) as infile:
        # Decode the body: str() on bytes would yield the repr ("b'...'")
        # with escaped quotes, which the href pattern cannot match reliably.
        index_html = infile.read().decode("utf-8", errors="replace")

    for path in re.findall(r"href=[\"']?([^'\" >]+)", index_html):
        if path.startswith("/") or not path.endswith("targets/"):
            continue

        parts = path.strip("/").split("/")
        if len(parts) < 2:
            # a bare "targets/" link has no release component
            continue
        release = parts[-2]
        download_url = "{}/{}/{{target}}".format(url, path)

        profiles = handle_release("{}/{}".format(url, path))
        output = merge_profiles(profiles, download_url)
        if len(output) > 0:
            os.makedirs("{}/{}".format(data_path, release), exist_ok=True)
            # write overview.json
            with open(
                "{}/{}/overview.json".format(data_path, release), "w"
            ) as outfile:
                if args.formatted:
                    json.dump(output, outfile, indent=" ", sort_keys=True)
                else:
                    json.dump(output, outfile, sort_keys=True)

            # only advertise releases for which a file was written
            versions[release.upper()] = "data/{}/overview.json".format(release)

    update_config(config_path, versions)
149
150
151 """
152 Scrape profiles.json using wget (slower but more generic).
153 Merge into overview.json files.
154 Update config.js.
155 """
156
157
def scrape_wget(args):
    """Mirror profiles.json files with wget and write overview.json files.

    ``wget`` recursively downloads every ``profiles.json`` below
    ``args.domain`` into a temporary directory. Each ``snapshots`` and
    ``releases/*`` folder found there is merged into
    ``<selector>/data/<release>/overview.json``, and the ``versions`` entry
    of ``<selector>/config.js`` is updated afterwards.

    Args:
        args: Parsed command line with ``domain``, ``selector`` and
            ``formatted`` attributes.

    Raises:
        FileNotFoundError: When the ``wget`` executable cannot be started.
    """
    # Local import: subprocess is not among the module-level imports.
    import subprocess

    url = args.domain
    selector_path = args.selector
    config_path = "{}/config.js".format(selector_path)
    data_path = "{}/data".format(selector_path)
    versions = {}

    with tempfile.TemporaryDirectory() as tmp_dir:
        # download all profiles.json files
        # An argument list (no shell) keeps the user-supplied URL from being
        # interpreted as shell syntax. The exit status is ignored, as before.
        subprocess.run(
            [
                "wget",
                "-c",
                "-r",
                "-P",
                tmp_dir,
                "-A",
                "profiles.json",
                "--reject-regex",
                "kmods|packages",
                "--no-parent",
                url,
            ],
            check=False,
        )

        # delete empty folders (bottom-up, so emptied parents go as well)
        for root, _dirs, _files in os.walk(tmp_dir, topdown=False):
            if root != tmp_dir and not os.listdir(root):
                os.rmdir(root)

        # create overview.json files
        for path in glob.glob("{}/*/snapshots".format(tmp_dir)) + glob.glob(
            "{}/*/releases/*".format(tmp_dir)
        ):
            release = os.path.basename(path)
            base = path[len(tmp_dir) + 1 :]

            profiles = {}
            for ppath in Path(path).rglob("profiles.json"):
                with open(str(ppath), "r") as file:
                    profiles[ppath] = file.read()

            if len(profiles) == 0:
                continue

            output = merge_profiles(
                profiles, "https://{}/targets/{{target}}".format(base)
            )
            if len(output) == 0:
                # nothing usable was merged; do not publish an empty overview
                continue

            versions[release.upper()] = "data/{}/overview.json".format(release)
            os.makedirs("{}/{}".format(data_path, release), exist_ok=True)

            # write overview.json
            with open("{}/{}/overview.json".format(data_path, release), "w") as outfile:
                if args.formatted:
                    json.dump(output, outfile, indent=" ", sort_keys=True)
                else:
                    json.dump(output, outfile, sort_keys=True)

    update_config(config_path, versions)
206
207
208 """
209 Find and merge json files for a single release.
210 """
211
212
def merge(args):
    """Merge JSON device files of one release and print the result.

    Every entry of ``args.input_path`` is either a directory, which is
    searched recursively for ``*.json`` files, or a single path ending in
    ``.json``. Any other entry is reported on stderr and the process exits
    with status 1. The merged overview is dumped to stdout, indented when
    ``args.formatted`` is set.
    """
    # OpenWrt JSON device files, keyed by the path they were read from
    collected = {}

    for entry in args.input_path:
        if os.path.isdir(entry):
            candidates = Path(entry).rglob("*.json")
        elif entry.endswith(".json"):
            candidates = [entry]
        else:
            sys.stderr.write("Folder does not exists: {}\n".format(entry))
            exit(1)

        for candidate in candidates:
            with open(str(candidate), "r") as handle:
                collected[candidate] = handle.read()

    overview = merge_profiles(collected, args.download_url)

    dump_options = {"sort_keys": True}
    if args.formatted:
        dump_options["indent"] = " "
    json.dump(overview, sys.stdout, **dump_options)
238
239
240 """
241 Scan local directory for releases with profiles.json.
242 Merge into overview.json files.
243 Update config.js.
244 """
245
246
def scan(args):
    """Scan a local directory for releases and write overview.json files.

    Looks for ``<directory>/snapshots`` and ``<directory>/releases/*``,
    merges every ``profiles.json`` found below each of them into
    ``<selector>/data/<release>/overview.json`` and finally updates the
    ``versions`` entry of ``<selector>/config.js``.

    Args:
        args: Parsed command line with ``domain``, ``directory``,
            ``selector`` and ``formatted`` attributes. ``domain`` may be a
            bare host name or include a scheme such as ``https://``.
    """
    selector_path = args.selector
    config_path = "{}/config.js".format(selector_path)
    data_path = "{}/data".format(selector_path)
    versions = {}

    # The CLI help suggests passing e.g. "https://downloads.openwrt.org";
    # only prepend a scheme when the caller did not supply one, otherwise the
    # download URL would start with "https://https://".
    if "://" in args.domain:
        url_base = args.domain
    else:
        url_base = "https://{}".format(args.domain)

    # create overview.json files
    for path in glob.glob("{}/snapshots".format(args.directory)) + glob.glob(
        "{}/releases/*".format(args.directory)
    ):
        release = os.path.basename(path)
        base_dir = path[len(args.directory) + 1 :]

        profiles = {}
        for ppath in Path(path).rglob("profiles.json"):
            with open(str(ppath), "r", encoding="utf-8") as file:
                profiles[ppath] = file.read()

        if len(profiles) == 0:
            continue

        output = merge_profiles(
            profiles, "{}/{}/targets/{{target}}".format(url_base, base_dir)
        )
        if len(output) == 0:
            # nothing usable was merged; do not publish an empty overview
            continue

        versions[release.upper()] = "data/{}/overview.json".format(release)
        os.makedirs("{}/{}".format(data_path, release), exist_ok=True)

        # write overview.json
        with open("{}/{}/overview.json".format(data_path, release), "w") as outfile:
            if args.formatted:
                json.dump(output, outfile, indent=" ", sort_keys=True)
            else:
                json.dump(output, outfile, sort_keys=True)

    update_config(config_path, versions)
283
284
def main():
    """Parse the command line and run the selected sub-command.

    Sub-commands are ``merge``, ``scrape`` (optionally via ``--use-wget``)
    and ``scan``; exactly one is required. The global ``--formatted`` flag
    selects indented JSON output.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "--formatted", action="store_true", help="Output formatted JSON data."
    )
    subparsers = parser.add_subparsers(dest="action")
    subparsers.required = True

    parser_merge = subparsers.add_parser(
        "merge",
        help="Merge OpenWrt JSON device files of a single release and print the result.",
    )
    parser_merge.add_argument(
        "input_path",
        nargs="+",
        help="Input folder that is traversed for OpenWrt JSON device files.",
    )
    parser_merge.add_argument(
        "--download-url",
        action="store",
        default="",
        help="Link to get the image from. May contain {target}, {version} and {commit}",
    )

    parser_scrape = subparsers.add_parser("scrape", help="Scrape webpage for releases.")
    parser_scrape.add_argument(
        "domain", help="Domain to scrape. E.g. https://downloads.openwrt.org"
    )
    parser_scrape.add_argument("selector", help="Path the config.js file is in.")
    parser_scrape.add_argument(
        "--use-wget", action="store_true", help="Use wget to scrape the site."
    )

    parser_scan = subparsers.add_parser("scan", help="Scan directory for releases.")
    parser_scan.add_argument(
        "domain",
        help="Domain for download_url attribute in overview.json. E.g. https://downloads.openwrt.org",
    )
    parser_scan.add_argument("directory", help="Directory to scan for releases.")
    parser_scan.add_argument("selector", help="Path the config.js file is in.")

    args = parser.parse_args()

    # The actions are mutually exclusive, so dispatch with a single chain.
    if args.action == "merge":
        merge(args)
    elif args.action == "scan":
        scan(args)
    elif args.action == "scrape":
        if args.use_wget:
            scrape_wget(args)
        else:
            scrape(args)
339
340
# Run the command-line interface only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()