decode profiles.json content from bytes to string
[web/firmware-selector-openwrt-org.git] / misc / collect.py
1 #!/usr/bin/env python3
2 """
3 Tool to create overview.json files and update the config.js.
4 """
5
6 from pathlib import Path
7 import urllib.request
8 import tempfile
9 import argparse
10 import json
11 import glob
12 import sys
13 import os
14 import re
15
SUPPORTED_METADATA_VERSION = 1


def merge_profiles(profiles, download_url):
    """
    Merge the content of several profiles.json files into one overview object.

    profiles: mapping {<file-path>: <file-content>}. The content may be the
        raw bytes of a file (decoded as UTF-8 here) or an already decoded
        string.
    download_url: stored unchanged as "download_url" in the result.

    Returns a dict with the keys "version_code", "download_url" and "models".
    The dict is empty if no input file could be used.

    Files that are not valid JSON/UTF-8 or that have an unsupported
    metadata_version are reported on stderr and skipped. A missing key
    aborts the program with exit status 1.
    """
    # json output data
    output = {}

    def get_title(title):
        """Return the explicit title or build one from vendor/model/variant."""
        if "title" in title:
            return title["title"]
        return "{} {} {}".format(
            title.get("vendor", ""), title["model"], title.get("variant", "")
        ).strip()

    def add_profile(profile_id, target, profile, code=None):
        """Add one entry to output["models"] for each title of the profile."""
        images = [
            {"name": image["name"], "type": image["type"]}
            for image in profile["images"]
        ]

        if target is None:
            target = profile["target"]

        for entry in profile["titles"]:
            title = get_title(entry)

            if not title:
                sys.stderr.write(
                    "Empty title. Skip title for {}\n".format(profile_id)
                )
                continue

            output["models"][title] = {
                "id": profile_id,
                "target": target,
                "images": images,
            }

            if code is not None:
                output["models"][title]["code"] = code

    for path, content in profiles.items():
        # Decoding and parsing happen inside the try block so that the
        # handlers below can actually see the errors they are written for.
        try:
            # callers pass either raw bytes (downloads) or text (local files)
            if isinstance(content, (bytes, bytearray)):
                content = content.decode("utf-8")
            obj = json.loads(content)

            if obj["metadata_version"] != SUPPORTED_METADATA_VERSION:
                sys.stderr.write(
                    "{} has unsupported metadata version: {} => skip\n".format(
                        path, obj["metadata_version"]
                    )
                )
                continue

            code = obj.get("version_code", obj.get("version_commit"))

            # the first usable file defines the version code of the overview
            if "version_code" not in output:
                output = {
                    "version_code": code,
                    "download_url": download_url,
                    "models": {},
                }

            # if we have mixed codes/commits, store in device object
            if output["version_code"] == code:
                code = None

            if "profiles" in obj:
                for profile_id, profile in obj["profiles"].items():
                    add_profile(profile_id, obj.get("target"), profile, code)
            else:
                add_profile(obj["id"], obj["target"], obj, code)
        except (json.decoder.JSONDecodeError, UnicodeDecodeError) as e:
            sys.stderr.write("Skip {}\n {}\n".format(path, e))
        except KeyError as e:
            sys.stderr.write("Abort on {}\n Missing key {}\n".format(path, e))
            sys.exit(1)

    return output
85
86
def update_config(config_path, versions):
    """
    Replace the "versions: {...}" object in the config file.

    config_path: path of the file to rewrite in place.
    versions: mapping that is formatted with str.format() and written as the
        new value of "versions".
    """
    with open(config_path, "r") as file:
        content = file.read()

    replacement = "versions: {}".format(versions)
    # A function is passed as replacement because re.sub() would otherwise
    # interpret backslashes in the formatted versions as escape sequences
    # or group references.
    content = re.sub(
        "versions:[\\s]*{[^}]*}", lambda match: replacement, content
    )

    with open(config_path, "w") as file:
        file.write(content)
95
96
def scrape(args):
    """
    Scrape profiles.json using links like
    https://downloads.openwrt.org/releases/19.07.3/targets/?json
    Merge into overview.json files.
    Update config.js.

    args: parsed command line arguments. Reads args.domain (URL of the page
        that links to the releases), args.selector (directory that contains
        config.js) and args.formatted (write indented JSON).

    Exits with status 1 if config.js does not exist.
    """
    url = args.domain
    selector_path = args.selector
    config_path = "{}/config.js".format(selector_path)
    data_path = "{}/data".format(selector_path)
    versions = {}

    def handle_release(target):
        """Download every profiles.json listed by <target>/?json."""
        profiles = {}
        with urllib.request.urlopen("{}/?json".format(target)) as listing:
            array = json.loads(listing.read().decode("utf-8"))

        for profile in array:
            if not profile.endswith("/profiles.json"):
                continue
            profile_url = "{}/{}".format(target, profile)
            with urllib.request.urlopen(profile_url) as response:
                # raw bytes; merge_profiles() decodes the content
                profiles[profile_url] = response.read()
        return profiles

    if not os.path.isfile(config_path):
        print("file not found: {}".format(config_path))
        sys.exit(1)

    # fetch release URLs
    with urllib.request.urlopen(url) as infile:
        # Decode the page instead of using str(bytes): the latter yields the
        # repr "b'...'" with escaped quotes, which corrupts quoted hrefs.
        page = infile.read().decode("utf-8", errors="replace")

    for path in re.findall(r"href=[\"']?([^'\" >]+)", page):
        if path.startswith("/") or not path.endswith("targets/"):
            continue

        parts = path.strip("/").split("/")
        if len(parts) < 2:
            # a bare "targets/" link carries no release name
            sys.stderr.write("Skip {}\n no release name in link\n".format(path))
            continue
        release = parts[-2]
        download_url = "{}/{}/{{target}}".format(url, path)

        profiles = handle_release("{}/{}".format(url, path))
        output = merge_profiles(profiles, download_url)
        if not output:
            continue

        os.makedirs("{}/{}".format(data_path, release), exist_ok=True)
        # write overview.json
        with open(
            "{}/{}/overview.json".format(data_path, release), "w"
        ) as outfile:
            if args.formatted:
                json.dump(output, outfile, indent=" ", sort_keys=True)
            else:
                json.dump(output, outfile, sort_keys=True)

        # only reference releases whose overview.json was actually written
        versions[release.upper()] = "data/{}/overview.json".format(release)

    update_config(config_path, versions)
147
148
def scrape_wget(args):
    """
    Scrape profiles.json using wget (slower but more generic).
    Merge into overview.json files.
    Update config.js.

    args: parsed command line arguments. Reads args.domain (URL passed to
        wget), args.selector (directory that contains config.js) and
        args.formatted (write indented JSON).

    Exits with status 1 if the wget executable cannot be started.
    """
    # local import: subprocess is not among the imports at the top of the file
    import subprocess

    url = args.domain
    selector_path = args.selector
    config_path = "{}/config.js".format(selector_path)
    data_path = "{}/data".format(selector_path)
    versions = {}

    with tempfile.TemporaryDirectory() as tmp_dir:
        # download all profiles.json files
        # An argument list is used instead of a shell command line so that
        # the URL from the command line is never interpreted by a shell.
        wget_cmd = [
            "wget",
            "-c",
            "-r",
            "-P",
            tmp_dir,
            "-A",
            "profiles.json",
            "--reject-regex",
            "kmods|packages",
            "--no-parent",
            url,
        ]
        try:
            # the exit status is not checked, partial downloads are still used
            subprocess.run(wget_cmd)
        except FileNotFoundError:
            sys.stderr.write("wget not found\n")
            sys.exit(1)

        # delete empty folders (bottom-up, so emptied parents go as well)
        for root, _dirs, _files in os.walk(tmp_dir, topdown=False):
            if root != tmp_dir and not os.listdir(root):
                os.rmdir(root)

        # create overview.json files
        release_paths = glob.glob("{}/*/snapshots".format(tmp_dir)) + glob.glob(
            "{}/*/releases/*".format(tmp_dir)
        )
        for path in release_paths:
            release = os.path.basename(path)
            # path relative to the download directory: <host>/<release path>
            base = path[len(tmp_dir) + 1 :]

            profiles = {}
            for ppath in Path(path).rglob("profiles.json"):
                # raw bytes; merge_profiles() decodes the content
                with open(ppath, "rb") as file:
                    profiles[ppath] = file.read()

            if not profiles:
                continue

            output = merge_profiles(
                profiles, "https://{}/targets/{{target}}".format(base)
            )
            if not output:
                # nothing usable for this release, do not reference it
                continue

            os.makedirs("{}/{}".format(data_path, release), exist_ok=True)

            # write overview.json
            with open(
                "{}/{}/overview.json".format(data_path, release), "w"
            ) as outfile:
                if args.formatted:
                    json.dump(output, outfile, indent=" ", sort_keys=True)
                else:
                    json.dump(output, outfile, sort_keys=True)

            versions[release.upper()] = "data/{}/overview.json".format(release)

    update_config(config_path, versions)
204
205
def merge(args):
    """
    Find and merge json files for a single release.

    args: parsed command line arguments. Reads args.input_path (list of
        *.json files and/or directories that are searched recursively for
        *.json files), args.download_url and args.formatted.

    The merged object is written as JSON to stdout. Exits with status 1 if
    an input path is neither a directory nor a path ending in ".json".
    """
    # OpenWrt JSON device files
    profiles = {}

    def add_path(path):
        # raw bytes; merge_profiles() decodes the content
        with open(path, "rb") as file:
            profiles[path] = file.read()

    for path in args.input_path:
        if os.path.isdir(path):
            for filepath in Path(path).rglob("*.json"):
                add_path(filepath)
        elif path.endswith(".json"):
            add_path(path)
        else:
            sys.stderr.write("Folder does not exists: {}\n".format(path))
            sys.exit(1)

    output = merge_profiles(profiles, args.download_url)

    if args.formatted:
        json.dump(output, sys.stdout, indent=" ", sort_keys=True)
    else:
        json.dump(output, sys.stdout, sort_keys=True)
236
237
def scan(args):
    """
    Scan local directory for releases with profiles.json.
    Merge into overview.json files.
    Update config.js.

    args: parsed command line arguments. Reads args.domain (used to build
        the download_url, with or without a URL scheme), args.directory
        (searched for "snapshots" and "releases/*"), args.selector
        (directory that contains config.js) and args.formatted.
    """
    selector_path = args.selector
    config_path = "{}/config.js".format(selector_path)
    data_path = "{}/data".format(selector_path)
    versions = {}

    # The command line help gives "https://downloads.openwrt.org" as example
    # for the domain, so only add a scheme if there is none yet.
    domain = args.domain.rstrip("/")
    if "://" not in domain:
        domain = "https://{}".format(domain)

    # create overview.json files
    release_paths = glob.glob("{}/snapshots".format(args.directory)) + glob.glob(
        "{}/releases/*".format(args.directory)
    )
    for path in release_paths:
        release = os.path.basename(path)
        # path of the release relative to the scanned directory
        base_dir = path[len(args.directory) + 1 :]

        profiles = {}
        for ppath in Path(path).rglob("profiles.json"):
            # raw bytes; merge_profiles() decodes the content
            with open(ppath, "rb") as file:
                profiles[ppath] = file.read()

        if not profiles:
            continue

        output = merge_profiles(
            profiles, "{}/{}/targets/{{target}}".format(domain, base_dir)
        )
        if not output:
            # nothing usable for this release, do not reference it
            continue

        os.makedirs("{}/{}".format(data_path, release), exist_ok=True)

        # write overview.json
        with open("{}/{}/overview.json".format(data_path, release), "w") as outfile:
            if args.formatted:
                json.dump(output, outfile, indent=" ", sort_keys=True)
            else:
                json.dump(output, outfile, sort_keys=True)

        versions[release.upper()] = "data/{}/overview.json".format(release)

    update_config(config_path, versions)
281
282
def main(argv=None):
    """
    Parse the command line and run the selected action.

    argv: list of command line arguments without the program name. If None,
        argparse reads them from sys.argv.

    Actions: "merge", "scrape" (optionally with --use-wget) and "scan".
    """
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "--formatted", action="store_true", help="Output formatted JSON data."
    )
    subparsers = parser.add_subparsers(dest="action")
    subparsers.required = True

    parser_merge = subparsers.add_parser(
        "merge",
        help="Find and merge JSON files for a single release.",
    )
    parser_merge.add_argument(
        "input_path",
        nargs="+",
        help="Input folder that is traversed for OpenWrt JSON device files.",
    )
    parser_merge.add_argument(
        "--download-url",
        action="store",
        default="",
        help="Link to get the image from. May contain {target}, {version} and {commit}",
    )

    parser_scrape = subparsers.add_parser("scrape", help="Scrape webpage for releases.")
    parser_scrape.add_argument(
        "domain", help="Domain to scrape. E.g. https://downloads.openwrt.org"
    )
    parser_scrape.add_argument("selector", help="Path the config.js file is in.")
    parser_scrape.add_argument(
        "--use-wget", action="store_true", help="Use wget to scrape the site."
    )

    parser_scan = subparsers.add_parser("scan", help="Scan directory for releases.")
    parser_scan.add_argument(
        "domain",
        help="Domain for download_url attribute in overview.json. E.g. https://downloads.openwrt.org",
    )
    parser_scan.add_argument("directory", help="Directory to scan for releases.")
    parser_scan.add_argument("selector", help="Path the config.js file is in.")

    args = parser.parse_args(argv)

    # the actions are mutually exclusive, exactly one of them runs
    if args.action == "merge":
        merge(args)
    elif args.action == "scan":
        scan(args)
    elif args.action == "scrape":
        if args.use_wget:
            scrape_wget(args)
        else:
            scrape(args)


if __name__ == "__main__":
    main()