misc/collect.py: do not upper case the release name
[web/firmware-selector-openwrt-org.git] / misc / collect.py
1 #!/usr/bin/env python3
2 """
3 Tool to create overview.json files and update the config.js.
4 """
5
6 from pathlib import Path
7 import urllib.request
8 import tempfile
9 import argparse
10 import json
11 import glob
12 import sys
13 import os
14 import re
15
# Value that the "metadata_version" field of a profiles.json file is compared against.
SUPPORTED_METADATA_VERSION = 1

# Explicit check instead of `assert`: assert statements are removed when
# Python runs with -O, which would silently disable the version check.
if sys.version_info < (3, 5):
    raise RuntimeError("Python version too old. Python >=3.5.0 needed.")
19
20
21 # accepts {<file-path>: <file-content>}
def merge_profiles(profiles, download_url):
    """
    Merge the content of several profiles.json files into one overview object.

    profiles: dict of {<file-path>: <file-content>}; the path is only used
        in messages written to stderr, the content is parsed as JSON.
    download_url: stored unchanged under the "download_url" key.

    Returns an empty dict if no file was accepted. Otherwise returns a dict
    with the keys "version_code", "download_url" and "models", where
    "models" maps a device title to {"id", "target", "images"} plus an
    optional "code" if the file's version code differs from the first one.

    Files containing invalid JSON or an unsupported metadata version are
    skipped with a message on stderr. A missing required key terminates the
    program with exit status 1.
    """
    # json output data
    output = {}

    def get_title(title):
        # Prefer an explicit title, otherwise build "<vendor> <model> <variant>".
        if "title" in title:
            return title["title"]
        return "{} {} {}".format(
            title.get("vendor", ""), title["model"], title.get("variant", "")
        ).strip()

    def add_profile(path, profile_id, target, profile, code=None):
        images = [
            {"name": image["name"], "type": image["type"]}
            for image in profile["images"]
        ]

        # fall back to the target stored in the profile itself
        if target is None:
            target = profile["target"]

        for entry in profile["titles"]:
            title = get_title(entry)

            if not title:
                sys.stderr.write(
                    "Empty title. Skip title for {} in {}\n".format(profile_id, path)
                )
                continue

            model = {"id": profile_id, "target": target, "images": images}
            if code is not None:
                model["code"] = code
            output["models"][title] = model

    for path, content in profiles.items():
        try:
            # Parsing happens inside the try block so that a file with
            # invalid JSON is skipped instead of crashing the whole run.
            obj = json.loads(content)

            if obj["metadata_version"] != SUPPORTED_METADATA_VERSION:
                sys.stderr.write(
                    "{} has unsupported metadata version: {} => skip\n".format(
                        path, obj["metadata_version"]
                    )
                )
                continue

            code = obj.get("version_code", obj.get("version_commit"))

            # the first accepted file defines the common version code
            if "version_code" not in output:
                output = {
                    "version_code": code,
                    "download_url": download_url,
                    "models": {},
                }

            # if we have mixed codes/commits, store in device object
            if output["version_code"] == code:
                code = None

            if "profiles" in obj:
                for profile_id, profile in obj["profiles"].items():
                    add_profile(path, profile_id, obj.get("target"), profile, code)
            else:
                add_profile(path, obj["id"], obj["target"], obj, code)
        except json.decoder.JSONDecodeError as e:
            sys.stderr.write("Skip {}\n {}\n".format(path, e))
        except KeyError as e:
            sys.stderr.write("Abort on {}\n Missing key {}\n".format(path, e))
            sys.exit(1)

    return output
89
90
def update_config(config_path, versions):
    """
    Rewrite the "versions: {...}" object inside the given config file.

    config_path: path of the file to modify in place.
    versions: object whose str() form is inserted after "versions: "
        (callers in this file pass a dict of release name to overview path).

    Every match of "versions:" followed by a brace-delimited block without
    nested closing braces is replaced; the rest of the file is kept as is.
    """
    with open(str(config_path), "r") as file:
        content = file.read()

    replacement = "versions: {}".format(versions)
    # A callable is used so the replacement is inserted literally; a plain
    # string would be treated as a template in which backslashes are
    # interpreted (e.g. "\\\\" collapses to "\\", "\\d" raises an error).
    content = re.sub(r"versions:[\s]*{[^}]*}", lambda match: replacement, content)

    with open(str(config_path), "w") as file:
        file.write(content)
99
100
101 """
102 Scrape profiles.json using links like https://downloads.openwrt.org/releases/19.07.3/targets/?json
103 Merge into overview.json files.
104 Update config.js.
105 """
106
107
def scrape(args):
    """
    Scrape a download server for releases and write one overview.json each.

    Reads args.domain (base URL to scrape), args.selector (directory that
    contains config.js and receives data/<release>/overview.json) and
    args.formatted (write indented JSON). Afterwards the versions object
    in config.js is updated via update_config().

    Exits with status 1 if config.js does not exist.
    """
    url = args.domain
    selector_path = args.selector
    config_path = "{}/config.js".format(selector_path)
    data_path = "{}/data".format(selector_path)
    versions = {}

    def handle_release(target):
        # Fetch the JSON file listing of a targets/ folder and download
        # every listed profiles.json; returns {<url>: <content>}.
        profiles = {}
        with urllib.request.urlopen("{}/?json".format(target)) as file:
            array = json.loads(file.read().decode("utf-8"))
        for profile in array:
            if not profile.endswith("/profiles.json"):
                continue
            profile_url = "{}/{}".format(target, profile)
            with urllib.request.urlopen(profile_url) as file:
                profiles[profile_url] = file.read().decode("utf-8")
        return profiles

    if not os.path.isfile(config_path):
        print("file not found: {}".format(config_path))
        sys.exit(1)

    # fetch release URLs
    with urllib.request.urlopen(url) as infile:
        # Decode the body instead of calling str() on it: str(bytes) yields
        # the repr ("b'...'") in which quotes may be backslash-escaped,
        # which breaks the href pattern below.
        page = infile.read().decode("utf-8", errors="replace")

    indent = " " if args.formatted else None

    for path in re.findall(r"href=[\"']?([^'\" >]+)", page):
        if path.startswith("/") or not path.endswith("targets/"):
            continue

        # the release name is the folder that contains targets/
        parts = path.strip("/").split("/")
        if len(parts) < 2:
            # a bare "targets/" link has no parent folder to name the release
            continue
        release = parts[-2]
        download_url = "{}/{}/{{target}}".format(url, path)

        profiles = handle_release("{}/{}".format(url, path))
        output = merge_profiles(profiles, download_url)
        if not output:
            continue

        os.makedirs("{}/{}".format(data_path, release), exist_ok=True)
        # write overview.json
        with open("{}/{}/overview.json".format(data_path, release), "w") as outfile:
            json.dump(output, outfile, indent=indent, sort_keys=True)

        versions[release] = "data/{}/overview.json".format(release)

    update_config(config_path, versions)
151
152 update_config(config_path, versions)
153
154
155 """
156 Scrape profiles.json using wget (slower but more generic).
157 Merge into overview.json files.
158 Update config.js.
159 """
160
161
def scrape_wget(args):
    """
    Mirror all profiles.json files of a site with wget and write one
    overview.json per release found.

    Reads args.domain (URL handed to wget), args.selector (directory that
    contains config.js and receives data/<release>/overview.json) and
    args.formatted (write indented JSON). Afterwards the versions object
    in config.js is updated via update_config().

    Requires the external programs wget and find; the exit status of both
    is ignored. An OSError is raised if one of them cannot be started.
    """
    # Local import: only this function needs it.
    import subprocess

    url = args.domain
    selector_path = args.selector
    config_path = "{}/config.js".format(selector_path)
    data_path = "{}/data".format(selector_path)
    versions = {}
    indent = " " if args.formatted else None

    with tempfile.TemporaryDirectory() as tmp_dir:
        # download all profiles.json files
        # Arguments are passed as a list (no shell), so special characters
        # in the URL cannot be interpreted as shell syntax.
        subprocess.call(
            [
                "wget",
                "-c",
                "-r",
                "-P",
                tmp_dir,
                "-A",
                "profiles.json",
                "--reject-regex",
                "kmods|packages",
                "--no-parent",
                url,
            ]
        )

        # delete empty folders (below tmp_dir, never tmp_dir itself)
        subprocess.call(
            ["find", tmp_dir, "-mindepth", "1", "-type", "d", "-empty", "-delete"]
        )

        # create overview.json files
        release_paths = glob.glob("{}/*/snapshots".format(tmp_dir)) + glob.glob(
            "{}/*/releases/*".format(tmp_dir)
        )
        for path in release_paths:
            release = os.path.basename(path)
            # path relative to tmp_dir: <host>/.../<release>
            base = path[len(tmp_dir) + 1 :]

            profiles = {}
            for ppath in Path(path).rglob("profiles.json"):
                with open(str(ppath), "r") as file:
                    profiles[ppath] = file.read()

            if not profiles:
                continue

            output = merge_profiles(
                profiles, "https://{}/targets/{{target}}".format(base)
            )
            # Same rule as scrape(): do not publish a release for which
            # no profile could be merged.
            if not output:
                continue

            versions[release] = "data/{}/overview.json".format(release)
            os.makedirs("{}/{}".format(data_path, release), exist_ok=True)

            # write overview.json
            with open("{}/{}/overview.json".format(data_path, release), "w") as outfile:
                json.dump(output, outfile, indent=indent, sort_keys=True)

    update_config(config_path, versions)
208
209 update_config(config_path, versions)
210
211
212 """
213 Find and merge json files for a single release.
214 """
215
216
def merge(args):
    """
    Merge JSON device files and print the resulting overview to stdout.

    Reads args.input_path (list of folders and/or *.json files),
    args.download_url (passed on to merge_profiles()) and args.formatted
    (print indented JSON). Folders are searched recursively for *.json.

    Exits with status 1 if an input path is neither a directory nor a
    name ending in ".json".
    """
    # OpenWrt JSON device files: {<file-path>: <file-content>}
    profiles = {}

    def add_path(path):
        # Read as UTF-8 explicitly (as scan() does) instead of relying on
        # the locale's default encoding.
        with open(str(path), "r", encoding="utf-8") as file:
            profiles[path] = file.read()

    for path in args.input_path:
        if os.path.isdir(path):
            for filepath in Path(path).rglob("*.json"):
                add_path(filepath)
            continue

        if not path.endswith(".json"):
            sys.stderr.write("Folder does not exists: {}\n".format(path))
            sys.exit(1)
        add_path(path)

    output = merge_profiles(profiles, args.download_url)

    indent = " " if args.formatted else None
    json.dump(output, sys.stdout, indent=indent, sort_keys=True)
242
243
244 """
245 Scan local directory for releases with profiles.json.
246 Merge into overview.json files.
247 Update config.js.
248 """
249
250
def scan(args):
    """
    Scan a local directory tree for releases and write one overview.json each.

    Reads args.directory (searched for "snapshots" and "releases/*"),
    args.domain (used to build the download_url; either a bare host name,
    to which "https://" is prepended, or a URL that already has a scheme),
    args.selector (directory that contains config.js and receives
    data/<release>/overview.json) and args.formatted (write indented JSON).
    Afterwards the versions object in config.js is updated via
    update_config().
    """
    selector_path = args.selector
    config_path = "{}/config.js".format(selector_path)
    data_path = "{}/data".format(selector_path)
    versions = {}
    indent = " " if args.formatted else None

    # The command line help shows the domain with a scheme
    # (https://downloads.openwrt.org); only add one if it is missing so the
    # result is never "https://https://...".
    domain = args.domain
    if "://" not in domain:
        domain = "https://{}".format(domain)

    # create overview.json files
    release_paths = glob.glob("{}/snapshots".format(args.directory)) + glob.glob(
        "{}/releases/*".format(args.directory)
    )
    for path in release_paths:
        release = os.path.basename(path)
        # path relative to the scanned directory
        base_dir = path[len(args.directory) + 1 :]

        profiles = {}
        for ppath in Path(path).rglob("profiles.json"):
            with open(str(ppath), "r", encoding="utf-8") as file:
                profiles[ppath] = file.read()

        if not profiles:
            continue

        output = merge_profiles(
            profiles, "{}/{}/targets/{{target}}".format(domain, base_dir)
        )
        # Same rule as scrape(): do not publish a release for which no
        # profile could be merged.
        if not output:
            continue

        versions[release] = "data/{}/overview.json".format(release)
        os.makedirs("{}/{}".format(data_path, release), exist_ok=True)

        # write overview.json
        with open("{}/{}/overview.json".format(data_path, release), "w") as outfile:
            json.dump(output, outfile, indent=indent, sort_keys=True)

    update_config(config_path, versions)
285
286 update_config(config_path, versions)
287
288
def main():
    """
    Parse the command line and run the selected action.

    Sub-commands: "merge", "scrape" (optionally with --use-wget) and
    "scan". The global option --formatted selects indented JSON output.
    A sub-command is required; argparse exits with an error otherwise.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "--formatted", action="store_true", help="Output formatted JSON data."
    )
    subparsers = parser.add_subparsers(dest="action")
    subparsers.required = True

    parser_merge = subparsers.add_parser(
        "merge",
        help="Find and merge JSON files for a single release.",
    )
    parser_merge.add_argument(
        "input_path",
        nargs="+",
        help="Input folder that is traversed for OpenWrt JSON device files.",
    )
    parser_merge.add_argument(
        "--download-url",
        action="store",
        default="",
        help="Link to get the image from. May contain {target}, {version} and {commit}",
    )

    parser_scrape = subparsers.add_parser("scrape", help="Scrape webpage for releases.")
    parser_scrape.add_argument(
        "domain", help="Domain to scrape. E.g. https://downloads.openwrt.org"
    )
    parser_scrape.add_argument("selector", help="Path the config.js file is in.")
    parser_scrape.add_argument(
        "--use-wget", action="store_true", help="Use wget to scrape the site."
    )

    parser_scan = subparsers.add_parser("scan", help="Scan directory for releases.")
    parser_scan.add_argument(
        "domain",
        help="Domain for download_url attribute in overview.json. E.g. https://downloads.openwrt.org",
    )
    parser_scan.add_argument("directory", help="Directory to scan for releases.")
    parser_scan.add_argument("selector", help="Path the config.js file is in.")

    args = parser.parse_args()

    # exactly one action is set because the sub-command is required
    if args.action == "merge":
        merge(args)
    elif args.action == "scan":
        scan(args)
    elif args.action == "scrape":
        if args.use_wget:
            scrape_wget(args)
        else:
            scrape(args)
342 scrape(args)
343
344
345 if __name__ == "__main__":
346 main()