scan local folder for release and profiles.json files
[web/firmware-selector-openwrt-org.git] / misc / collect.py
1 #!/usr/bin/env python3
2 """
3 Tool to create overview.json files and update the config.js.
4 """
5
6 from pathlib import Path
7 import urllib.request
8 import tempfile
9 import argparse
10 import json
11 import glob
12 import sys
13 import os
14 import re
15
SUPPORTED_METADATA_VERSION = 1


def merge_profiles(profiles, download_url):
    """
    Merge several OpenWrt JSON profile documents into one overview object.

    profiles     -- mapping {<file-path>: <file-content>}; the content is
                    JSON text (str or bytes) and the path is only used in
                    diagnostics.
    download_url -- stored verbatim under "download_url" in the result.

    Returns a dict with the keys "version_code", "download_url" and
    "models", or an empty dict when no input had a supported metadata
    version.

    Files that are not valid JSON or that carry an unsupported
    "metadata_version" are reported on stderr and skipped. A file that is
    missing a required key aborts the program with exit status 1.
    """
    # json output data
    output = {}

    def get_title(title):
        # An explicit "title" wins; otherwise build "<vendor> <model> <variant>".
        if "title" in title:
            return title["title"]
        return "{} {} {}".format(
            title.get("vendor", ""), title["model"], title.get("variant", "")
        ).strip()

    def add_profile(profile_id, target, profile, code=None):
        # Only name and type of every image are kept in the overview.
        images = [
            {"name": image["name"], "type": image["type"]}
            for image in profile["images"]
        ]

        if target is None:
            target = profile["target"]

        for entry in profile["titles"]:
            title = get_title(entry)

            if not title:
                # `path` is the file currently processed by the loop below.
                sys.stderr.write(f"Empty title. Skip title in {path}\n")
                continue

            output["models"][title] = {
                "id": profile_id,
                "target": target,
                "images": images,
            }

            if code is not None:
                output["models"][title]["code"] = code

    for path, content in profiles.items():
        # Parse inside the try block so that a malformed file is skipped
        # instead of terminating the whole run with a traceback.
        try:
            obj = json.loads(content)
        except json.JSONDecodeError as e:
            sys.stderr.write(f"Skip {path}\n   {e}\n")
            continue

        try:
            if obj["metadata_version"] != SUPPORTED_METADATA_VERSION:
                sys.stderr.write(
                    f"{path} has unsupported metadata version: {obj['metadata_version']} => skip\n"
                )
                continue

            code = obj.get("version_code", obj.get("version_commit"))

            # The first accepted file defines the overview-wide version code.
            if "version_code" not in output:
                output = {
                    "version_code": code,
                    "download_url": download_url,
                    "models": {},
                }

            # if we have mixed codes/commits, store in device object
            if output["version_code"] == code:
                code = None

            if "profiles" in obj:
                # One file describing several profiles of a target.
                for profile_id, profile in obj["profiles"].items():
                    add_profile(profile_id, obj.get("target"), profile, code)
            else:
                # One file describing a single profile.
                add_profile(obj["id"], obj["target"], obj, code)
        except KeyError as e:
            sys.stderr.write(f"Abort on {path}\n   Missing key {e}\n")
            sys.exit(1)

    return output
83
84
def update_config(config_path, versions):
    """
    Rewrite the `versions: {...}` object inside the file at config_path.

    config_path -- path of the file to edit in place.
    versions    -- value whose string form replaces the braces block
                   (callers in this file pass a dict mapping a release name
                   to the path of its overview.json).

    Every match of `versions:` followed by a brace block without nested
    braces is replaced by `versions: <versions>`.
    """
    with open(config_path, "r") as file:
        content = file.read()

    replacement = f"versions: {versions}"
    # A callable replacement is inserted literally. A plain replacement
    # string would be parsed as a template, so backslashes in the version
    # data would be collapsed or read as group references.
    content = re.sub(r"versions:[\s]*{[^}]*}", lambda _match: replacement, content)

    with open(config_path, "w") as file:
        file.write(content)
93
94
95 """
96 Scrape profiles.json using links like https://downloads.openwrt.org/releases/19.07.3/targets/?json
97 Merge into overview.json files.
98 Update config.js.
99 """
100
101
def scrape(args):
    """
    Scrape profiles.json files from a download server, write one
    overview.json per release and update the versions in config.js.

    args.domain    -- URL of the page that links to the release folders.
    args.selector  -- folder that contains config.js; overview files are
                      written to <selector>/data/<release>/overview.json.
    args.formatted -- write indented JSON when true.

    Exits with status 1 if <selector>/config.js does not exist.
    """
    url = args.domain
    selector_path = args.selector
    config_path = f"{selector_path}/config.js"
    data_path = f"{selector_path}/data"
    versions = {}

    def handle_release(target):
        # "<target>/?json" returns a JSON array of file paths; download
        # every entry that is a profiles.json file.
        profiles = {}
        with urllib.request.urlopen(f"{target}/?json") as response:
            listing = json.loads(response.read().decode("utf-8"))
        for profile in listing:
            if not profile.endswith("/profiles.json"):
                continue
            with urllib.request.urlopen(f"{target}/{profile}") as response:
                profiles[f"{target}/{profile}"] = response.read()
        return profiles

    if not os.path.isfile(config_path):
        print(f"file not found: {config_path}")
        sys.exit(1)

    # fetch release URLs
    with urllib.request.urlopen(url) as infile:
        # Decode the page instead of scanning the repr() of a bytes object.
        page = infile.read().decode("utf-8", errors="replace")

    for path in re.findall(r"href=[\"']?([^'\" >]+)", page):
        if path.startswith("/") or not path.endswith("targets/"):
            continue

        # The release name is the folder directly above "targets/".
        parts = path.strip("/").split("/")
        if len(parts) < 2:
            # A bare "targets/" link has no release folder to name it after.
            continue
        release = parts[-2]
        download_url = f"{url}/{path}/{{target}}"

        profiles = handle_release(f"{url}/{path}")
        output = merge_profiles(profiles, download_url)
        if not output:
            continue

        Path(f"{data_path}/{release}").mkdir(parents=True, exist_ok=True)
        # write overview.json
        with open(f"{data_path}/{release}/overview.json", "w") as outfile:
            json.dump(
                output,
                outfile,
                indent=" " if args.formatted else None,
                sort_keys=True,
            )

        versions[release.upper()] = f"data/{release}/overview.json"

    update_config(config_path, versions)
143
144
145 """
146 Scrape profiles.json using wget (slower but more generic).
147 Merge into overview.json files.
148 Update config.js.
149 """
150
151
def scrape_wget(args):
    """
    Mirror all profiles.json files of a download server with wget, write
    one overview.json per release and update the versions in config.js.

    args.domain    -- URL handed to wget as the start of the recursive download.
    args.selector  -- folder that contains config.js; overview files are
                      written to <selector>/data/<release>/overview.json.
    args.formatted -- write indented JSON when true.

    Exits with status 1 if the wget executable cannot be started.
    """
    # Imported here because the file-level imports do not include it.
    import subprocess

    url = args.domain
    selector_path = args.selector
    config_path = f"{selector_path}/config.js"
    data_path = f"{selector_path}/data"
    versions = {}

    with tempfile.TemporaryDirectory() as tmp_dir:
        # download all profiles.json files
        # An argument list keeps the URL and the path away from a shell.
        # The exit status of wget is deliberately not checked.
        wget_cmd = [
            "wget",
            "-c",
            "-r",
            "-P",
            tmp_dir,
            "-A",
            "profiles.json",
            "--reject-regex",
            "kmods|packages",
            "--no-parent",
            url,
        ]
        try:
            subprocess.run(wget_cmd, check=False)
        except FileNotFoundError:
            # Stop here: continuing would rewrite config.js with no versions.
            sys.stderr.write("wget executable not found\n")
            sys.exit(1)

        # delete empty folders (bottom-up, so emptied parents go as well)
        for dirpath, _dirnames, _filenames in os.walk(tmp_dir, topdown=False):
            if dirpath != tmp_dir and not os.listdir(dirpath):
                os.rmdir(dirpath)

        # create overview.json files
        release_dirs = glob.glob(f"{tmp_dir}/*/snapshots") + glob.glob(
            f"{tmp_dir}/*/releases/*"
        )
        for path in release_dirs:
            release = os.path.basename(path)
            # Path below the temporary folder: <host>/... as created by wget.
            base = path[len(tmp_dir) + 1 :]

            profiles = {}
            for ppath in Path(path).rglob("profiles.json"):
                with open(ppath, "r") as file:
                    profiles[ppath] = file.read()

            if not profiles:
                continue

            output = merge_profiles(profiles, f"https://{base}/targets/{{target}}")
            if not output:
                # Nothing usable was merged; do not register an empty overview.
                continue

            Path(f"{data_path}/{release}").mkdir(parents=True, exist_ok=True)

            # write overview.json
            with open(f"{data_path}/{release}/overview.json", "w") as outfile:
                json.dump(
                    output,
                    outfile,
                    indent=" " if args.formatted else None,
                    sort_keys=True,
                )

            versions[release.upper()] = f"data/{release}/overview.json"

    update_config(config_path, versions)
197
198
199 """
200 Find and merge json files for a single release.
201 """
202
203
def merge(args):
    """
    Find and merge JSON files for a single release and print the result.

    args.input_path   -- list of folders (searched recursively for *.json)
                         and/or paths ending in ".json".
    args.download_url -- passed through to merge_profiles().
    args.formatted    -- print indented JSON when true.

    The merged overview is written to stdout. Exits with status 1 when an
    input path is neither a folder nor ends in ".json".
    """
    # OpenWrt JSON device files
    profiles = {}

    def add_path(path):
        with open(path, "r") as file:
            profiles[path] = file.read()

    for path in args.input_path:
        if os.path.isdir(path):
            for filepath in Path(path).rglob("*.json"):
                add_path(filepath)
        elif path.endswith(".json"):
            add_path(path)
        else:
            sys.stderr.write(f"Folder does not exists: {path}\n")
            # sys.exit() instead of the exit() helper injected by `site`.
            sys.exit(1)

    output = merge_profiles(profiles, args.download_url)

    json.dump(
        output,
        sys.stdout,
        indent=" " if args.formatted else None,
        sort_keys=True,
    )
229
230
231 """
232 Scan local directory for releases with profiles.json.
233 Merge into overview.json files.
234 Update config.js.
235 """
236
237
def scan(args):
    """
    Scan a local directory for releases with profiles.json files, write one
    overview.json per release and update the versions in config.js.

    args.directory -- folder searched for "snapshots" and "releases/*".
    args.domain    -- host or base URL used to build the download_url; a
                      value without a scheme gets "https://" prepended.
    args.selector  -- folder that contains config.js; overview files are
                      written to <selector>/data/<release>/overview.json.
    args.formatted -- write indented JSON when true.
    """
    selector_path = args.selector
    config_path = f"{selector_path}/config.js"
    data_path = f"{selector_path}/data"
    versions = {}

    # The command line help suggests passing a full URL, so only add the
    # scheme when it is missing (avoids "https://https://...").
    base_url = args.domain.rstrip("/")
    if "://" not in base_url:
        base_url = f"https://{base_url}"

    # create overview.json files
    release_dirs = glob.glob(f"{args.directory}/snapshots") + glob.glob(
        f"{args.directory}/releases/*"
    )
    for path in release_dirs:
        release = os.path.basename(path)
        # Path of the release relative to the scanned directory.
        base_dir = path[len(args.directory) + 1 :]

        profiles = {}
        for ppath in Path(path).rglob("profiles.json"):
            with open(ppath, "r") as file:
                profiles[ppath] = file.read()

        if not profiles:
            continue

        output = merge_profiles(profiles, f"{base_url}/{base_dir}/targets/{{target}}")
        if not output:
            # Nothing usable was merged; do not register an empty overview.
            continue

        Path(f"{data_path}/{release}").mkdir(parents=True, exist_ok=True)

        # write overview.json
        with open(f"{data_path}/{release}/overview.json", "w") as outfile:
            json.dump(
                output,
                outfile,
                indent=" " if args.formatted else None,
                sort_keys=True,
            )

        versions[release.upper()] = f"data/{release}/overview.json"

    update_config(config_path, versions)
275
276
def main():
    """
    Parse the command line and run the selected action.

    Actions: "merge" (print a merged overview for the given files),
    "scrape" (collect from a web server, optionally via wget) and
    "scan" (collect from a local directory).
    """
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "--formatted", action="store_true", help="Output formatted JSON data."
    )
    subparsers = parser.add_subparsers(dest="action", required=True)

    parser_merge = subparsers.add_parser(
        "merge",
        help="Merge OpenWrt JSON device files into a single overview.",
    )
    parser_merge.add_argument(
        "input_path",
        nargs="+",
        help="Input folder that is traversed for OpenWrt JSON device files.",
    )
    parser_merge.add_argument(
        "--download-url",
        action="store",
        default="",
        help="Link to get the image from. May contain {target}, {version} and {commit}",
    )

    parser_scrape = subparsers.add_parser("scrape", help="Scrape webpage for releases.")
    parser_scrape.add_argument(
        "domain", help="Domain to scrape. E.g. https://downloads.openwrt.org"
    )
    parser_scrape.add_argument("selector", help="Path the config.js file is in.")
    parser_scrape.add_argument(
        "--use-wget", action="store_true", help="Use wget to scrape the site."
    )

    parser_scan = subparsers.add_parser("scan", help="Scan directory for releases.")
    parser_scan.add_argument(
        "domain",
        help="Domain for download_url attribute in overview.json. E.g. https://downloads.openwrt.org",
    )
    parser_scan.add_argument("directory", help="Directory to scan for releases.")
    parser_scan.add_argument("selector", help="Path the config.js file is in.")

    args = parser.parse_args()

    # Exactly one action is set because the subparser is required.
    if args.action == "merge":
        merge(args)
    elif args.action == "scan":
        scan(args)
    elif args.action == "scrape":
        if args.use_wget:
            scrape_wget(args)
        else:
            scrape(args)


if __name__ == "__main__":
    main()