collect.py: add flake8 code style tool
[web/firmware-selector-openwrt-org.git] / misc / collect.py
1 #!/usr/bin/env python3
2 """
3 Tool to create overview.json files and update the config.js.
4 """
5
6 from pathlib import Path
7 import urllib.request
8 import tempfile
9 import argparse
10 import json
11 import glob
12 import sys
13 import os
14 import re
15
SUPPORTED_METADATA_VERSION = 1


def merge_profiles(profiles, download_url):
    """
    Merge the content of several profiles JSON files into one overview object.

    Args:
        profiles: Mapping of {<file-path>: <file-content>}. Each content is a
            JSON document (str or bytes) describing either a single device
            or a "profiles" collection of devices.
        download_url: Stored unchanged under "download_url" in the result.

    Returns:
        An empty dict if no input file was usable, otherwise a dict with the
        keys "version_code", "download_url" and "models". "models" maps each
        device title to {"id", "target", "images"} plus an optional "code"
        when the file's version code differs from the overall one.

    Files that are not valid JSON or that carry an unsupported
    metadata_version are reported on stderr and skipped. A file that lacks
    a required key is reported on stderr and terminates the program with
    exit status 1.
    """
    # json output data
    output = {}

    def get_title(title):
        # An explicit title wins; otherwise build "<vendor> <model> <variant>".
        if "title" in title:
            return title["title"]
        return "{} {} {}".format(
            title.get("vendor", ""), title["model"], title.get("variant", "")
        ).strip()

    def add_profile(path, profile_id, target, profile, code=None):
        # Only the image name and type are kept in the overview.
        images = [
            {"name": image["name"], "type": image["type"]}
            for image in profile["images"]
        ]

        if target is None:
            target = profile["target"]

        for entry in profile["titles"]:
            title = get_title(entry)

            if not title:
                sys.stderr.write(f"Empty title. Skip title in {path}\n")
                continue

            output["models"][title] = {
                "id": profile_id,
                "target": target,
                "images": images,
            }

            if code is not None:
                output["models"][title]["code"] = code

    for path, content in profiles.items():
        # Parsing must happen inside the try, otherwise a single malformed
        # file aborts the whole merge instead of being skipped.
        try:
            obj = json.loads(content)
        except (json.JSONDecodeError, UnicodeDecodeError) as e:
            sys.stderr.write(f"Skip {path}\n {e}\n")
            continue

        try:
            if obj["metadata_version"] != SUPPORTED_METADATA_VERSION:
                sys.stderr.write(
                    f"{path} has unsupported metadata version: {obj['metadata_version']} => skip\n"
                )
                continue

            code = obj.get("version_code", obj.get("version_commit"))

            # The first usable file defines the overall version code.
            if "version_code" not in output:
                output = {
                    "version_code": code,
                    "download_url": download_url,
                    "models": {},
                }

            # if we have mixed codes/commits, store in device object
            if output["version_code"] == code:
                code = None

            if "profiles" in obj:
                for profile_id, profile in obj["profiles"].items():
                    add_profile(path, profile_id, obj.get("target"), profile, code)
            else:
                add_profile(path, obj["id"], obj["target"], obj, code)
        except KeyError as e:
            sys.stderr.write(f"Abort on {path}\n Missing key {e}\n")
            sys.exit(1)

    return output
83
84
def update_config(config_path, versions):
    """
    Rewrite the "versions: {...}" entries of the file at config_path.

    Every match of the pattern (the word "versions:", optional whitespace,
    then a brace-delimited span without a nested closing brace) is replaced
    by "versions: " followed by str(versions).

    Args:
        config_path: Path of the file to rewrite in place.
        versions: Object whose str() form is inserted, e.g. a dict that maps
            version names to overview.json paths.
    """
    with open(config_path, "r") as file:
        content = file.read()

    replacement = f"versions: {versions}"
    # A callable replacement is inserted literally. A plain string would be
    # treated as a template, so backslashes in it would be reinterpreted.
    content = re.sub(r"versions:[\s]*{[^}]*}", lambda _match: replacement, content)

    with open(config_path, "w") as file:
        file.write(content)
93
94
95 """
96 Scrape profiles.json using links like https://downloads.openwrt.org/releases/19.07.3/targets/?json
97 Merge into overview.json files.
98 Update config.js.
99 """
100
101
def scrape(args):
    """
    Scrape profiles.json files via the "?json" listings of a download server.

    Reads the page at args.domain, follows every relative link that ends in
    "targets/", merges the profiles.json files listed there into
    <args.selector>/data/<release>/overview.json and finally rewrites the
    versions entry of <args.selector>/config.js.

    Args:
        args: Parsed command line arguments; uses args.domain, args.selector
            and args.formatted.

    Exits with status 1 if config.js does not exist in args.selector.
    """
    url = args.domain
    selector_path = args.selector
    config_path = f"{selector_path}/config.js"
    data_path = f"{selector_path}/data"
    versions = {}

    def handle_release(target):
        # Returns {<profiles.json URL>: <raw response body>} for one release.
        profiles = {}
        with urllib.request.urlopen(f"{target}/?json") as listing:
            array = json.loads(listing.read().decode("utf-8"))
        for profile in array:
            if not profile.endswith("/profiles.json"):
                continue
            with urllib.request.urlopen(f"{target}/{profile}") as response:
                profiles[f"{target}/{profile}"] = response.read()
        return profiles

    if not os.path.isfile(config_path):
        print(f"file not found: {config_path}")
        sys.exit(1)

    # fetch release URLs
    with urllib.request.urlopen(url) as infile:
        # Decode the page instead of using str() on the bytes object. The
        # bytes repr escapes quotes and newlines, which corrupts matches.
        html = infile.read().decode("utf-8", errors="replace")

    for path in re.findall(r"href=[\"']?([^'\" >]+)", html):
        if path.startswith("/") or not path.endswith("targets/"):
            continue

        parts = path.strip("/").split("/")
        if len(parts) < 2:
            # A bare "targets/" link has no release component to name.
            continue
        release = parts[-2]
        download_url = f"{url}/{path}/{{target}}"

        profiles = handle_release(f"{url}/{path}")
        output = merge_profiles(profiles, download_url)
        if len(output) > 0:
            Path(f"{data_path}/{release}").mkdir(parents=True, exist_ok=True)
            # write overview.json
            with open(f"{data_path}/{release}/overview.json", "w") as outfile:
                if args.formatted:
                    json.dump(output, outfile, indent=" ", sort_keys=True)
                else:
                    json.dump(output, outfile, sort_keys=True)

            versions[release.upper()] = f"data/{release}/overview.json"

    update_config(config_path, versions)
143
144
145 """
146 Scrape profiles.json using wget (slower but more generic).
147 Merge into overview.json files.
148 Update config.js.
149 """
150
151
def scrape_wget(args):
    """
    Scrape profiles.json files by mirroring them with the wget program.

    Downloads all profiles.json files below args.domain into a temporary
    directory, merges the files of every snapshots/release folder into
    <args.selector>/data/<release>/overview.json and finally rewrites the
    versions entry of <args.selector>/config.js.

    Args:
        args: Parsed command line arguments; uses args.domain, args.selector
            and args.formatted.

    Raises:
        FileNotFoundError: If the wget executable cannot be started.
    """
    # Function-scope import keeps this block self-contained.
    import subprocess

    url = args.domain
    selector_path = args.selector
    config_path = f"{selector_path}/config.js"
    data_path = f"{selector_path}/data"
    versions = {}

    with tempfile.TemporaryDirectory() as tmp_dir:
        # download all profiles.json files
        # An argument list avoids shell parsing of the URL and paths.
        subprocess.run(
            [
                "wget",
                "-c",
                "-r",
                "-P",
                tmp_dir,
                "-A",
                "profiles.json",
                "--reject-regex",
                "kmods|packages",
                "--no-parent",
                url,
            ]
        )

        # delete empty folders (bottom-up, so emptied parents go as well)
        for dirpath, _dirnames, _filenames in os.walk(tmp_dir, topdown=False):
            if dirpath != tmp_dir and not os.listdir(dirpath):
                os.rmdir(dirpath)

        # create overview.json files
        release_dirs = glob.glob(f"{tmp_dir}/*/snapshots") + glob.glob(
            f"{tmp_dir}/*/releases/*"
        )
        for path in release_dirs:
            release = os.path.basename(path)
            # path relative to the temporary directory: <host>/<...>/<release>
            base = path[len(tmp_dir) + 1 :]

            versions[release.upper()] = f"data/{release}/overview.json"

            profiles = {}
            for ppath in Path(path).rglob("profiles.json"):
                with open(ppath, "r") as file:
                    profiles[ppath] = file.read()

            output = merge_profiles(profiles, f"https://{base}/targets/{{target}}")
            Path(f"{data_path}/{release}").mkdir(parents=True, exist_ok=True)

            # write overview.json
            with open(f"{data_path}/{release}/overview.json", "w") as outfile:
                if args.formatted:
                    json.dump(output, outfile, indent=" ", sort_keys=True)
                else:
                    json.dump(output, outfile, sort_keys=True)

    update_config(config_path, versions)
194
195
196 """
197 Find and merge json files for a single release.
198 """
199
200
def merge(args):
    """
    Collect JSON files from the given inputs and print the merged overview.

    Each entry of args.input_path is either a directory, which is searched
    recursively for "*.json" files, or the path of a single ".json" file.
    The merged result is written to stdout as JSON, indented when
    args.formatted is set.
    """
    # OpenWrt JSON device files, keyed by their path
    profiles = {}

    def read_into_profiles(json_path):
        with open(json_path, "r") as handle:
            profiles[json_path] = handle.read()

    for entry in args.input_path:
        if not os.path.isdir(entry):
            # Anything that is not a directory must be a single .json file.
            if not entry.endswith(".json"):
                sys.stderr.write(f"Folder does not exists: {entry}\n")
                exit(1)
            read_into_profiles(entry)
            continue

        for json_file in Path(entry).rglob("*.json"):
            read_into_profiles(json_file)

    output = merge_profiles(profiles, args.download_url)

    dump_options = {"sort_keys": True}
    if args.formatted:
        dump_options["indent"] = " "
    json.dump(output, sys.stdout, **dump_options)
226
227
def main():
    """
    Parse the command line and run the selected action.

    Actions:
        merge: merge local JSON device files and print the result.
        scrape: scrape a download server, write overview.json files and
            update config.js; "--use-wget" selects the wget based scraper.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "--formatted", action="store_true", help="Output formatted JSON data."
    )
    subparsers = parser.add_subparsers(dest="action", required=True)

    parser_merge = subparsers.add_parser(
        "merge",
        help="Merge OpenWrt JSON device files into one overview JSON on stdout.",
    )
    parser_merge.add_argument(
        "input_path",
        nargs="+",
        help="Input folder that is traversed for OpenWrt JSON device files.",
    )
    parser_merge.add_argument(
        "--download-url",
        action="store",
        default="",
        help="Link to get the image from. May contain {target}, {version} and {commit}",
    )

    parser_scrape = subparsers.add_parser(
        "scrape",
        help="Scrape a download server, write overview.json files and update config.js.",
    )
    parser_scrape.add_argument(
        "domain", help="Domain to scrape. E.g. https://downloads.openwrt.org"
    )
    parser_scrape.add_argument("selector", help="Path the config.js file is in.")
    parser_scrape.add_argument(
        "--use-wget", action="store_true", help="Use wget to scrape the site."
    )

    args = parser.parse_args()

    if args.action == "merge":
        merge(args)

    if args.action == "scrape":
        if args.use_wget:
            scrape_wget(args)
        else:
            scrape(args)
274
# Run the command line interface only when executed as a script,
# not when this module is imported.
if __name__ == "__main__":
    main()