df18e278b485634095331faf880dd9b89c4d5f40
[web/firmware-selector-openwrt-org.git] / misc / collect.py
1 #!/usr/bin/env python3
2 """
3 Tool to create overview.json files and update the config.js.
4 """
5
6 from pathlib import Path
7 import urllib.request
8 import tempfile
9 import argparse
10 import json
11 import glob
12 import sys
13 import os
14 import re
15
SUPPORTED_METADATA_VERSION = 1


def merge_profiles(profiles, download_url):
    """
    Merge OpenWrt profile JSON documents into a single overview structure.

    Arguments:
      profiles      Mapping {<file-path>: <file-content>}. The content is the
                    JSON text (str or bytes) of either a single-profile file
                    or a per-target file with a "profiles" object.
      download_url  Stored verbatim under "download_url" in the result.

    Returns an empty dict when no file with a supported metadata version was
    found. Otherwise returns
      {"version_code": ..., "download_url": ..., "models": {<title>: {...}}}
    where each model entry holds "id", "target", "images" and, only when the
    file's code differs from the overview's "version_code", a "code" entry.

    Files that are not valid JSON or that carry an unsupported
    metadata_version are skipped with a message on stderr. A missing required
    key aborts the process with exit status 1.
    """
    # json output data
    output = {}

    def get_title(title):
        # An explicit title wins; otherwise build "<vendor> <model> <variant>".
        if "title" in title:
            return title["title"]
        return "{} {} {}".format(
            title.get("vendor", ""), title["model"], title.get("variant", "")
        ).strip()

    def add_profile(path, profile_id, target, profile, code=None):
        images = [
            {"name": image["name"], "type": image["type"]}
            for image in profile["images"]
        ]

        # single-profile files carry the target inside the profile itself
        if target is None:
            target = profile["target"]

        for entry in profile["titles"]:
            title = get_title(entry)

            if len(title) == 0:
                sys.stderr.write(f"Empty title. Skip title in {path}\n")
                continue

            output["models"][title] = {
                "id": profile_id,
                "target": target,
                "images": images,
            }

            if code is not None:
                output["models"][title]["code"] = code

    for path, content in profiles.items():
        # Parse inside a try block so that a malformed file is skipped
        # instead of terminating the whole run with a traceback.
        try:
            obj = json.loads(content)
        except (json.JSONDecodeError, UnicodeDecodeError) as e:
            sys.stderr.write(f"Skip {path}\n {e}\n")
            continue

        try:
            if obj["metadata_version"] != SUPPORTED_METADATA_VERSION:
                sys.stderr.write(
                    f"{path} has unsupported metadata version: {obj['metadata_version']} => skip\n"
                )
                continue

            code = obj.get("version_code", obj.get("version_commit"))

            # the first accepted file defines the overview-wide version code
            if "version_code" not in output:
                output = {
                    "version_code": code,
                    "download_url": download_url,
                    "models": {},
                }

            # if we have mixed codes/commits, store in device object
            if output["version_code"] == code:
                code = None

            if "profiles" in obj:
                for profile_id in obj["profiles"]:
                    add_profile(
                        path,
                        profile_id,
                        obj.get("target"),
                        obj["profiles"][profile_id],
                        code,
                    )
            else:
                add_profile(path, obj["id"], obj["target"], obj, code)
        except KeyError as e:
            sys.stderr.write(f"Abort on {path}\n Missing key {e}\n")
            sys.exit(1)

    return output
83
84
def update_config(config_path, versions):
    """
    Rewrite the `versions: {...}` object inside the file at config_path.

    Arguments:
      config_path  Path of the file to update in place (read, then rewritten).
      versions     Object whose str() form is inserted after "versions: ";
                   callers pass a dict {<release>: <overview.json path>}.

    Every match of the pattern `versions:<whitespace>{...}` (up to the first
    closing brace) is replaced. The file is left textually unchanged when
    nothing matches.
    """
    with open(config_path, "r") as file:
        content = file.read()

    replacement = f"versions: {versions}"

    # Use a callable so the replacement text is inserted verbatim. A plain
    # string would be treated as a template in which backslashes are
    # processed as escapes / group references.
    content = re.sub(r"versions:[\s]*{[^}]*}", lambda _match: replacement, content)

    with open(config_path, "w") as file:
        file.write(content)
93
94
"""
Scrape profiles.json using links like https://downloads.openwrt.org/releases/19.07.3/targets/?json
Merge into overview.json files.
Update config.js.
"""
100
101
def scrape(args):
    """
    Scrape profiles.json files via the server's `?json` listings, write one
    overview.json per release and update config.js.

    Reads from args:
      domain     Base URL whose index page links to the `.../targets/` folders.
      selector   Folder containing config.js; overview files are written to
                 <selector>/data/<release>/overview.json.
      formatted  When true, the JSON output is indented.

    Exits with status 1 when <selector>/config.js does not exist.
    """
    url = args.domain
    selector_path = args.selector
    config_path = f"{selector_path}/config.js"
    data_path = f"{selector_path}/data"
    versions = {}

    def handle_release(target):
        # The `?json` listing is a JSON array of file paths below `target`.
        with urllib.request.urlopen(f"{target}/?json") as response:
            listing = json.loads(response.read().decode("utf-8"))

        profiles = {}
        for profile in listing:
            if not profile.endswith("/profiles.json"):
                continue
            with urllib.request.urlopen(f"{target}/{profile}") as response:
                profiles[f"{target}/{profile}"] = response.read()
        return profiles

    if not os.path.isfile(config_path):
        print(f"file not found: {config_path}")
        sys.exit(1)

    # Decode the page rather than calling str() on the bytes object: str()
    # yields the repr (b'...') with escaped quotes and non-ASCII characters,
    # which corrupts the extracted href values.
    with urllib.request.urlopen(url) as infile:
        index_html = infile.read().decode("utf-8", errors="replace")

    # fetch release URLs
    for path in re.findall(r"href=[\"']?([^'\" >]+)", index_html):
        if path.startswith("/") or not path.endswith("targets/"):
            continue

        # e.g. "releases/19.07.3/targets/" => "19.07.3"
        release = path.strip("/").split("/")[-2]
        download_url = f"{url}/{path}/{{target}}"

        profiles = handle_release(f"{url}/{path}")
        output = merge_profiles(profiles, download_url)
        if len(output) > 0:
            Path(f"{data_path}/{release}").mkdir(parents=True, exist_ok=True)
            # write overview.json
            with open(f"{data_path}/{release}/overview.json", "w") as outfile:
                if args.formatted:
                    json.dump(output, outfile, indent=" ", sort_keys=True)
                else:
                    json.dump(output, outfile, sort_keys=True)

            versions[release.upper()] = f"data/{release}/overview.json"

    update_config(config_path, versions)
143
144
"""
Scrape profiles.json using wget (slower but more generic).
Merge into overview.json files.
Update config.js.
"""
150
151
def scrape_wget(args):
    """
    Mirror all profiles.json files below args.domain with wget, write one
    overview.json per release and update config.js.

    Reads from args:
      domain     URL handed to `wget -r`.
      selector   Folder containing config.js; overview files are written to
                 <selector>/data/<release>/overview.json.
      formatted  When true, the JSON output is indented.

    Exits with status 1 when <selector>/config.js does not exist, before
    anything is downloaded. wget's exit status is ignored.
    """
    # Local import: subprocess is not among the module-level imports.
    import subprocess

    url = args.domain
    selector_path = args.selector
    config_path = f"{selector_path}/config.js"
    data_path = f"{selector_path}/data"
    versions = {}

    # Same up-front check as scrape(): fail before the slow download rather
    # than afterwards, when the config file is opened for updating.
    if not os.path.isfile(config_path):
        print(f"file not found: {config_path}")
        sys.exit(1)

    with tempfile.TemporaryDirectory() as tmp_dir:
        # download all profiles.json files
        # An argument list (no shell) keeps URLs containing characters such
        # as '&', '?' or spaces from being interpreted by a shell.
        subprocess.run(
            [
                "wget",
                "-c",
                "-r",
                "-P",
                tmp_dir,
                "-A",
                "profiles.json",
                "--reject-regex",
                "kmods|packages",
                "--no-parent",
                url,
            ],
            check=False,
        )

        # delete empty folders (bottom-up, so that parents emptied by the
        # removal of their children are removed as well)
        for root, _dirs, _files in os.walk(tmp_dir, topdown=False):
            if root != tmp_dir and not os.listdir(root):
                os.rmdir(root)

        # create overview.json files
        release_dirs = glob.glob(f"{tmp_dir}/*/snapshots") + glob.glob(
            f"{tmp_dir}/*/releases/*"
        )
        for path in release_dirs:
            release = os.path.basename(path)
            # path relative to tmp_dir, i.e. "<host>/releases/<release>"
            base = path[len(tmp_dir) + 1 :]

            profiles = {}
            for ppath in Path(path).rglob("profiles.json"):
                with open(ppath, "r") as file:
                    profiles[ppath] = file.read()

            if len(profiles) == 0:
                continue

            versions[release.upper()] = f"data/{release}/overview.json"

            output = merge_profiles(profiles, f"https://{base}/targets/{{target}}")
            Path(f"{data_path}/{release}").mkdir(parents=True, exist_ok=True)

            # write overview.json
            with open(f"{data_path}/{release}/overview.json", "w") as outfile:
                if args.formatted:
                    json.dump(output, outfile, indent=" ", sort_keys=True)
                else:
                    json.dump(output, outfile, sort_keys=True)

    update_config(config_path, versions)
197
198
199 """
200 Find and merge json files for a single release.
201 """
202
203
def merge(args):
    """
    Merge the JSON files found under args.input_path and print the result.

    Reads from args:
      input_path    List of paths. Directories are searched recursively for
                    *.json files; any other path must end in ".json".
      download_url  Passed through to merge_profiles().
      formatted     When true, the JSON written to stdout is indented.

    Exits with status 1 when a path is neither a directory nor a name ending
    in ".json".
    """
    # OpenWrt JSON device files, {<file-path>: <file-content>}
    profiles = {}

    def add_path(path):
        with open(path, "r") as file:
            profiles[path] = file.read()

    for path in args.input_path:
        if os.path.isdir(path):
            for filepath in Path(path).rglob("*.json"):
                add_path(filepath)
            continue

        if not path.endswith(".json"):
            sys.stderr.write(f"Folder does not exists: {path}\n")
            sys.exit(1)
        add_path(path)

    output = merge_profiles(profiles, args.download_url)

    dump_options = {"sort_keys": True}
    if args.formatted:
        dump_options["indent"] = " "
    json.dump(output, sys.stdout, **dump_options)
229
230
def main():
    """
    Parse the command line and run the selected action.

    Actions:
      merge   Merge local JSON files and print the overview to stdout.
      scrape  Collect profiles.json files from a server, write overview.json
              files and update config.js (optionally using wget).
    """
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "--formatted", action="store_true", help="Output formatted JSON data."
    )
    subparsers = parser.add_subparsers(dest="action", required=True)

    parser_merge = subparsers.add_parser(
        "merge",
        help="Find and merge JSON files for a single release.",
    )
    parser_merge.add_argument(
        "input_path",
        nargs="+",
        help="Input folder that is traversed for OpenWrt JSON device files.",
    )
    parser_merge.add_argument(
        "--download-url",
        action="store",
        default="",
        help="Link to get the image from. May contain {target}, {version} and {commit}",
    )

    parser_scrape = subparsers.add_parser(
        "scrape",
        help="Scrape profiles.json files, write overview.json files and update config.js.",
    )
    parser_scrape.add_argument(
        "domain", help="Domain to scrape. E.g. https://downloads.openwrt.org"
    )
    parser_scrape.add_argument("selector", help="Path the config.js file is in.")
    parser_scrape.add_argument(
        "--use-wget", action="store_true", help="Use wget to scrape the site."
    )

    args = parser.parse_args()

    if args.action == "merge":
        merge(args)

    if args.action == "scrape":
        if args.use_wget:
            scrape_wget(args)
        else:
            scrape(args)


if __name__ == "__main__":
    main()