29688e567491abef769f9f402cdf5e21dd76b024
[web/firmware-selector-openwrt-org.git] / misc / collect.py
1 #!/usr/bin/env python3
2
3 from pathlib import Path
4 import urllib.request
5 import tempfile
6 import argparse
7 import json
8 import glob
9 import sys
10 import os
11 import re
12
13 """
14 Tool to create overview.json files and update the config.js.
15 """
16
# Command line interface.
#
# Two sub-commands are offered:
#   merge  - merge local OpenWrt JSON device files and print the result.
#   scrape - fetch profiles.json files from a download server, write
#            overview.json files and update the selector's config.js.
parser = argparse.ArgumentParser()
parser.add_argument(
    "--formatted", action="store_true", help="Output formatted JSON data."
)
# dest="action" is what the dispatch code at the end of the file switches on.
subparsers = parser.add_subparsers(dest="action", required=True)

parser_merge = subparsers.add_parser(
    "merge",
    help="Merge OpenWrt JSON device files of a single release and print the result.",
)
parser_merge.add_argument(
    "input_path",
    nargs="+",
    help="Input folder that is traversed for OpenWrt JSON device files, or a .json file.",
)
parser_merge.add_argument(
    "--download-url",
    action="store",
    default="",
    help="Link to get the image from. May contain {target}, {version} and {commit}",
)
# parser_merge.add_argument("--change-prefix",
#    help="Change the openwrt- file name prefix.")

parser_scrape = subparsers.add_parser(
    "scrape",
    help="Scrape profiles.json files, write overview.json files and update config.js.",
)
parser_scrape.add_argument(
    "domain", help="Domain to scrape. E.g. https://downloads.openwrt.org"
)
parser_scrape.add_argument("selector", help="Path the config.js file is in.")
parser_scrape.add_argument(
    "--use-wget", action="store_true", help="Use wget to scrape the site."
)

# Parsed at import time; the functions below read this module-level object.
args = parser.parse_args()
53
# Only files whose "metadata_version" equals this value are merged.
SUPPORTED_METADATA_VERSION = 1


def merge_profiles(profiles, download_url):
    """Merge OpenWrt JSON profile files into one overview structure.

    Args:
        profiles: Mapping ``{<file-path>: <file-content>}``. The content is
            JSON text (``str`` or ``bytes``); the path is only used in
            diagnostics.
        download_url: Stored verbatim under the ``"download_url"`` key.

    Returns:
        An empty dict when no file was accepted, otherwise a dict with the
        keys ``"version_code"``, ``"download_url"`` and ``"models"``.
        ``"models"`` maps a device title to
        ``{"id": ..., "target": ..., "images": [...]}``, plus a ``"code"``
        entry when the file's code differs from the overall
        ``"version_code"``.

    Raises:
        SystemExit: With status 1 when an accepted file lacks a required key.
    """
    # json output data
    output = {}

    def get_title(title):
        # An explicit "title" wins; otherwise build one from
        # vendor/model/variant ("model" is mandatory).
        if "title" in title:
            return title["title"]
        return "{} {} {}".format(
            title.get("vendor", ""), title["model"], title.get("variant", "")
        ).strip()

    def add_profile(path, profile_id, target, profile, code=None):
        # Keep only the image fields needed in the overview.
        images = [
            {"name": image["name"], "type": image["type"]}
            for image in profile["images"]
        ]

        if target is None:
            target = profile["target"]

        # if args.change_prefix:
        #    change_prefix(images, "openwrt-", args.change_prefix)

        for entry in profile["titles"]:
            title = get_title(entry)

            if not title:
                sys.stderr.write(f"Empty title. Skip title in {path}\n")
                continue

            model = {"id": profile_id, "target": target, "images": images}
            if code is not None:
                model["code"] = code
            output["models"][title] = model

    for path, content in profiles.items():
        # Parsing has to happen inside the try block, otherwise a single
        # malformed file aborts the whole merge with a traceback.
        try:
            obj = json.loads(content)
        except (json.JSONDecodeError, UnicodeDecodeError) as e:
            sys.stderr.write(f"Skip {path}\n {e}\n")
            continue

        try:
            if obj["metadata_version"] != SUPPORTED_METADATA_VERSION:
                sys.stderr.write(
                    f"{path} has unsupported metadata version: {obj['metadata_version']} => skip\n"
                )
                continue

            code = obj.get("version_code", obj.get("version_commit"))

            # The first accepted file defines the overall version code.
            if "version_code" not in output:
                output.update(
                    {"version_code": code, "download_url": download_url, "models": {}}
                )

            # if we have mixed codes/commits, store in device object
            if output["version_code"] == code:
                code = None

            if "profiles" in obj:
                for profile_id, profile in obj["profiles"].items():
                    add_profile(path, profile_id, obj.get("target"), profile, code)
            else:
                add_profile(path, obj["id"], obj["target"], obj, code)
        except KeyError as e:
            sys.stderr.write(f"Abort on {path}\n Missing key {e}\n")
            sys.exit(1)

    return output
123
124
def update_config(config_path, versions):
    """Replace the ``versions: {...}`` object inside the config file.

    Args:
        config_path: Path of the file to rewrite in place.
        versions: Dict rendered with ``str()`` and inserted as the new
            ``versions`` object.

    Every ``versions: {...}`` match is replaced. When nothing matches, a
    note is written to stderr and the file is left untouched.
    """
    with open(config_path, "r") as file:
        content = file.read()

    replacement = f"versions: {versions}"
    # A callable replacement is used so that backslashes in the rendered
    # dict are inserted literally instead of being parsed as re escapes.
    content, count = re.subn(
        r"versions:[\s]*{[^}]*}", lambda _match: replacement, content
    )
    if count == 0:
        sys.stderr.write(f"No versions entry found in {config_path}\n")
        return

    with open(config_path, "w") as file:
        # save updated config
        file.write(content)
134
135
136 """
Scrape profiles.json using links like https://downloads.openwrt.org/releases/19.07.3/targets/?json
Merge into overview.json files.
Update config.js.
140 """
141
142
def scrape(url, selector_path):
    """Scrape profiles.json files over HTTP and update the selector data.

    The index page at ``url`` is searched for relative links ending in
    ``targets/``. For each such link the ``?json`` listing is fetched, every
    listed ``profiles.json`` is downloaded and the merged result is written
    to ``<selector_path>/data/<release>/overview.json``. Finally the
    ``versions`` object in ``<selector_path>/config.js`` is replaced.

    Args:
        url: Base URL whose index page lists the releases.
        selector_path: Folder containing ``config.js``.

    Raises:
        SystemExit: With status 1 when ``config.js`` does not exist.
    """
    config_path = f"{selector_path}/config.js"
    data_path = f"{selector_path}/data"
    versions = {}

    def handle_release(target):
        # Returns {<profiles.json URL>: <raw response bytes>} for one release.
        profiles = {}
        with urllib.request.urlopen(f"{target}/?json") as listing:
            array = json.loads(listing.read().decode("utf-8"))
        for profile in array:
            if not profile.endswith("/profiles.json"):
                continue
            with urllib.request.urlopen(f"{target}/{profile}") as response:
                profiles[f"{target}/{profile}"] = response.read()
        return profiles

    if not os.path.isfile(config_path):
        print(f"file not found: {config_path}")
        sys.exit(1)

    # fetch release URLs
    with urllib.request.urlopen(url) as infile:
        # Decode the page instead of scanning the repr() of a bytes object,
        # which would contain escaped quotes and a b'...' wrapper.
        index = infile.read().decode("utf-8", errors="replace")

    for path in re.findall(r"href=[\"']?([^'\" >]+)", index):
        if path.startswith("/") or not path.endswith("targets/"):
            continue

        parts = path.strip("/").split("/")
        if len(parts) < 2:
            # A bare "targets/" link has no release component.
            continue
        release = parts[-2]
        download_url = f"{url}/{path}/{{target}}"

        profiles = handle_release(f"{url}/{path}")
        output = merge_profiles(profiles, download_url)
        if not output:
            continue

        Path(f"{data_path}/{release}").mkdir(parents=True, exist_ok=True)
        # write overview.json
        with open(f"{data_path}/{release}/overview.json", "w") as outfile:
            if args.formatted:
                json.dump(output, outfile, indent=" ", sort_keys=True)
            else:
                json.dump(output, outfile, sort_keys=True)

        # Only releases that actually got an overview.json are registered.
        versions[release.upper()] = f"data/{release}/overview.json"

    update_config(config_path, versions)
183
184
185 """
Scrape profiles.json using wget (slower but more generic).
Merge into overview.json files.
Update config.js.
189 """
190
191
def scrape_wget(url, selector_path):
    """Scrape profiles.json files with wget and update the selector data.

    All ``profiles.json`` files below ``url`` are mirrored into a temporary
    directory. Every ``<host>/snapshots`` and ``<host>/releases/<release>``
    folder found there is merged into
    ``<selector_path>/data/<release>/overview.json``. Finally the
    ``versions`` object in ``<selector_path>/config.js`` is replaced.

    Args:
        url: URL handed to wget for recursive download.
        selector_path: Folder containing ``config.js``.

    Raises:
        SystemExit: With status 1 when ``config.js`` does not exist or the
            ``wget`` executable cannot be found.
    """
    # Local import: subprocess is only needed by this function.
    import subprocess

    config_path = f"{selector_path}/config.js"
    data_path = f"{selector_path}/data"
    versions = {}

    # Fail before downloading anything, like scrape() does.
    if not os.path.isfile(config_path):
        print(f"file not found: {config_path}")
        sys.exit(1)

    with tempfile.TemporaryDirectory() as tmp_dir:
        # download all profiles.json files
        # An argument list (no shell) keeps the URL and paths from being
        # interpreted by a shell. The exit status is deliberately ignored,
        # as it was before.
        try:
            subprocess.run(
                [
                    "wget",
                    "-c",
                    "-r",
                    "-P",
                    tmp_dir,
                    "-A",
                    "profiles.json",
                    "--reject-regex",
                    "kmods|packages",
                    "--no-parent",
                    url,
                ],
                check=False,
            )
        except FileNotFoundError:
            sys.stderr.write("wget not found\n")
            sys.exit(1)

        # delete empty folders
        # Bottom-up walk so that parents emptied by the removal of their
        # children are removed as well; tmp_dir itself is kept.
        for dirpath, _dirnames, _filenames in os.walk(tmp_dir, topdown=False):
            if dirpath != tmp_dir and not os.listdir(dirpath):
                os.rmdir(dirpath)

        # create overview.json files
        release_dirs = glob.glob(f"{tmp_dir}/*/snapshots") + glob.glob(
            f"{tmp_dir}/*/releases/*"
        )
        for path in release_dirs:
            release = os.path.basename(path)
            # Path relative to tmp_dir, i.e. "<host>/..." as mirrored by wget.
            base = path[len(tmp_dir) + 1 :]

            profiles = {}
            for ppath in Path(path).rglob("profiles.json"):
                with open(ppath, "r") as file:
                    profiles[ppath] = file.read()

            # NOTE: the download URL is always built with the https scheme.
            output = merge_profiles(profiles, f"https://{base}/targets/{{target}}")
            if not output:
                # Nothing usable was merged; do not write or register it.
                continue

            Path(f"{data_path}/{release}").mkdir(parents=True, exist_ok=True)

            # write overview.json
            with open(f"{data_path}/{release}/overview.json", "w") as outfile:
                if args.formatted:
                    json.dump(output, outfile, indent=" ", sort_keys=True)
                else:
                    json.dump(output, outfile, sort_keys=True)

            versions[release.upper()] = f"data/{release}/overview.json"

    update_config(config_path, versions)
232
233
234 """
235 def change_prefix(images, old_prefix, new_prefix):
236 for image in images:
237 if image["name"].startswith(old_prefix):
238 image["name"] = new_prefix + image["name"][len(old_prefix):]
239 """
240
241 """
242 Find and merge json files for a single release.
243 """
244
245
def merge(input_paths):
    """Find and merge JSON files for a single release and print the result.

    Args:
        input_paths: Folders (searched recursively for ``*.json``) and/or
            paths of individual ``.json`` files.

    The merged structure is written to stdout as JSON; ``--formatted``
    selects indented output and ``--download-url`` is passed on to
    ``merge_profiles``.

    Raises:
        SystemExit: With status 1 when a path is neither a folder nor an
            existing ``.json`` file.
    """
    # OpenWrt JSON device files
    profiles = {}

    def add_path(path):
        with open(path, "r") as file:
            profiles[path] = file.read()

    for path in input_paths:
        if os.path.isdir(path):
            # Sorted so the file that defines the base version code does not
            # depend on directory iteration order.
            for filepath in sorted(Path(path).rglob("*.json")):
                add_path(filepath)
            continue

        if not path.endswith(".json"):
            sys.stderr.write(f"Folder does not exist: {path}\n")
            sys.exit(1)
        if not os.path.isfile(path):
            sys.stderr.write(f"File does not exist: {path}\n")
            sys.exit(1)
        add_path(path)

    output = merge_profiles(profiles, args.download_url)

    if args.formatted:
        json.dump(output, sys.stdout, indent=" ", sort_keys=True)
    else:
        json.dump(output, sys.stdout, sort_keys=True)
271
272
# Run the handler that belongs to the selected sub-command.
if args.action == "merge":
    merge(args.input_path)
elif args.action == "scrape":
    # --use-wget switches to the wget based scraper.
    scrape_handler = scrape_wget if args.use_wget else scrape
    scrape_handler(args.domain, args.selector)