collect.py: cleanup code
[web/firmware-selector-openwrt-org.git] / misc / collect.py
1 #!/usr/bin/env python3
2
3 from pathlib import Path
4 import urllib.request
5 import tempfile
6 import argparse
7 import json
8 import glob
9 import sys
10 import os
11 import re
12
13 """
14 Tool to create overview.json files and update the config.js.
15 """
16
# Command line interface: a global --formatted flag plus the "merge" and
# "scrape" sub-commands. Parsing happens at module level; the result is kept
# in the module-level name `args`.
parser = argparse.ArgumentParser()
parser.add_argument(
    "--formatted", action="store_true", help="Output formatted JSON data."
)
subparsers = parser.add_subparsers(dest="action", required=True)

# "merge": combine local JSON device files into a single overview.
parser_merge = subparsers.add_parser(
    "merge",
    help="Merge OpenWrt JSON device files into one overview and print it as JSON.",
)
parser_merge.add_argument(
    "input_path",
    nargs="+",
    help="Input folder that is traversed for OpenWrt JSON device files.",
)
parser_merge.add_argument(
    "--download-url",
    action="store",
    default="",
    help="Link to get the image from. May contain {target}, {version} and {commit}",
)

# "scrape": collect profiles.json files from a download server.
parser_scrape = subparsers.add_parser(
    "scrape",
    help="Scrape profiles.json files from a domain, write overview.json files "
    "and update config.js.",
)
parser_scrape.add_argument(
    "domain", help="Domain to scrape. E.g. https://downloads.openwrt.org"
)
parser_scrape.add_argument("selector", help="Path the config.js file is in.")
parser_scrape.add_argument(
    "--use-wget", action="store_true", help="Use wget to scrape the site."
)

args = parser.parse_args()
51
# Only profile files that declare this metadata_version are merged.
SUPPORTED_METADATA_VERSION = 1


def merge_profiles(profiles, download_url):
    """Merge OpenWrt profile JSON documents into one overview dict.

    Args:
        profiles: Mapping of file path to file content (JSON text or bytes).
            The path is only used in diagnostic messages.
        download_url: Stored unchanged under "download_url" in the result.

    Returns:
        A dict with the keys "version_code", "download_url" and "models",
        or an empty dict when no input file was accepted. "models" maps a
        device title to a dict with "id", "target" and "images", plus "code"
        for devices whose version code differs from the top-level one.

    Files that are not valid JSON or that declare an unsupported metadata
    version are reported on stderr and skipped. A file that lacks a required
    key is reported on stderr and terminates the process with exit status 1.
    """
    # json output data
    output = {}

    def get_title(title):
        # An explicit title wins; otherwise combine vendor, model and variant.
        if "title" in title:
            return title["title"]
        return "{} {} {}".format(
            title.get("vendor", ""), title["model"], title.get("variant", "")
        ).strip()

    def add_profile(path, profile_id, target, profile, code=None):
        # Keep only the image fields that end up in the overview.
        images = [
            {"name": image["name"], "type": image["type"]}
            for image in profile["images"]
        ]

        if target is None:
            target = profile["target"]

        for entry in profile["titles"]:
            title = get_title(entry)

            if not title:
                sys.stderr.write(f"Empty title. Skip title in {path}\n")
                continue

            model = {"id": profile_id, "target": target, "images": images}
            if code is not None:
                model["code"] = code
            output["models"][title] = model

    for path, content in profiles.items():
        # Parsing happens inside its own try so that a malformed file is
        # skipped instead of aborting the whole run with a traceback.
        try:
            obj = json.loads(content)
        except (json.JSONDecodeError, UnicodeDecodeError) as e:
            sys.stderr.write(f"Skip {path}\n {e}\n")
            continue

        try:
            if obj["metadata_version"] != SUPPORTED_METADATA_VERSION:
                sys.stderr.write(
                    f"{path} has unsupported metadata version: {obj['metadata_version']} => skip\n"
                )
                continue

            code = obj.get("version_code", obj.get("version_commit"))

            # The first accepted file defines the top-level version code.
            if "version_code" not in output:
                output = {
                    "version_code": code,
                    "download_url": download_url,
                    "models": {},
                }

            # if we have mixed codes/commits, store in device object
            if output["version_code"] == code:
                code = None

            if "profiles" in obj:
                for profile_id, profile in obj["profiles"].items():
                    add_profile(path, profile_id, obj.get("target"), profile, code)
            else:
                add_profile(path, obj["id"], obj["target"], obj, code)
        except KeyError as e:
            sys.stderr.write(f"Abort on {path}\n Missing key {e}\n")
            sys.exit(1)

    return output
118
119
def update_config(config_path, versions):
    """Rewrite the `versions: {...}` entry of a config file in place.

    Args:
        config_path: Path of the file to read and overwrite.
        versions: Object whose string form (a dict's Python repr when a dict
            is passed) replaces the braces following `versions:`.

    Every match of the pattern is replaced; when there is no match the file
    is written back unchanged.
    """
    with open(config_path, "r") as file:
        content = file.read()

    replacement = f"versions: {versions}"
    # A callable replacement is used so that backslashes in the inserted text
    # are taken literally instead of being processed as template escapes.
    content = re.sub(
        r"versions:[\s]*{[^}]*}", lambda _match: replacement, content
    )

    with open(config_path, "w") as file:
        file.write(content)
128
129
130 """
131 Scrape profiles.json using links like https://downloads.openwrt.org/releases/19.07.3/targets/?json
132 Merge into overview.json files.
133 Update config.js.
134 """
135
136
def scrape(url, selector_path):
    """Scrape profiles.json files through the server's JSON listings.

    The page at `url` is searched for relative links ending in "targets/".
    For each such link the listing at "<link>/?json" is fetched, every
    listed profiles.json is downloaded and merged, and the result is written
    to "<selector_path>/data/<release>/overview.json". Finally the versions
    entry of "<selector_path>/config.js" is updated.

    Args:
        url: Base URL to scrape, e.g. https://downloads.openwrt.org
        selector_path: Folder that contains config.js and the data folder.

    Exits the process with status 1 when config.js does not exist.
    """
    config_path = f"{selector_path}/config.js"
    data_path = f"{selector_path}/data"
    versions = {}

    def handle_release(target):
        # Map the URL of every profiles.json below `target` to its raw content.
        profiles = {}
        with urllib.request.urlopen(f"{target}/?json") as file:
            array = json.loads(file.read().decode("utf-8"))
        for profile in array:
            if not profile.endswith("/profiles.json"):
                continue
            with urllib.request.urlopen(f"{target}/{profile}") as file:
                profiles[f"{target}/{profile}"] = file.read()
        return profiles

    if not os.path.isfile(config_path):
        sys.stderr.write(f"file not found: {config_path}\n")
        sys.exit(1)

    # Avoid "//" when joining, whether or not the caller passed a trailing "/".
    base_url = url.rstrip("/")

    # fetch release URLs
    with urllib.request.urlopen(url) as infile:
        # Decode the body; str() on bytes would yield the repr ("b'...'").
        page = infile.read().decode("utf-8", errors="replace")

    for path in re.findall(r"href=[\"']?([^'\" >]+)", page):
        if path.startswith("/") or not path.endswith("targets/"):
            continue

        parts = path.strip("/").split("/")
        if len(parts) < 2:
            # No release component in front of "targets"; nothing to name it.
            continue
        release = parts[-2]

        targets_url = f"{base_url}/{path.rstrip('/')}"
        download_url = f"{targets_url}/{{target}}"

        profiles = handle_release(targets_url)
        output = merge_profiles(profiles, download_url)
        if not output:
            continue

        release_dir = Path(data_path) / release
        release_dir.mkdir(parents=True, exist_ok=True)

        # write overview.json
        with open(release_dir / "overview.json", "w") as outfile:
            json.dump(
                output,
                outfile,
                indent=" " if args.formatted else None,
                sort_keys=True,
            )

        versions[release.upper()] = f"data/{release}/overview.json"

    update_config(config_path, versions)
176
177
178 """
179 Scrape profiles.json using wget (slower but more generic).
180 Merge into overview.json files.
181 Update config.js.
182 """
183
184
def scrape_wget(url, selector_path):
    """Scrape profiles.json files with a recursive wget download.

    All profiles.json files below `url` are mirrored into a temporary
    folder. Each "<host>/snapshots" and "<host>/releases/<name>" folder found
    there is merged and written to
    "<selector_path>/data/<release>/overview.json". Finally the versions
    entry of "<selector_path>/config.js" is updated.

    Args:
        url: Base URL handed to wget.
        selector_path: Folder that contains config.js and the data folder.

    Exits the process with status 1 when config.js does not exist or the
    wget executable cannot be started.
    """
    import subprocess  # local import: only this function runs an external tool

    config_path = f"{selector_path}/config.js"
    data_path = f"{selector_path}/data"
    versions = {}

    # Fail before the download instead of after it.
    if not os.path.isfile(config_path):
        sys.stderr.write(f"file not found: {config_path}\n")
        sys.exit(1)

    with tempfile.TemporaryDirectory() as tmp_dir:
        # download all profiles.json files
        # An argument list (no shell) keeps `url` from being interpreted as
        # shell syntax. The exit status is not checked so that a partial
        # download is still processed.
        try:
            subprocess.run(
                [
                    "wget",
                    "-c",
                    "-r",
                    "-P",
                    tmp_dir,
                    "-A",
                    "profiles.json",
                    "--reject-regex",
                    "kmods|packages",
                    "--no-parent",
                    url,
                ],
                check=False,
            )
        except FileNotFoundError:
            sys.stderr.write("wget not found\n")
            sys.exit(1)

        # delete empty folders (bottom-up, so parents emptied by the removal
        # of their children are removed as well; tmp_dir itself is kept)
        for dirpath, _dirnames, _filenames in os.walk(tmp_dir, topdown=False):
            if dirpath != tmp_dir and not os.listdir(dirpath):
                os.rmdir(dirpath)

        # create overview.json files
        release_dirs = glob.glob(f"{tmp_dir}/*/snapshots") + glob.glob(
            f"{tmp_dir}/*/releases/*"
        )
        for path in release_dirs:
            release = os.path.basename(path)
            # Path relative to tmp_dir, i.e. "<host>/..." as mirrored by wget.
            base = path[len(tmp_dir) + 1 :]

            profiles = {}
            for ppath in Path(path).rglob("profiles.json"):
                with open(ppath, "r") as file:
                    profiles[ppath] = file.read()

            output = merge_profiles(profiles, f"https://{base}/targets/{{target}}")
            if not output:
                # Nothing usable was merged; do not register an empty release.
                continue

            release_dir = Path(data_path) / release
            release_dir.mkdir(parents=True, exist_ok=True)

            # write overview.json
            with open(release_dir / "overview.json", "w") as outfile:
                json.dump(
                    output,
                    outfile,
                    indent=" " if args.formatted else None,
                    sort_keys=True,
                )

            versions[release.upper()] = f"data/{release}/overview.json"

    update_config(config_path, versions)
225
226
227 """
228 Find and merge json files for a single release.
229 """
230
231
def merge(input_paths):
    """Read JSON device files and print their merged overview to stdout.

    Args:
        input_paths: Paths given on the command line. A directory is searched
            recursively for "*.json" files; any other path must end in
            ".json" and is read directly.

    The merged result is produced by merge_profiles() using the
    --download-url argument and is dumped with sorted keys, indented when
    --formatted was given. A non-directory path that does not end in ".json"
    is reported on stderr and ends the process with exit status 1.
    """
    # OpenWrt JSON device files, keyed by the path they were read from
    collected = {}

    def read_into(target_path):
        with open(target_path, "r") as handle:
            collected[target_path] = handle.read()

    for entry in input_paths:
        if os.path.isdir(entry):
            for found in Path(entry).rglob("*.json"):
                read_into(found)
            continue

        if not entry.endswith(".json"):
            sys.stderr.write(f"Folder does not exists: {entry}\n")
            exit(1)
        read_into(entry)

    merged = merge_profiles(collected, args.download_url)

    dump_options = {"sort_keys": True}
    if args.formatted:
        dump_options["indent"] = " "
    json.dump(merged, sys.stdout, **dump_options)
256
257
# Run the handler that belongs to the selected sub-command.
if args.action == "merge":
    merge(args.input_path)
elif args.action == "scrape":
    # --use-wget selects the wget based scraper instead of the urllib one.
    scraper = scrape_wget if args.use_wget else scrape
    scraper(args.domain, args.selector)