93410c03d0874281ca3d867324d5fba9a8a64a6e
[web/firmware-selector-openwrt-org.git] / misc / collect.py
1 #!/usr/bin/env python3
2
3 from pathlib import Path
4 import urllib.request
5 import tempfile
6 import argparse
7 import json
8 import glob
9 import sys
10 import os
11 import re
12
13 '''
14 Tool to create overview.json files and update the config.js.
15 '''
16
# Command line interface: a global --formatted flag plus the "merge" and
# "scrape" sub-commands. The parsed result is stored in the module-level
# `args`, which merge(), scrape() and scrape_wget() read directly.
parser = argparse.ArgumentParser()
parser.add_argument("--formatted", action="store_true",
    help="Output formatted JSON data.")
subparsers = parser.add_subparsers(dest="action", required=True)

# "merge": combine local JSON device files and print the result to stdout.
parser_merge = subparsers.add_parser("merge",
    help="Merge OpenWrt JSON device files into one overview JSON written to stdout.")
parser_merge.add_argument("input_path", nargs="+",
    help="Input folder that is traversed for OpenWrt JSON device files.")
parser_merge.add_argument("--download-url", action="store", default="",
    help="Link to get the image from. May contain {target}, {version} and {commit}")
#parser_merge.add_argument("--change-prefix",
#    help="Change the openwrt- file name prefix.")

# "scrape": collect profiles.json files from a download server.
parser_scrape = subparsers.add_parser("scrape",
    help="Scrape a download server for profiles.json files, write overview.json files and update config.js.")
parser_scrape.add_argument("domain",
    help="Domain to scrape. E.g. https://downloads.openwrt.org")
parser_scrape.add_argument("selector",
    help="Path the config.js file is in.")
parser_scrape.add_argument("--use-wget", action="store_true",
    help="Use wget to scrape the site.")

# Parsed at import time; exits with a usage message on invalid arguments.
args = parser.parse_args()
41
# Only documents declaring exactly this "metadata_version" are merged.
SUPPORTED_METADATA_VERSION = 1


def merge_profiles(profiles, download_url):
    """
    Merge OpenWrt JSON profile documents into a single overview structure.

    profiles: mapping {<file-path>: <file-content>}. The content is handed
        to json.loads(); the path is only used in diagnostics.
    download_url: stored unchanged under "download_url" in the result.

    Returns {} if no document was accepted. Otherwise returns a dict with
    the keys "version_code" (taken from the first accepted document),
    "download_url" and "models". "models" maps each device title to
    {"id", "target", "images"} plus "code" when that document's version
    code differs from the top-level one.

    Documents that cannot be parsed, are not JSON objects or declare an
    unsupported metadata version are skipped with a note on stderr.
    A missing required key ends the program with exit status 1.
    """
    # json output data
    output = {}

    def get_title_name(title):
        # An explicit "title" wins, otherwise build "<vendor> <model> <variant>".
        if "title" in title:
            return title["title"]
        return "{} {} {}".format(
            title.get("vendor", ""), title["model"], title.get("variant", "")
        ).strip()

    def add_profile(profile_id, target, profile, source, code=None):
        # Only name and type of each image are kept in the overview.
        images = [
            {"name": image["name"], "type": image["type"]}
            for image in profile["images"]
        ]

        if target is None:
            target = profile["target"]

        #if args.change_prefix:
        #    change_prefix(images, "openwrt-", args.change_prefix)

        for title in profile["titles"]:
            name = get_title_name(title)

            if not name:
                sys.stderr.write(f"Empty title. Skip title in {source}\n")
                continue

            model = {"id": profile_id, "target": target, "images": images}
            if code is not None:
                model["code"] = code
            output["models"][name] = model

    for path, content in profiles.items():
        # Parsing happens inside its own try block so that a single broken
        # file is skipped instead of aborting the whole run.
        try:
            obj = json.loads(content)
        except (json.JSONDecodeError, UnicodeDecodeError) as e:
            sys.stderr.write(f"Skip {path}\n {e}\n")
            continue

        if not isinstance(obj, dict):
            sys.stderr.write(f"Skip {path}\n not a JSON object\n")
            continue

        try:
            if obj["metadata_version"] != SUPPORTED_METADATA_VERSION:
                sys.stderr.write(f"{path} has unsupported metadata version: {obj['metadata_version']} => skip\n")
                continue

            code = obj.get("version_code", obj.get("version_commit"))

            if "version_code" not in output:
                output = {
                    "version_code": code,
                    "download_url": download_url,
                    "models": {},
                }

            # if we have mixed codes/commits, store in device object
            if output["version_code"] == code:
                code = None

            if "profiles" in obj:
                for profile_id, profile in obj["profiles"].items():
                    add_profile(profile_id, obj.get("target"), profile, path, code)
            else:
                add_profile(obj["id"], obj["target"], obj, path, code)
        except KeyError as e:
            sys.stderr.write(f"Abort on {path}\n Missing key {e}\n")
            sys.exit(1)

    return output
111
def update_config(config_path, versions):
    """
    Replace the "versions: {...}" object inside the config file.

    config_path: path of the config file; it is read and rewritten in place.
    versions: dict whose str() form is written after "versions: ".

    Every match of "versions:" followed by a brace-delimited span (up to
    the first closing brace) is replaced.
    """
    with open(config_path, "r", encoding="utf-8") as file:
        content = file.read()

    replacement = f"versions: {versions}"
    # A callable replacement is inserted verbatim. A plain replacement string
    # would have its backslashes interpreted as escapes / group references.
    content = re.sub(r"versions:[\s]*{[^}]*}", lambda _match: replacement, content)

    with open(config_path, "w", encoding="utf-8") as file:
        # save updated config
        file.write(content)
121
def scrape(url, selector_path):
    """
    Scrape profiles.json using links like https://downloads.openwrt.org/releases/19.07.3/targets/?json
    Merge into overview.json files.
    Update config.js.

    url: page whose href attributes are searched for relative links
        ending in "targets/".
    selector_path: folder containing config.js; overview files are written
        to <selector_path>/data/<release>/overview.json.

    Exits with status 1 if config.js does not exist.
    """
    config_path = f"{selector_path}/config.js"
    data_path = f"{selector_path}/data"
    versions = {}

    def handle_release(target):
        # The "?json" listing is expected to be a JSON array of relative
        # file paths; only the profiles.json entries are downloaded.
        profiles = {}
        with urllib.request.urlopen(f"{target}/?json") as listing:
            array = json.loads(listing.read().decode("utf-8"))
        for profile in array:
            if not profile.endswith("/profiles.json"):
                continue
            with urllib.request.urlopen(f"{target}/{profile}") as response:
                profiles[f"{target}/{profile}"] = response.read()
        return profiles

    if not os.path.isfile(config_path):
        print(f"file not found: {config_path}")
        sys.exit(1)

    # fetch release URLs
    with urllib.request.urlopen(url) as infile:
        # Decode the page instead of parsing the repr() of a bytes object.
        page = infile.read().decode("utf-8", errors="replace")

    for path in re.findall(r"href=[\"']?([^'\" >]+)", page):
        if path.startswith("/") or not path.endswith("targets/"):
            continue

        # The release name is the path component in front of "targets".
        parts = path.strip("/").split("/")
        if len(parts) < 2 or parts[-2] in (".", ".."):
            sys.stderr.write(f"Cannot derive release name from {path} => skip\n")
            continue
        release = parts[-2]

        # Join without doubled slashes.
        base = f"{url.rstrip('/')}/{path.rstrip('/')}"
        download_url = f"{base}/{{target}}"

        profiles = handle_release(base)
        output = merge_profiles(profiles, download_url)
        if not output:
            continue

        Path(f"{data_path}/{release}").mkdir(parents=True, exist_ok=True)
        # write overview.json
        with open(f"{data_path}/{release}/overview.json", "w") as outfile:
            json.dump(output, outfile, indent=" " if args.formatted else None, sort_keys=True)

        versions[release.upper()] = f"data/{release}/overview.json"

    update_config(config_path, versions)
167
def scrape_wget(url, selector_path):
    """
    Scrape profiles.json using wget (slower but more generic).
    Merge into overview.json files.
    Update config.js.

    url: start URL handed to a recursive wget download.
    selector_path: folder containing config.js; overview files are written
        to <selector_path>/data/<release>/overview.json.

    Exits with status 1 if wget cannot be started.
    """
    # Local import: this is the only function that uses subprocess.
    import subprocess

    config_path = f"{selector_path}/config.js"
    data_path = f"{selector_path}/data"
    versions = {}

    with tempfile.TemporaryDirectory() as tmp_dir:
        # download all profiles.json files
        # An argument list (no shell) keeps the URL and paths from being
        # interpreted by a shell. The exit status of wget is not checked.
        try:
            subprocess.run([
                "wget", "-c", "-r", "-P", tmp_dir, "-A", "profiles.json",
                "--reject-regex", "kmods|packages", "--no-parent", url,
            ])
        except FileNotFoundError:
            sys.stderr.write("wget not found\n")
            sys.exit(1)

        # delete empty folders (bottom-up, so emptied parents go as well)
        for folder, _dirs, _files in os.walk(tmp_dir, topdown=False):
            if folder != tmp_dir and not os.listdir(folder):
                os.rmdir(folder)

        # create overview.json files
        release_dirs = glob.glob(f"{tmp_dir}/*/snapshots") + glob.glob(f"{tmp_dir}/*/releases/*")
        for path in release_dirs:
            release = os.path.basename(path)
            # path relative to the download folder: <host>/.../<release>
            base = path[len(tmp_dir)+1:]

            profiles = {}
            for ppath in Path(path).rglob("profiles.json"):
                with open(ppath, "r") as file:
                    profiles[ppath] = file.read()

            # NOTE(review): the download URL always uses https, regardless
            # of the scheme of `url`.
            output = merge_profiles(profiles, f"https://{base}/targets/{{target}}")
            if not output:
                # nothing usable for this release; same policy as scrape()
                continue

            Path(f"{data_path}/{release}").mkdir(parents=True, exist_ok=True)

            # write overview.json
            with open(f"{data_path}/{release}/overview.json", "w") as outfile:
                json.dump(output, outfile, indent=" " if args.formatted else None, sort_keys=True)

            versions[release.upper()] = f"data/{release}/overview.json"

    update_config(config_path, versions)
209
210 '''
211 def change_prefix(images, old_prefix, new_prefix):
212 for image in images:
213 if image["name"].startswith(old_prefix):
214 image["name"] = new_prefix + image["name"][len(old_prefix):]
215 '''
216
def merge(input_paths):
    """
    Find and merge json files for a single release.

    input_paths: folders (searched recursively for *.json) and/or paths of
        individual .json files.

    The merged result is written to stdout as JSON, indented when
    --formatted was given. Exits with status 1 for a path that is neither
    a folder nor ends in ".json".
    """
    # OpenWrt JSON device files
    profiles = {}

    def add_path(path):
        with open(path, "r") as file:
            profiles[path] = file.read()

    for path in input_paths:
        if os.path.isdir(path):
            for filepath in Path(path).rglob("*.json"):
                add_path(filepath)
            continue

        if not path.endswith(".json"):
            sys.stderr.write(f"Folder does not exists: {path}\n")
            sys.exit(1)
        add_path(path)

    output = merge_profiles(profiles, args.download_url)

    json.dump(output, sys.stdout, indent=" " if args.formatted else None, sort_keys=True)
245
# Run the sub-command selected on the command line.
if args.action == "merge":
    merge(args.input_path)
elif args.action == "scrape":
    # --use-wget switches to the wget based scraper
    run_scrape = scrape_wget if args.use_wget else scrape
    run_scrape(args.domain, args.selector)