allow collect.py script to scrape sites like downloads.openwrt.org
[web/firmware-selector-openwrt-org.git] / misc / collect.py
#!/usr/bin/env python3

from pathlib import Path
import urllib.request
import argparse
import shutil
import json
import sys
import os
import re

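# Example invocations (the local paths and example.com below are illustrative only):
#   ./collect.py merge path/to/json/files --download-url "https://example.com/{version}/{target}"
#   ./collect.py scrape https://downloads.openwrt.org /path/to/firmware-selector
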
parser = argparse.ArgumentParser()
parser.add_argument("--formatted", action="store_true",
    help="Output formatted JSON data.")
subparsers = parser.add_subparsers(dest='action', required=True)

parser_merge = subparsers.add_parser('merge',
    help='Merge local OpenWrt JSON device files into a single overview.')
parser_merge.add_argument("input_path", nargs="+",
    help="Input folder that is traversed for OpenWrt JSON device files.")
parser_merge.add_argument("--download-url", action="store", default="",
    help="Link to get the image from. May contain {target}, {version} and {commit}")
#parser_merge.add_argument("--change-prefix",
#    help="Change the openwrt- file name prefix.")

parser_scrape = subparsers.add_parser('scrape',
    help='Scrape profiles.json files from a download server and update a firmware selector instance.')
parser_scrape.add_argument('domain',
    help='Domain to scrape. E.g. https://downloads.openwrt.org')
parser_scrape.add_argument('selector',
    help='Path to the firmware selector directory that contains config.js.')

args = parser.parse_args()

SUPPORTED_METADATA_VERSION = 1

# accepts {<file-path>: <file-content>}
def merge_profiles(profiles, download_url):
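    """
    Merge the given OpenWrt JSON device files into one overview structure:
    {"version_code": ..., "download_url": ..., "models": {<title>: {"id", "target", "images"}}}.
    `profiles` maps each file path to the raw JSON content of that file.
    """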
    # json output data
    output = {}

    def get_title_name(title):
        if "title" in title:
            return title["title"]
        else:
            return "{} {} {}".format(title.get("vendor", ""), title["model"], title.get("variant", "")).strip()

    def add_profile(id, target, profile, code=None):
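        # Register one profile under each of its titles in output["models"],
        # keeping only image name and type; `path` in the warning below is the
        # file currently processed by the loop further down.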
        images = []
        for image in profile["images"]:
            images.append({"name": image["name"], "type": image["type"]})

        if target is None:
            target = profile["target"]

        #if args.change_prefix:
        #    change_prefix(images, "openwrt-", args.change_prefix)

        for title in profile["titles"]:
            name = get_title_name(title)

            if len(name) == 0:
                sys.stderr.write(f"Empty title. Skip title in {path}\n")
                continue

            output["models"][name] = {"id": id, "target": target, "images": images}

            if code is not None:
                output["models"][name]["code"] = code

    for path, content in profiles.items():
        try:
            obj = json.loads(content)
        except json.decoder.JSONDecodeError as e:
            sys.stderr.write(f"Skip {path}\n {e}\n")
            continue

        if obj["metadata_version"] != SUPPORTED_METADATA_VERSION:
            sys.stderr.write(f"{path} has unsupported metadata version: {obj['metadata_version']} => skip\n")
            continue

        code = obj.get("version_code", obj.get("version_commit"))

        if "version_code" not in output:
            output = {
                "version_code": code,
                "download_url": download_url,
                "models": {}
            }

        # if we have mixed codes/commits, store in device object
        if output["version_code"] == code:
            code = None

        try:
            if "profiles" in obj:
                for id in obj["profiles"]:
                    add_profile(id, obj.get("target"), obj["profiles"][id], code)
            else:
                add_profile(obj["id"], obj["target"], obj, code)
        except KeyError as e:
            sys.stderr.write(f"Abort on {path}\n Missing key {e}\n")
            exit(1)

    return output

def scrape(url, selector_path):
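    """
    Scrape a download server like `url` for release target listings, merge the
    profiles.json files of each release into data/<release>/overview.json below
    `selector_path` and update the `versions` map in its config.js.
    """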
    config_path = f"{selector_path}/config.js"
    data_path = f"{selector_path}/data"
    versions = {}

    def update_config(config_path, versions):
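        # Replace the `versions: {...}` block in config.js with the collected
        # release -> overview.json map; the Python dict repr written here is
        # also valid JavaScript object notation.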
        content = ''
        with open(config_path, 'r') as file:
            content = file.read()

        content = re.sub(r'versions:\s*{[^}]*}', f'versions: {versions}', content)
        with open(config_path, 'w+') as file:
            # save updated config
            file.write(content)

    def handle_release(target):
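        # Fetch the JSON file listing of a targets/ URL (via "?json") and
        # download every profiles.json it references, keyed by full URL.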
        profiles = {}
        with urllib.request.urlopen(f"{target}/?json") as file:
            array = json.loads(file.read().decode('utf-8'))
            for profile in filter(lambda x: x.endswith('/profiles.json'), array):
                #print(profile)
                with urllib.request.urlopen(f"{target}/{profile}") as file:
                    profiles[f"{target}/{profile}"] = file.read()
        return profiles

    if not os.path.isfile(config_path):
        print(f"file not found: {config_path}")
        exit(1)

    shutil.rmtree(data_path, ignore_errors=True)

    # fetch release URLs
    with urllib.request.urlopen(url) as infile:
        for path in re.findall(r'href=["\']?([^\'" >]+)', infile.read().decode('utf-8')):
            if not path.startswith('/') and path.endswith('targets/'):
                release = path.strip('/').split('/')[-2]
                download_url = f"{url}/{path}/{{target}}"

                profiles = handle_release(f"{url}/{path}")
                output = merge_profiles(profiles, download_url)
                if len(output) > 0:
                    Path(f"{data_path}/{release}").mkdir(parents=True, exist_ok=True)
                    # write overview.json
                    with open(f"{data_path}/{release}/overview.json", 'w') as outfile:
                        if args.formatted:
                            json.dump(output, outfile, indent=" ", sort_keys=True)
                        else:
                            json.dump(output, outfile, sort_keys=True)

                    versions[release.upper()] = f"data/{release}/overview.json"

    update_config(config_path, versions)

'''
def change_prefix(images, old_prefix, new_prefix):
    for image in images:
        if image["name"].startswith(old_prefix):
            image["name"] = new_prefix + image["name"][len(old_prefix):]
'''

def merge(input_paths):
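    """
    Read OpenWrt JSON device files from the given folders and/or .json files
    and write the merged overview JSON to stdout.
    """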
    # OpenWrt JSON device files
    profiles = {}

    def add_path(path):
        with open(path, "r") as file:
            profiles[path] = file.read()

    for path in input_paths:
        if os.path.isdir(path):
            for filepath in Path(path).rglob("*.json"):
                add_path(filepath)
        else:
            if not path.endswith(".json"):
                sys.stderr.write(f"Folder does not exist: {path}\n")
                exit(1)
            add_path(path)

    output = merge_profiles(profiles, args.download_url)

    if args.formatted:
        json.dump(output, sys.stdout, indent=" ", sort_keys=True)
    else:
        json.dump(output, sys.stdout, sort_keys=True)

if args.action == "merge":
    merge(args.input_path)

if args.action == "scrape":
    scrape(args.domain, args.selector)