do not delete data folder
[web/firmware-selector-openwrt-org.git] / misc / collect.py
1 #!/usr/bin/env python3
2
3 from pathlib import Path
4 import urllib.request
5 import argparse
6 import json
7 import sys
8 import os
9 import re
10
11
# Command-line interface: a global --formatted flag plus two sub-commands,
# "merge" and "scrape". Exactly one sub-command must be given; its name is
# stored in args.action.
parser = argparse.ArgumentParser()
parser.add_argument("--formatted", action="store_true",
    help="Output formatted JSON data.")
subparsers = parser.add_subparsers(dest='action', required=True)

# merge: read local JSON device files and print one combined overview.
parser_merge = subparsers.add_parser('merge',
    help='Merge OpenWrt JSON device files into one overview JSON printed to stdout.')
parser_merge.add_argument("input_path", nargs="+",
    help="Input folder that is traversed for OpenWrt JSON device files.")
parser_merge.add_argument("--download-url", action="store", default="",
    help="Link to get the image from. May contain {target}, {version} and {commit}")
#parser_merge.add_argument("--change-prefix",
#    help="Change the openwrt- file name prefix.")

# scrape: fetch profiles from a download server and refresh the selector data.
parser_scrape = subparsers.add_parser('scrape',
    help='Scrape release profiles from a download server and update the selector data and config.js.')
parser_scrape.add_argument('domain',
    help='Domain to scrape. E.g. https://downloads.openwrt.org')
parser_scrape.add_argument('selector',
    help='Path the config.js file is in.')

# Parsed at import time; the functions below read this module-level object.
args = parser.parse_args()
34
# Only profile files that declare this metadata version are merged.
SUPPORTED_METADATA_VERSION = 1


# accepts {<file-path>: <file-content>}
def merge_profiles(profiles, download_url):
    """Merge OpenWrt profile JSON documents into one overview structure.

    Args:
        profiles: Mapping of a source path (or URL) to the raw JSON content
            of that file, as str or bytes.
        download_url: Stored unchanged under "download_url" in the result.

    Returns:
        A dict with the keys "version_code", "download_url" and "models",
        or an empty dict when no input file was accepted.

    Files that cannot be parsed or that declare an unsupported metadata
    version are reported on stderr and skipped. A file that lacks a required
    key aborts the program with exit status 1.
    """
    # json output data
    output = {}

    def get_title_name(title):
        """Return the explicit title, or one built from vendor/model/variant."""
        if "title" in title:
            return title["title"]
        return "{} {} {}".format(
            title.get("vendor", ""), title["model"], title.get("variant", "")
        ).strip()

    def add_profile(profile_id, target, profile, path, code=None):
        """Register one model entry per title of the given device profile."""
        images = [
            {"name": image["name"], "type": image["type"]}
            for image in profile["images"]
        ]

        if target is None:
            target = profile["target"]

        #if args.change_prefix:
        #    change_prefix(images, "openwrt-", args.change_prefix)

        for title in profile["titles"]:
            name = get_title_name(title)

            if not name:
                sys.stderr.write(f"Empty title. Skip title in {path}\n")
                continue

            output["models"][name] = {"id": profile_id, "target": target, "images": images}

            if code is not None:
                output["models"][name]["code"] = code

    for path, content in profiles.items():
        # Parsing happens inside its own try so that a malformed file is
        # skipped instead of crashing the run. ValueError covers both
        # json.JSONDecodeError and UnicodeDecodeError (bytes input).
        try:
            obj = json.loads(content)
        except ValueError as e:
            sys.stderr.write(f"Skip {path}\n {e}\n")
            continue

        try:
            if obj["metadata_version"] != SUPPORTED_METADATA_VERSION:
                sys.stderr.write(f"{path} has unsupported metadata version: {obj['metadata_version']} => skip\n")
                continue

            code = obj.get("version_code", obj.get("version_commit"))

            # The first accepted file defines the overview-wide version code.
            if "version_code" not in output:
                output = {
                    "version_code": code,
                    "download_url": download_url,
                    "models": {}
                }

            # if we have mixed codes/commits, store in device object
            if output["version_code"] == code:
                code = None

            if "profiles" in obj:
                # Multi-profile file: one entry per profile id.
                for profile_id in obj["profiles"]:
                    add_profile(profile_id, obj.get("target"), obj["profiles"][profile_id], path, code)
            else:
                # Single-device file: the object itself is the profile.
                add_profile(obj["id"], obj["target"], obj, path, code)
        except KeyError as e:
            sys.stderr.write(f"Abort on {path}\n Missing key {e}\n")
            sys.exit(1)

    return output
104
def scrape(url, selector_path):
    """Scrape release profiles from a download server into the selector.

    For every relative link on the page at ``url`` that ends in ``targets/``
    the profiles.json files below it are downloaded and merged, the result is
    written to ``<selector_path>/data/<release>/overview.json`` and the
    ``versions`` object in ``<selector_path>/config.js`` is rewritten to list
    the releases that produced output.

    Args:
        url: Base URL of the download server.
        selector_path: Directory that contains config.js.

    Exits with status 1 when config.js does not exist.
    """
    config_path = f"{selector_path}/config.js"
    data_path = f"{selector_path}/data"
    versions = {}

    def update_config(config_path, versions):
        """Replace the ``versions: {...}`` object inside config.js."""
        with open(config_path, 'r') as file:
            content = file.read()

        # A callable replacement is inserted literally; a plain replacement
        # string would have backslashes interpreted as escape sequences.
        content = re.sub(r'versions:[\s]*{[^}]*}', lambda _match: f'versions: {versions}', content)
        with open(config_path, 'w') as file:
            # save updated config
            file.write(content)

    def handle_release(target):
        """Download every profiles.json listed below ``target``."""
        profiles = {}
        # "?json" asks the server for the directory listing as a JSON array.
        with urllib.request.urlopen(f"{target}/?json") as listing:
            array = json.loads(listing.read().decode('utf-8'))
        for profile in array:
            if profile.endswith('/profiles.json'):
                with urllib.request.urlopen(f"{target}/{profile}") as response:
                    profiles[f"{target}/{profile}"] = response.read()
        return profiles

    if not os.path.isfile(config_path):
        print(f"file not found: {config_path}")
        sys.exit(1)

    # fetch release URLs
    with urllib.request.urlopen(url) as infile:
        # Decode the page instead of parsing the repr of the bytes object,
        # which escapes quotes and breaks single-quoted href attributes.
        page = infile.read().decode('utf-8', errors='replace')

    for path in re.findall(r'href=["\']?([^\'" >]+)', page):
        if path.startswith('/') or not path.endswith('targets/'):
            continue

        parts = path.strip('/').split('/')
        if len(parts) < 2:
            # A bare "targets/" link has no release component in front of it.
            continue
        release = parts[-2]
        download_url = f"{url}/{path}/{{target}}"

        profiles = handle_release(f"{url}/{path}")
        output = merge_profiles(profiles, download_url)
        if len(output) > 0:
            Path(f"{data_path}/{release}").mkdir(parents=True, exist_ok=True)
            # write overview.json
            with open(f"{data_path}/{release}/overview.json", 'w') as outfile:
                if args.formatted:
                    json.dump(output, outfile, indent=" ", sort_keys=True)
                else:
                    json.dump(output, outfile, sort_keys=True)

            # Only releases that produced an overview are listed in config.js.
            versions[release.upper()] = f"data/{release}/overview.json"

    update_config(config_path, versions)
155
# Disabled helper: the definition below lives inside a string literal, so it
# is never executed. It would rewrite the prefix of image file names.
'''
def change_prefix(images, old_prefix, new_prefix):
    for image in images:
        if image["name"].startswith(old_prefix):
            image["name"] = new_prefix + image["name"][len(old_prefix):]
'''
162
def merge(input_paths):
    """Merge local JSON device files and print the overview to stdout.

    Args:
        input_paths: Folders (searched recursively for ``*.json``) and/or
            paths of individual ``.json`` files.

    Exits with status 1 when a path is neither a folder nor a ``.json`` file.
    """
    # OpenWrt JSON device files
    profiles = {}

    def add_path(path):
        """Read one file and remember its content under its path."""
        # JSON is UTF-8; do not depend on the locale's default encoding.
        with open(path, "r", encoding="utf-8") as file:
            profiles[path] = file.read()

    for path in input_paths:
        if os.path.isdir(path):
            for filepath in Path(path).rglob("*.json"):
                add_path(filepath)
        else:
            if not path.endswith(".json"):
                sys.stderr.write(f"Folder does not exist: {path}\n")
                sys.exit(1)
            add_path(path)

    output = merge_profiles(profiles, args.download_url)

    if args.formatted:
        json.dump(output, sys.stdout, indent=" ", sort_keys=True)
    else:
        json.dump(output, sys.stdout, sort_keys=True)
188
# Run the handler that belongs to the sub-command chosen on the command line.
if args.action == "merge":
    merge(args.input_path)
elif args.action == "scrape":
    scrape(args.domain, args.selector)