Changeset View
Changeset View
Standalone View
Standalone View
kde-modules/httpupdate.py
- This file was added.
Property | Old Value | New Value |
---|---|---|
File Mode | null | 100755 |
1 | #!/usr/bin/env python3 | ||||
---|---|---|---|---|---|
2 | | ||||
3 | # Copyright 2019 Sandro Knauß <sknauss@kde.org> | ||||
4 | # Copyright 2019 Volker Krause <vkrause@kde.org> | ||||
5 | # | ||||
6 | # Redistribution and use in source and binary forms, with or without | ||||
7 | # modification, are permitted provided that the following conditions | ||||
8 | # are met: | ||||
9 | # | ||||
10 | # 1. Redistributions of source code must retain the copyright | ||||
11 | # notice, this list of conditions and the following disclaimer. | ||||
12 | # 2. Redistributions in binary form must reproduce the copyright | ||||
13 | # notice, this list of conditions and the following disclaimer in the | ||||
14 | # documentation and/or other materials provided with the distribution. | ||||
15 | # 3. The name of the author may not be used to endorse or promote products | ||||
16 | # derived from this software without specific prior written permission. | ||||
17 | # | ||||
18 | # THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR | ||||
19 | # IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES | ||||
20 | # OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. | ||||
21 | # IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, | ||||
22 | # INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT | ||||
23 | # NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, | ||||
24 | # DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY | ||||
25 | # THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT | ||||
26 | # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF | ||||
27 | # THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | ||||
28 | | ||||
29 | import collections | ||||
30 | import dns.resolver | ||||
31 | import glob | ||||
32 | import fnmatch | ||||
33 | import os | ||||
34 | import re | ||||
35 | import requests | ||||
36 | import ssl | ||||
37 | import sys | ||||
38 | import urllib.parse | ||||
39 | import urllib.request | ||||
40 | | ||||
# Shared DNS resolver pinned to public nameservers (Cloudflare, Google)
# so lookup results do not depend on the local machine's DNS setup.
RESOLVER = dns.resolver.Resolver()
RESOLVER.nameservers=["1.1.1.1", "8.8.8.8"]
43 | | ||||
class HTTPChecker:
    """Extract candidate http(s) URLs from text files.

    Lines matching any blacklist regex (case-insensitive) are skipped
    entirely, so known-bad URLs never show up in the result set.
    """

    def __init__(self, blacklist):
        """blacklist: list of regex strings, matched case-insensitively."""
        self.blacklist = blacklist
        self.reBlacklist = [re.compile(r, re.I) for r in blacklist]
        # First character after "://" must not be '.' (rejects junk like
        # "http://."); the rest runs until whitespace/quote/regex-ish chars.
        self.updater = re.compile(r'(https?://[^. \t\n"\'?<>*#()\\][^ \t\n"\'?<>*#()\\]+)', re.I)

    def urls(self, fpath):
        """Return the set of URLs found in *fpath*, minus blacklisted lines.

        Raises UnicodeDecodeError for binary files (the caller handles it).
        """
        urls = set()
        # encoding pinned so results do not depend on the locale
        with open(fpath, 'r', encoding='utf-8') as f:
            for line in f:
                if any(r.search(line) for r in self.reBlacklist):
                    continue
                urls.update(self.updater.findall(line))
        return urls
61 | | ||||
class Url:
    """A single http(s) URL plus DNS / reachability / https-upgrade checks."""

    # HTTP status codes that still prove the server answers for this path.
    # The 4xx entries are intentional: auth-protected or method-restricted
    # resources exist even though we cannot GET them anonymously.
    OK_STATUS = (200, 400, 401, 403, 405, 406)

    def __init__(self, url):
        self.url = url
        # The extraction regex in HTTPChecker guarantees this matches.
        m = re.match(r"^(https?)://([^/]+)((/.*)$|$)", self.url, re.I)
        self.protocol = m.group(1)
        self.domain = m.group(2)
        self.path = m.group(3)
        self.resolver = RESOLVER

    def checkDns(self):
        """Return True if the domain resolves to an A or AAAA record."""
        for rtype in ("A", "AAAA"):
            try:
                self.resolver.query(self.domain, rtype)
                return True
            except (dns.resolver.NoAnswer, dns.resolver.NXDOMAIN, dns.resolver.NoNameservers):
                pass
        return False

    def _reachable(self, url):
        """Return True if *url* answers with an acceptable status code.

        Tries requests first, then falls back to urllib (some servers
        reject one client library but accept the other).  SSL failures
        are definitive: the URL must not be treated as reachable.
        """
        try:
            r = requests.get(url, timeout=5)
            if r.status_code in self.OK_STATUS:
                return True
        # BUG FIX: requests wraps TLS failures in requests.exceptions.SSLError
        # (a ConnectionError subclass), never in ssl.SSLError, so the old
        # `except ssl.SSLError` branch was dead and SSL failures were being
        # retried as plain connection errors.
        except requests.exceptions.SSLError:
            return False
        except (requests.ConnectionError, requests.Timeout):
            pass
        try:
            u = urllib.request.urlopen(url, timeout=5)
            if u.status == 200:
                return True
        except ssl.SSLError:
            return False
        except (urllib.request.HTTPError, urllib.request.URLError):
            return False
        return False

    def checkUrl(self):
        """Return True if the URL as written is reachable."""
        return self._reachable(self.url)

    @property
    def secureUrl(self):
        """The same URL with the scheme forced to https."""
        return "https://{}{}".format(self.domain, self.path)

    def checkSecureUrl(self):
        """Return True if the https variant of the URL is reachable."""
        return self._reachable(self.secureUrl)

    def check(self):
        """Run all checks; return True, or the name of the failing check."""
        if not self.checkDns():
            return "checkDns"
        if not self.checkUrl():
            return "checkUrl"
        if self.protocol == "http" and not self.checkSecureUrl():
            return "checkSecureUrl"
        return True
137 | | ||||
def loadBlacklist(path, ignoreAuto=False):
    """Read one .htignore file and return its regex entries as a list.

    A "# ..." comment line annotates the entry on the following line.
    With ignoreAuto=True, the entry right after a comment starting with
    "Auto:" is dropped so previously auto-generated suppressions get
    re-validated.  A missing file simply yields an empty list.
    """
    entries = []
    try:
        with open(path) as handle:
            pending_comment = ""
            for raw in handle:
                comment_match = re.match(r"^\s*#\s*(.*)", raw)
                if comment_match:
                    pending_comment = comment_match.group(1).strip()
                    continue

                if ignoreAuto and pending_comment.startswith("Auto:"):
                    pending_comment = ""
                    continue

                entry = raw.strip()
                if entry:
                    entries.append(entry)
                    pending_comment = ""
    except FileNotFoundError:
        pass
    return entries
159 | | ||||
def getBlacklist(path, ignoreAuto=False):
    """Collect blacklist entries from every *.htignore shipped next to
    this module, plus the project-local file at *path*.

    With ignoreAuto=True the shared reduce-warning list is skipped and
    "Auto:"-annotated entries are dropped (see loadBlacklist), so stale
    auto-generated suppressions are re-checked.
    """
    moduleDir = os.path.dirname(__file__)
    entries = []
    for fname in glob.glob(os.path.join(moduleDir, "*.htignore")):
        if ignoreAuto and fname.endswith('reduce-warning.htignore'):
            continue
        entries.extend(loadBlacklist(fname, ignoreAuto))
    entries.extend(loadBlacklist(path, ignoreAuto))
    return entries
167 | | ||||
class GitIgnore:
    """Minimal .gitignore matcher based on fnmatch.

    NOTE(review): this is only an approximation of git's semantics --
    negation ("!pattern"), directory anchoring ("/build") and "**" are
    not implemented; patterns are matched against the path relative to
    the repository root.
    """

    def __init__(self, path):
        """Load patterns from <path>/.gitignore (the file must exist)."""
        self.basepath = path
        self.patterns = []
        with open(os.path.join(path, '.gitignore')) as f:
            for line in f:
                pattern = line.strip()
                # BUG FIX: blank lines and "#" comments are not patterns in
                # the gitignore format; previously they were fed to fnmatch
                # verbatim.
                if pattern and not pattern.startswith('#'):
                    self.patterns.append(pattern)

    def match(self, path):
        """Return True if *path* matches any loaded pattern."""
        relative = os.path.relpath(path, self.basepath)
        return any(fnmatch.fnmatch(relative, p) for p in self.patterns)
181 | | ||||
def main(path):
    """Check every URL referenced in the source tree under *path*.

    - http:// URLs that also answer over https are rewritten in place.
    - Broken URLs are printed and recorded in <path>/.htignore with an
      "Auto:" comment so later runs skip them; manually added entries
      are preserved across the rewrite.
    """
    blacklist = getBlacklist(os.path.join(path, '.htignore'), ignoreAuto=True)
    checker = HTTPChecker(blacklist)
    gitIgnore = GitIgnore(path)
    # our own metadata files must never be scanned or rewritten
    gitIgnore.patterns.append('.htignore')
    gitIgnore.patterns.append('.gitignore')

    # url -> set of files referencing it
    urls = collections.defaultdict(set)
    for dirpath, dirnames, filenames in os.walk(path):
        # NOTE(review): splitting on "/" assumes POSIX path separators
        parts = dirpath.split("/")
        if any(p in parts for p in ('.git', 'tests', 'autotests', '3rdparty')):
            continue
        for fname in filenames:
            fpath = os.path.join(dirpath, fname)
            if gitIgnore.match(fpath):
                continue
            try:
                for url in checker.urls(fpath):
                    urls[url].add(fpath)
            except UnicodeDecodeError:
                pass  # binary file, nothing to scan

    # Re-read the manual (non-"Auto:") .htignore entries, with their
    # comments, so they survive the rewrite at the end.
    manual_overwrites = []
    try:
        with open(os.path.join(path, '.htignore')) as f:
            comment = None
            for line in f:
                m = re.match(r"^\s*#\s*(.*)", line)
                if m:
                    comment = m.group(1).strip()
                    continue

                if comment and comment.startswith("Auto:"):
                    comment = None
                    continue

                if line.strip():
                    manual_overwrites.append((line.strip(), comment))
                    comment = None

    except FileNotFoundError:
        pass

    # check() returns True or one of exactly these three names
    reasons = {
        "checkDns": "Auto: No DNS response",
        "checkUrl": "Auto: Url does not answer",
        "checkSecureUrl": "Auto: No https alternative",
    }
    auto_overwrites = []
    for url in sorted(urls):
        u = Url(url)
        updateResult = u.check()
        if updateResult is True and u.protocol == "http":
            # http URL with a working https twin: upgrade it everywhere
            for fpath in urls[url]:
                print(f"updating {fpath}")
                with open(fpath, 'r') as f:
                    content = f.read()
                with open(fpath, 'w') as f:
                    # BUG FIX: re.escape() instead of url.replace(".", "\\.")
                    # -- the old form only escaped dots, so URLs containing
                    # other regex metacharacters (e.g. "+") matched wrongly,
                    # and "\." was an invalid escape in a non-raw string.
                    f.write(re.sub(re.escape(u.url), u.secureUrl, content, flags=re.I))
        elif updateResult is not True:
            print(f"{u.url}: {updateResult}")
            if u.protocol == "http":
                auto_overwrites.append((re.escape(u.url), reasons[updateResult]))

    if not manual_overwrites and not auto_overwrites:
        return

    # rewrite .htignore: manual entries first, then sorted auto entries
    with open(os.path.join(path, '.htignore'), 'w') as ow:
        if manual_overwrites:
            ow.write("\n".join([formatOverwrite(i) for i in manual_overwrites]))
            ow.write("\n")

        if auto_overwrites:
            ow.write("\n".join([formatOverwrite(i) for i in sorted(auto_overwrites)]))
            ow.write("\n")
257 | | ||||
def formatOverwrite(entry):
    """Render a (pattern, comment) pair as .htignore line(s).

    A truthy comment is emitted as a "# ..." line above the pattern;
    otherwise the pattern stands alone.
    """
    pattern, comment = entry[0], entry[1]
    return f"# {comment}\n{pattern}" if comment else pattern
263 | | ||||
264 | | ||||
if __name__ == "__main__":
    # Default to the current directory; an optional argument selects the
    # tree to check.  BUG FIX: the old `urls = main(path)` bound None --
    # main() has no meaningful return value.
    path = sys.argv[1] if len(sys.argv) > 1 else "."
    main(path)