diff --git a/kde-modules/KDECMakeSettings.cmake b/kde-modules/KDECMakeSettings.cmake --- a/kde-modules/KDECMakeSettings.cmake +++ b/kde-modules/KDECMakeSettings.cmake @@ -186,6 +186,14 @@ endif() endfunction() +find_package(Python3 COMPONENTS Interpreter QUIET) +function(httpcheck) + if(TARGET Python3::Interpreter AND NOT httpcheck_added) + set(httpcheck_added TRUE PARENT_SCOPE) + add_test(NAME httpcheck COMMAND Python3::Interpreter ${CMAKE_CURRENT_LIST_DIR}/httpcheck.py ${CMAKE_SOURCE_DIR}) + endif() +endfunction() + if(NOT KDE_SKIP_TEST_SETTINGS) # If there is a CTestConfig.cmake, include CTest. @@ -199,6 +207,7 @@ if(BUILD_TESTING) enable_testing() appstreamtest() + httpcheck() endif () endif () diff --git a/kde-modules/base.htignore b/kde-modules/base.htignore new file mode 100644 --- /dev/null +++ b/kde-modules/base.htignore @@ -0,0 +1,4 @@ +# manual ignore a line. +# Line based overwrite +HTTPSCHECK_IGNORE: true + diff --git a/kde-modules/httpcheck.py b/kde-modules/httpcheck.py new file mode 100755 --- /dev/null +++ b/kde-modules/httpcheck.py @@ -0,0 +1,154 @@ +#!/usr/bin/env python3 + +# Copyright 2019 Sandro Knauß +# Copyright 2019 Volker Krause +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions +# are met: +# +# 1. Redistributions of source code must retain the copyright +# notice, this list of conditions and the following disclaimer. +# 2. Redistributions in binary form must reproduce the copyright +# notice, this list of conditions and the following disclaimer in the +# documentation and/or other materials provided with the distribution. +# 3. The name of the author may not be used to endorse or promote products +# derived from this software without specific prior written permission. 
#
# THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
# IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
# OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
# IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
# INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
# NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
# THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

import fnmatch
import glob
import os
import re
import sys

# Directory names that are never scanned, wherever they occur below the root.
SKIPPED_DIRS = frozenset(('.git', 'tests', 'autotests', '3rdparty'))


class HTTPChecker:
    """Report lines that contain a plain ``http://`` link."""

    def __init__(self, blacklist):
        """Compile *blacklist*, a list of regular-expression strings.

        A line on which any of these patterns is found (case-insensitively)
        is never reported.
        """
        self.blacklist = blacklist
        self.reBlacklist = [re.compile(r, re.I) for r in blacklist]
        self.matcher = re.compile(r'http://\S', re.I)

    def checkFile(self, fpath):
        """Scan the text file *fpath* and return its findings.

        Each offending line is printed as ``path:line:<TAB>text`` (a leading
        ``./`` is dropped from the printed path) and returned as a
        ``(linenumber, line)`` tuple; line numbers start at 1.

        Raises ``UnicodeDecodeError`` when the file is not valid UTF-8; the
        encoding is fixed so the result does not depend on the locale.
        """
        findings = []
        shown = fpath[2:] if fpath.startswith('./') else fpath
        with open(fpath, 'r', encoding='utf-8') as f:
            for linenumber, line in enumerate(f, start=1):
                # "http://." is not treated as a link.  Every match on the
                # line is inspected, so one such occurrence cannot hide a
                # real link further along the same line.
                if all(m.group(0)[-1] == "."
                       for m in self.matcher.finditer(line)):
                    continue
                if any(r.search(line) for r in self.reBlacklist):
                    continue
                print("{}:{}:\t{}".format(shown, linenumber, line.strip()))
                findings.append((linenumber, line))
        return findings


def loadBlacklist(path, ignoreAuto=False):
    """Return the patterns listed in the ignore file *path*.

    Lines starting with ``#`` are comments, every other non-blank line is
    one pattern.  With *ignoreAuto* set, the line directly following a
    comment that starts with ``Auto:`` is dropped.  A missing file yields
    an empty list.
    """
    blacklist = []
    try:
        with open(path, encoding='utf-8') as f:
            comment = ""
            for line in f:
                m = re.match(r"^\s*#\s*(.*)", line)
                if m:
                    comment = m.group(1).strip()
                    continue
                isAuto = comment.startswith("Auto:")
                comment = ""
                if ignoreAuto and isAuto:
                    continue
                entry = line.strip()
                if entry:
                    blacklist.append(entry)
    except FileNotFoundError:
        pass
    return blacklist


def getBlacklist(path, ignoreAuto=False):
    """Combine the ``*.htignore`` files next to this script with *path*.

    With *ignoreAuto* set, ``reduce-warning.htignore`` is left out and
    ``Auto:`` entries are dropped from every file that is read.
    """
    blacklist = []
    pattern = os.path.join(os.path.dirname(__file__), "*.htignore")
    # Sorted so the resulting order does not depend on directory order.
    for fname in sorted(glob.glob(pattern)):
        if ignoreAuto and fname.endswith('reduce-warning.htignore'):
            continue
        blacklist += loadBlacklist(fname, ignoreAuto)
    return blacklist + loadBlacklist(path, ignoreAuto)


class GitIgnore:
    """Glob-based approximation of a project's top-level ``.gitignore``.

    Patterns are compared with ``fnmatch`` against the path relative to the
    project root; full gitignore semantics (negation, directory-only
    patterns, nested ignore files) are not implemented.
    """

    def __init__(self, path):
        """Load ``<path>/.gitignore``; a missing file means no patterns."""
        self.basepath = path
        self.patterns = []
        try:
            with open(os.path.join(path, '.gitignore'),
                      encoding='utf-8') as f:
                for line in f:
                    pattern = line.strip()
                    # Blank lines and comments are not patterns.
                    if pattern and not pattern.startswith('#'):
                        self.patterns.append(pattern)
        except FileNotFoundError:
            pass

    def match(self, path):
        """Return True when *path* matches one of the loaded patterns."""
        relpath = os.path.relpath(path, self.basepath)
        return any(fnmatch.fnmatch(relpath, p) for p in self.patterns)


def main(path):
    """Check every file below *path* and exit with the result.

    Exits the process with status 1 when at least one unignored
    ``http://`` link was found, with status 0 otherwise.
    """
    checker = HTTPChecker(getBlacklist(os.path.join(path, '.htignore')))
    gitIgnore = GitIgnore(path)
    gitIgnore.patterns.extend(('.htignore', '.gitignore'))
    findings = {}
    for dirpath, dirnames, filenames in os.walk(path):
        # Prune in place: only directories *below* the root are judged (a
        # checkout living in ".../tests/project" is still scanned) and the
        # skipped trees are not walked at all.
        dirnames[:] = [d for d in dirnames if d not in SKIPPED_DIRS]
        for fname in filenames:
            fpath = os.path.join(dirpath, fname)
            if gitIgnore.match(fpath):
                continue
            try:
                issues = checker.checkFile(fpath)
            except UnicodeDecodeError:
                # Not a UTF-8 text file (most likely binary): nothing to check.
                continue
            if issues:
                findings[fpath] = issues
    sys.exit(1 if findings else 0)


# The remainder of this guard (argument handling and the main() call)
# follows directly after this span.
if __name__ == "__main__":
    path = "."
+ if len(sys.argv) > 1: + path = sys.argv[1] + urls = main(path) diff --git a/kde-modules/httpupdate.py b/kde-modules/httpupdate.py new file mode 100755 --- /dev/null +++ b/kde-modules/httpupdate.py @@ -0,0 +1,269 @@ +#!/usr/bin/env python3 + +# Copyright 2019 Sandro Knauß +# Copyright 2019 Volker Krause +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions +# are met: +# +# 1. Redistributions of source code must retain the copyright +# notice, this list of conditions and the following disclaimer. +# 2. Redistributions in binary form must reproduce the copyright +# notice, this list of conditions and the following disclaimer in the +# documentation and/or other materials provided with the distribution. +# 3. The name of the author may not be used to endorse or promote products +# derived from this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR +# IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES +# OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. +# IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, +# INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT +# NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF +# THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 

import collections
import dns.exception
import dns.resolver
import glob
import fnmatch
import http.client
import os
import re
import requests
import ssl
import sys
import urllib.error
import urllib.parse
import urllib.request

RESOLVER = dns.resolver.Resolver()
RESOLVER.nameservers = ["1.1.1.1", "8.8.8.8"]

# Directory names that are never scanned, wherever they occur below the root.
SKIPPED_DIRS = frozenset(('.git', 'tests', 'autotests', '3rdparty'))

# HTTP status codes that count as "the server answered for this URL".
ACCEPTED_STATUS = (200, 400, 401, 403, 405, 406)

# Lookahead that is true where a URL found by HTTPChecker.updater ends: the
# next character is one the URL pattern excludes, a line ending, or the end
# of the text.
URL_END = r'(?![^ \t\r\n"\'?<>*#()\\])'

# Maps the failing step reported by Url.check() to the .htignore comment.
AUTO_REASONS = {
    "checkDns": "Auto: No DNS response",
    "checkUrl": "Auto: Url does not answer",
    "checkSecureUrl": "Auto: No https alternative",
}


class HTTPChecker:
    """Collect the http(s) URLs that occur on non-ignored lines."""

    def __init__(self, blacklist):
        """Compile *blacklist*, a list of regular-expression strings.

        Lines on which any of these patterns is found (case-insensitively)
        are skipped entirely.
        """
        self.blacklist = blacklist
        self.reBlacklist = [re.compile(r, re.I) for r in blacklist]
        self.updater = re.compile(r'(https?://[^. \t\n"\'?<>*#()\\][^ \t\n"\'?<>*#()\\]+)', re.I)

    def urls(self, fpath):
        """Return the set of URLs found in the text file *fpath*.

        Raises ``UnicodeDecodeError`` when the file is not valid UTF-8.
        """
        urls = set()
        with open(fpath, 'r', encoding='utf-8') as f:
            for line in f:
                if any(r.search(line) for r in self.reBlacklist):
                    continue
                urls.update(self.updater.findall(line))
        return urls


def _isReachable(url):
    """Return True when *url* answers, first via requests, then via urllib.

    A TLS failure counts as "not reachable" right away.  Any other transport
    problem of ``requests`` falls back to ``urllib``, where only status 200
    is accepted.
    """
    try:
        r = requests.get(url, timeout=5)
        if r.status_code in ACCEPTED_STATUS:
            return True
    except (ssl.SSLError, requests.exceptions.SSLError):
        # requests wraps TLS errors in its own SSLError, so ssl.SSLError
        # alone would never match here.
        return False
    except requests.RequestException:
        pass
    try:
        with urllib.request.urlopen(url, timeout=5) as u:
            return u.status == 200
    except (OSError, http.client.HTTPException, ValueError):
        # URLError, HTTPError, ssl.SSLError and socket timeouts are all
        # OSError subclasses; a broken server must not abort the whole run.
        return False


class Url:
    """One URL found in the sources, with the checks needed to upgrade it."""

    def __init__(self, url):
        """Split *url* into protocol, domain and path.

        Raises ``ValueError`` when *url* is not a usable http(s) URL.
        """
        self.url = url
        m = re.match(r"^(https?)://([^/]+)((/.*)$|$)", self.url, re.I)
        if m is None:
            raise ValueError("not a usable http(s) URL")
        # Lower-cased so that "HTTP://..." is recognised as plain http too.
        self.protocol = m.group(1).lower()
        self.domain = m.group(2)
        self.path = m.group(3)
        try:
            hostname = urllib.parse.urlsplit(url).hostname
        except ValueError as err:
            raise ValueError("not a usable http(s) URL") from err
        # Name used for DNS lookups: the domain without user info and port.
        self.hostname = hostname or self.domain
        self.resolver = RESOLVER

    def checkDns(self):
        """Return True when the host has an A or an AAAA record."""
        failures = (dns.resolver.NoAnswer, dns.resolver.NXDOMAIN,
                    dns.resolver.NoNameservers, dns.exception.Timeout)
        # NOTE(review): dnspython 2.x deprecates Resolver.query() in favour
        # of resolve(); kept because the installed version is not visible
        # from here.
        for rdtype in ("A", "AAAA"):
            try:
                self.resolver.query(self.hostname, rdtype)
                return True
            except failures:
                continue
        return False

    def checkUrl(self):
        """Return True when the URL as written answers."""
        return _isReachable(self.url)

    @property
    def secureUrl(self):
        """The same URL with the ``https`` scheme."""
        return "https://{}{}".format(self.domain, self.path)

    def checkSecureUrl(self):
        """Return True when the https variant of the URL answers."""
        return _isReachable(self.secureUrl)

    def check(self):
        """Run all checks.

        Returns ``True`` when everything passed, otherwise the name of the
        first failing step: ``"checkDns"``, ``"checkUrl"`` or
        ``"checkSecureUrl"``.  The https check only runs for http URLs.
        """
        if not self.checkDns():
            return "checkDns"

        if not self.checkUrl():
            return "checkUrl"

        if self.protocol == "http" and not self.checkSecureUrl():
            return "checkSecureUrl"

        return True


def loadBlacklist(path, ignoreAuto=False):
    """Return the patterns listed in the ignore file *path*.

    Lines starting with ``#`` are comments, every other non-blank line is
    one pattern.  With *ignoreAuto* set, the line directly following a
    comment that starts with ``Auto:`` is dropped.  A missing file yields
    an empty list.
    """
    blacklist = []
    try:
        with open(path, encoding='utf-8') as f:
            comment = ""
            for line in f:
                m = re.match(r"^\s*#\s*(.*)", line)
                if m:
                    comment = m.group(1).strip()
                    continue
                isAuto = comment.startswith("Auto:")
                comment = ""
                if ignoreAuto and isAuto:
                    continue
                entry = line.strip()
                if entry:
                    blacklist.append(entry)
    except FileNotFoundError:
        pass
    return blacklist


def getBlacklist(path, ignoreAuto=False):
    """Combine the ``*.htignore`` files next to this script with *path*.

    With *ignoreAuto* set, ``reduce-warning.htignore`` is left out and
    ``Auto:`` entries are dropped from every file that is read.
    """
    blacklist = []
    pattern = os.path.join(os.path.dirname(__file__), "*.htignore")
    # Sorted so the resulting order does not depend on directory order.
    for fname in sorted(glob.glob(pattern)):
        if ignoreAuto and fname.endswith('reduce-warning.htignore'):
            continue
        blacklist += loadBlacklist(fname, ignoreAuto)
    return blacklist + loadBlacklist(path, ignoreAuto)


class GitIgnore:
    """Glob-based approximation of a project's top-level ``.gitignore``.

    Patterns are compared with ``fnmatch`` against the path relative to the
    project root; full gitignore semantics (negation, directory-only
    patterns, nested ignore files) are not implemented.
    """

    def __init__(self, path):
        """Load ``<path>/.gitignore``; a missing file means no patterns."""
        self.basepath = path
        self.patterns = []
        try:
            with open(os.path.join(path, '.gitignore'),
                      encoding='utf-8') as f:
                for line in f:
                    pattern = line.strip()
                    # Blank lines and comments are not patterns.
                    if pattern and not pattern.startswith('#'):
                        self.patterns.append(pattern)
        except FileNotFoundError:
            pass

    def match(self, path):
        """Return True when *path* matches one of the loaded patterns."""
        relpath = os.path.relpath(path, self.basepath)
        return any(fnmatch.fnmatch(relpath, p) for p in self.patterns)


def _collectUrls(path, checker, gitIgnore):
    """Map every URL found below *path* to the set of files containing it."""
    urls = collections.defaultdict(set)
    for dirpath, dirnames, filenames in os.walk(path):
        # Prune in place: only directories *below* the root are judged and
        # the skipped trees are not walked at all.
        dirnames[:] = [d for d in dirnames if d not in SKIPPED_DIRS]
        for fname in filenames:
            fpath = os.path.join(dirpath, fname)
            if gitIgnore.match(fpath):
                continue
            try:
                found = checker.urls(fpath)
            except UnicodeDecodeError:
                # Not a UTF-8 text file (most likely binary): leave it alone.
                continue
            for url in found:
                urls[url].add(fpath)
    return urls


def _loadManualOverwrites(htignore):
    """Return the hand-written ``(pattern, comment)`` entries of *htignore*.

    Entries whose comment starts with ``Auto:`` are left out because they
    are regenerated on every run.  All consecutive comment lines above an
    entry are kept (joined with newlines) so that regenerating the file does
    not truncate multi-line comments; *comment* is ``None`` when there is
    none.
    """
    entries = []
    try:
        with open(htignore, encoding='utf-8') as f:
            comments = []
            for line in f:
                m = re.match(r"^\s*#\s*(.*)", line)
                if m:
                    comments.append(m.group(1).strip())
                    continue
                isAuto = bool(comments) and comments[-1].startswith("Auto:")
                comment = "\n".join(comments) if comments else None
                comments = []
                entry = line.strip()
                if entry and not isAuto:
                    entries.append((entry, comment))
    except FileNotFoundError:
        pass
    return entries


def _secureFile(fpath, checker, url, secureUrl):
    """Replace *url* by *secureUrl* on every non-ignored line of *fpath*.

    Works line by line so that lines protected by the blacklist (XML
    namespaces, for instance) stay untouched, matches the URL only where it
    ends (a URL that merely starts with *url* is not rewritten), and keeps
    the file's original line endings.
    """
    pattern = re.compile(re.escape(url) + URL_END, re.I)
    with open(fpath, 'r', encoding='utf-8', newline='') as f:
        lines = list(f)
    updated = []
    for line in lines:
        if not any(r.search(line) for r in checker.reBlacklist):
            # A callable replacement keeps the new URL literal.
            line = pattern.sub(lambda _m: secureUrl, line)
        updated.append(line)
    with open(fpath, 'w', encoding='utf-8', newline='') as f:
        f.writelines(updated)


def main(path):
    """Upgrade working http links below *path* and refresh ``.htignore``.

    Every URL found is checked.  http URLs whose https variant answers are
    rewritten in place; http URLs that fail a check are recorded in
    ``<path>/.htignore`` as ``Auto:`` entries, after the hand-written
    entries already present in that file.
    """
    htignore = os.path.join(path, '.htignore')
    checker = HTTPChecker(getBlacklist(htignore, ignoreAuto=True))
    gitIgnore = GitIgnore(path)
    gitIgnore.patterns.extend(('.htignore', '.gitignore'))
    urls = _collectUrls(path, checker, gitIgnore)
    manual_overwrites = _loadManualOverwrites(htignore)

    auto_overwrites = []
    for url in sorted(urls):
        try:
            u = Url(url)
        except ValueError as err:
            print(f"{url}: {err}")
            continue
        updateResult = u.check()
        if updateResult is True:
            if u.protocol == "http":
                for fpath in sorted(urls[url]):
                    print(f"updating {fpath}")
                    _secureFile(fpath, checker, u.url, u.secureUrl)
            continue
        print(f"{u.url}: {updateResult}")
        reason = AUTO_REASONS.get(updateResult)
        if u.protocol == "http" and reason:
            # Entries in .htignore are regular expressions.
            auto_overwrites.append((re.escape(u.url), reason))

    if not manual_overwrites and not auto_overwrites:
        return

    with open(htignore, 'w', encoding='utf-8') as ow:
        if manual_overwrites:
            ow.write("\n".join(formatOverwrite(i) for i in manual_overwrites))
            ow.write("\n")

        if auto_overwrites:
            ow.write("\n".join(formatOverwrite(i)
                               for i in sorted(auto_overwrites)))
            ow.write("\n")


def formatOverwrite(entry):
    """Render one ``(pattern, comment)`` pair as ``.htignore`` text.

    Each line of a non-empty comment is written as ``# <line>`` above the
    pattern; without a comment only the pattern is returned.
    """
    pattern, comment = entry
    if not comment:
        return pattern
    header = "\n".join(f"# {text}" for text in comment.split("\n"))
    return f"{header}\n{pattern}"


if __name__ == "__main__":
    path = "."
    if len(sys.argv) > 1:
        path = sys.argv[1]
    main(path)