# Locate a Python 3 interpreter for the http-URL check below.  QUIET so that
# configuring a project without Python simply disables the check instead of
# failing.
find_package(Python3 COMPONENTS Interpreter QUIET)

# Register the "httpcheck" ctest: it runs httpcheck.py (next to this module)
# over ${CMAKE_SOURCE_DIR} and fails if any http:// URL is found that is not
# covered by blacklist.list or the project's .httpcheck-overwrite file.
# Invoked from the KDE_SKIP_TEST_SETTINGS / BUILD_TESTING section, right
# after appstreamtest().
function(httpcheck)
    if(TARGET Python3::Interpreter AND NOT httpcheck_added)
        # Remember in the caller's scope that the test already exists, so
        # repeated calls do not add it twice.
        set(httpcheck_added TRUE PARENT_SCOPE)
        add_test(NAME httpcheck
                 COMMAND Python3::Interpreter ${CMAKE_CURRENT_LIST_DIR}/httpcheck.py ${CMAKE_SOURCE_DIR})
    endif()
endfunction()
#!/usr/bin/env python3

# Copyright 2019 Sandro Knauß
# Copyright 2019 Volker Krause
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#
# 1. Redistributions of source code must retain the copyright
# notice, this list of conditions and the following disclaimer.
# 2. Redistributions in binary form must reproduce the copyright
# notice, this list of conditions and the following disclaimer in the
# documentation and/or other materials provided with the distribution.
# 3. The name of the author may not be used to endorse or promote products
# derived from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
# IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
# OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
# IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
# INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
# NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
# THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

"""Scan a source tree for http:// URLs that are not on a blacklist.

Run as ``httpcheck.py [path]`` (default: current directory).  Every
non-blacklisted ``http://`` occurrence is printed as
``file:line:<TAB>text`` and the script exits with status 1 if anything
was found, 0 otherwise.  Built-in patterns come from ``blacklist.list``
next to this script; a project can whitelist more patterns in a
``.httpcheck-overwrite`` file at its tree root.
"""

import os
import re
import sys

# Case-insensitive regexes for lines that must never be reported.
# NOTE(review): this constant is not referenced below -- main() loads the
# same patterns from blacklist.list via getBlacklist().  Kept here in sync
# with that file for reference.
BLACKLIST = [
    r"HTTPSCHECK_IGNORE: true",  # manually ignore a line
    r"http://www.kde.org/standards/kcfg/",
    r"http://www.kde.org/standards/kxmlgui/",
    r"http://www.freedesktop.org/standards/",
    r"http://www.w3.org/",
    r"http://www.example.com/",  # added: was present in blacklist.list but missing here
    r"xmlns",
    r"rdf:resource",
    r"!DOCTYPE",
    r"http://www.inkscape.org/\) -->",
    r"http://purl.org/",
    r"http://schema.org",
    r"http://xml.org/sax/",
    r"schemas.google.com",
    r"schemas.microsoft.com",
    r"semanticdesktop.org/onto",
    r"http://nepomuk.kde.org/ontologies/",
    # Fixed: the pattern used to end in a stray trailing space, which the
    # regex would have required in the scanned line (blacklist.list has no
    # such space).
    r"Generator: DocBook XSL Stylesheets .*",
    # Technically this is an issue, but all license headers have this url in
    # place, and it would hide the real issues.
    r"http://www.gnu.org/licenses/",
    # Technically this is an issue, but all license headers have this url in
    # place, and it would hide the real issues.
    r"http://www.gnu.org/copyleft/",
    # Technically this is an issue, but we refer to it a lot in headers, as
    # we copy a lot from Qt.
    r"http://www.qt-project.org/legal",
]


class HTTPChecker:
    """Finds ``http://`` URLs in files, ignoring blacklisted lines."""

    def __init__(self, blacklist):
        # blacklist: iterable of regex strings, compiled case-insensitively.
        self.blacklist = blacklist
        self.reBlacklist = [re.compile(pattern, re.I) for pattern in blacklist]
        # Any "http://" followed by a non-space character counts as a hit.
        self.matcher = re.compile(r'http://\S', re.I)

    def checkFile(self, fpath):
        """Return a list of (linenumber, line) findings for *fpath*.

        Each finding is also printed as ``file:line:<TAB>stripped-line``.
        May raise UnicodeDecodeError on binary files; callers skip those.
        """
        findings = []
        with open(fpath, 'r') as f:
            if fpath.startswith('./'):
                fpath = fpath[2:]  # nicer relative paths in the report
            for linenumber, line in enumerate(f, start=1):
                if self.matcher.search(line):
                    if not any(regex.search(line) for regex in self.reBlacklist):
                        print("{}:{}:\t{}".format(fpath, linenumber, line.strip()))
                        findings.append((linenumber, line))
        return findings


def loadBlacklist(path, ignoreAuto=False):
    """Load blacklist patterns from *path*, one regex per line.

    ``#`` lines are comments describing the entry that follows; entries
    whose comment starts with "Auto:" are skipped when *ignoreAuto* is
    true (they were machine-generated by httpupdate.py).  Returns [] if
    the file does not exist.
    """
    blacklist = []
    try:
        with open(path) as f:
            comment = ""
            for line in f:
                m = re.match(r"^\s*#\s*(.*)", line)
                if m:
                    comment = m.group(1).strip()
                    continue

                if ignoreAuto and comment.startswith("Auto:"):
                    # Drop the machine-generated entry so it gets re-validated.
                    comment = ""
                    continue

                if line.strip():
                    blacklist.append(line.strip())
                    comment = ""
    except FileNotFoundError:
        pass
    return blacklist


def getBlacklist(path, ignoreAuto=False):
    """Return the built-in blacklist.list patterns plus those from *path*."""
    builtin = loadBlacklist(
        os.path.join(os.path.dirname(__file__), 'blacklist.list'), ignoreAuto)
    return builtin + loadBlacklist(path, ignoreAuto)


def main(path):
    """Walk *path*, report findings, and exit(1) iff any were found."""
    blacklist = getBlacklist(os.path.join(path, '.httpcheck-overwrite'))
    checker = HTTPChecker(blacklist)
    findings = {}
    for dirpath, dirnames, filenames in os.walk(path):
        # NOTE(review): "/"-splitting assumes POSIX paths -- confirm if this
        # ever needs to run on Windows.
        parts = dirpath.split("/")
        if any(skip in parts for skip in ('.git', 'tests', 'autotests')):
            continue
        for fname in filenames:
            fpath = os.path.join(dirpath, fname)
            if fpath == os.path.join(path, '.httpcheck-overwrite'):
                continue  # the whitelist itself is full of http:// lines
            try:
                issues = checker.checkFile(fpath)
            except UnicodeDecodeError:
                continue  # binary file -- skip
            if issues:
                findings[fpath] = issues
    sys.exit(1 if findings else 0)


if __name__ == "__main__":
    # Fixed: dropped the dead "urls = main(path)" assignment -- main()
    # always terminates via sys.exit() and returns nothing.
    main(sys.argv[1] if len(sys.argv) > 1 else ".")
#!/usr/bin/env python3

# Copyright 2019 Sandro Knauß
# Copyright 2019 Volker Krause
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#
# 1. Redistributions of source code must retain the copyright
# notice, this list of conditions and the following disclaimer.
# 2. Redistributions in binary form must reproduce the copyright
# notice, this list of conditions and the following disclaimer in the
# documentation and/or other materials provided with the distribution.
# 3. The name of the author may not be used to endorse or promote products
# derived from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
# IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
# OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
# IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
# INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
# NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
# THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

"""Try to upgrade http:// URLs in a source tree to https://.

For every non-blacklisted URL found under ``path``, checks DNS, whether
the URL answers, and whether an https:// variant works.  Working https
variants are substituted into the files in place; broken http URLs are
recorded as "Auto:" entries in ``.httpcheck-overwrite`` so that
httpcheck.py will ignore them.  Requires network access and the
third-party ``dnspython`` and ``requests`` packages.
"""

import collections
import os
import re
import ssl
import sys
import urllib.error
import urllib.parse
import urllib.request

# Third-party dependencies (not part of the stdlib).
import dns.resolver
import requests

# Use well-known public resolvers so results do not depend on local DNS.
RESOLVER = dns.resolver.Resolver()
RESOLVER.nameservers = ["1.1.1.1", "8.8.8.8"]

# Status codes that still prove the URL is served: auth/method/format
# errors mean the host is alive and the path exists.
_ACCEPTED_STATUS = (200, 400, 401, 403, 405, 406)


class HTTPChecker:
    """Collects candidate URLs from files, ignoring blacklisted lines."""

    def __init__(self, blacklist):
        self.blacklist = blacklist
        self.reBlacklist = [re.compile(pattern, re.I) for pattern in blacklist]
        # First http(s) URL on a line; stops at characters that usually
        # terminate a URL in source text.
        self.updater = re.compile(r'(https?://[^ \t\n"\'?<>*#()]+)', re.I)

    def urls(self, fpath):
        """Return the set of URLs found in *fpath*.

        May raise UnicodeDecodeError on binary files; callers skip those.
        """
        urls = set()
        with open(fpath, 'r') as f:
            for line in f:
                m = self.updater.search(line)
                if m and not any(r.search(line) for r in self.reBlacklist):
                    urls.add(m.group(1))
        return urls


class Url:
    """One URL split into protocol/domain/path, with reachability checks."""

    def __init__(self, url):
        self.url = url
        # The url always matches: it was produced by HTTPChecker.updater.
        m = re.match(r"^(https?)://([^/]+)((/.*)$|$)", self.url, re.I)
        self.protocol = m.group(1)
        self.domain = m.group(2)
        self.path = m.group(3)
        self.resolver = RESOLVER

    def checkDns(self):
        """True if the domain has an A or AAAA record."""
        # NOTE(review): Resolver.query() is the dnspython < 2 API; dnspython 2
        # renames it to resolve().  Kept as query() for compatibility.
        try:
            self.resolver.query(self.domain, "A")
            return True
        except (dns.resolver.NoAnswer, dns.resolver.NXDOMAIN, dns.resolver.NoNameservers):
            pass

        try:
            self.resolver.query(self.domain, "AAAA")
            return True
        except (dns.resolver.NoAnswer, dns.resolver.NXDOMAIN, dns.resolver.NoNameservers):
            return False

    def _probe(self, url):
        """True if *url* answers acceptably via requests or, failing that, urllib.

        SSL errors are a hard failure (the https variant does not work).
        Shared by checkUrl() and checkSecureUrl(), which were duplicates.
        """
        try:
            r = requests.get(url, timeout=5)
            if r.status_code in _ACCEPTED_STATUS:
                return True
        except requests.exceptions.SSLError:
            # Fixed: requests wraps TLS failures in its own SSLError; the
            # original "except ssl.SSLError" clause could never trigger here.
            return False
        except (requests.ConnectionError, requests.Timeout):
            pass
        # Second opinion with urllib -- some servers reject requests'
        # defaults but answer a plain urlopen.
        try:
            if urllib.request.urlopen(url, timeout=5).status == 200:
                return True
        except ssl.SSLError:
            return False
        except (urllib.error.HTTPError, urllib.error.URLError):
            return False
        return False

    def checkUrl(self):
        """True if the URL as written is reachable."""
        return self._probe(self.url)

    @property
    def secureUrl(self):
        """The https:// spelling of this URL."""
        return "https://{}{}".format(self.domain, self.path)

    def checkSecureUrl(self):
        """True if the https:// variant is reachable."""
        return self._probe(self.secureUrl)

    def check(self):
        """Return True, or the name of the first failing check."""
        if not self.checkDns():
            return "checkDns"
        if not self.checkUrl():
            return "checkUrl"
        if self.protocol == "http" and not self.checkSecureUrl():
            return "checkSecureUrl"
        return True


def loadBlacklist(path, ignoreAuto=False):
    """Load blacklist patterns from *path*, one regex per line.

    ``#`` lines are comments describing the entry that follows; entries
    whose comment starts with "Auto:" are skipped when *ignoreAuto* is
    true.  Returns [] if the file does not exist.  Kept identical to the
    copy in httpcheck.py -- both scripts are standalone.
    """
    blacklist = []
    try:
        with open(path) as f:
            comment = ""
            for line in f:
                m = re.match(r"^\s*#\s*(.*)", line)
                if m:
                    comment = m.group(1).strip()
                    continue

                if ignoreAuto and comment.startswith("Auto:"):
                    comment = ""
                    continue

                if line.strip():
                    blacklist.append(line.strip())
                    comment = ""
    except FileNotFoundError:
        pass
    return blacklist


def getBlacklist(path, ignoreAuto=False):
    """Return the built-in blacklist.list patterns plus those from *path*."""
    builtin = loadBlacklist(
        os.path.join(os.path.dirname(__file__), 'blacklist.list'), ignoreAuto)
    return builtin + loadBlacklist(path, ignoreAuto)


def _collectUrls(checker, path):
    """Map url -> set of files containing it, for the whole tree."""
    urls = collections.defaultdict(set)
    overwrite = os.path.join(path, '.httpcheck-overwrite')
    for dirpath, dirnames, filenames in os.walk(path):
        # NOTE(review): "/"-splitting assumes POSIX paths.
        parts = dirpath.split("/")
        if any(skip in parts for skip in ('.git', 'tests', 'autotests')):
            continue
        for fname in filenames:
            fpath = os.path.join(dirpath, fname)
            if fpath == overwrite:
                continue
            try:
                for url in checker.urls(fpath):
                    urls[url].add(fpath)
            except UnicodeDecodeError:
                pass  # binary file -- skip
    return urls


def _loadManualOverwrites(path):
    """Read the hand-written (pattern, comment) pairs from .httpcheck-overwrite.

    Entries preceded by an "Auto:" comment are machine-generated and are
    dropped here so they can be regenerated below.
    """
    manual = []
    try:
        with open(os.path.join(path, '.httpcheck-overwrite')) as f:
            comment = None
            for line in f:
                m = re.match(r"^\s*#\s*(.*)", line)
                if m:
                    comment = m.group(1).strip()
                    continue

                if comment and comment.startswith("Auto:"):
                    comment = None
                    continue

                if line.strip():
                    manual.append((line.strip(), comment))
                    comment = None
    except FileNotFoundError:
        pass
    return manual


def main(path):
    """Upgrade http URLs under *path* and rewrite .httpcheck-overwrite."""
    blacklist = getBlacklist(os.path.join(path, '.httpcheck-overwrite'), ignoreAuto=True)
    urls = _collectUrls(HTTPChecker(blacklist), path)
    manual_overwrites = _loadManualOverwrites(path)

    auto_overwrites = []
    for url in sorted(urls):
        u = Url(url)
        updateResult = u.check()
        if updateResult is True and u.protocol == "http":
            # The https variant works: rewrite every file mentioning the URL.
            for fpath in urls[url]:
                print(f"updating {fpath}")
                with open(fpath, 'r') as f:
                    content = f.read()
                with open(fpath, 'w') as f:
                    # Fixed: use re.escape() -- the old .replace(".", "\.")
                    # only escaped dots and "\." is an invalid escape
                    # sequence in a non-raw string.
                    f.write(re.sub(re.escape(u.url), u.secureUrl, content, flags=re.I))
        elif updateResult is not True:
            print(f"{u.url}: {updateResult}")
            if u.protocol == "http":
                reasons = {
                    "checkDns": "Auto: No DNS response",
                    "checkUrl": "Auto: Url does not answer",
                    "checkSecureUrl": "Auto: No https alternative",
                }
                auto_overwrites.append((re.escape(u.url), reasons[updateResult]))

    if not manual_overwrites and not auto_overwrites:
        return

    with open(os.path.join(path, '.httpcheck-overwrite'), 'w') as ow:
        if manual_overwrites:
            ow.write("\n".join(formatOverwrite(i) for i in manual_overwrites))
            ow.write("\n")
        if auto_overwrites:
            ow.write("\n".join(formatOverwrite(i) for i in sorted(auto_overwrites)))
            ow.write("\n")


def formatOverwrite(entry):
    """Render a (pattern, comment) pair as .httpcheck-overwrite lines."""
    pattern, comment = entry
    if comment:
        return f"# {comment}\n{pattern}"
    return pattern


if __name__ == "__main__":
    # Fixed: dropped the dead "urls = main(path)" assignment -- main()
    # returns nothing useful.
    main(sys.argv[1] if len(sys.argv) > 1 else ".")