diff --git a/kde-modules/KDECMakeSettings.cmake b/kde-modules/KDECMakeSettings.cmake
--- a/kde-modules/KDECMakeSettings.cmake
+++ b/kde-modules/KDECMakeSettings.cmake
@@ -186,6 +186,14 @@
endif()
endfunction()
+find_package(Python3 COMPONENTS Interpreter QUIET)
+function(httpcheck)
+ if(TARGET Python3::Interpreter AND NOT httpcheck_added)
+ set(httpcheck_added TRUE PARENT_SCOPE)
+ add_test(NAME httpcheck COMMAND Python3::Interpreter ${CMAKE_CURRENT_LIST_DIR}/httpcheck.py ${CMAKE_SOURCE_DIR})
+ endif()
+endfunction()
+
if(NOT KDE_SKIP_TEST_SETTINGS)
# If there is a CTestConfig.cmake, include CTest.
@@ -199,6 +207,7 @@
if(BUILD_TESTING)
enable_testing()
appstreamtest()
+ httpcheck()
endif ()
endif ()
diff --git a/kde-modules/blacklist.list b/kde-modules/blacklist.list
new file mode 100644
--- /dev/null
+++ b/kde-modules/blacklist.list
@@ -0,0 +1,26 @@
+# Manually ignore a line.
+HTTPSCHECK_IGNORE: true
+http://www.kde.org/standards/kcfg/
+http://www.kde.org/standards/kxmlgui/
+http://www.freedesktop.org/standards/
+http://www.w3.org/
+http://www.example.com/
+xmlns
+rdf:resource
+!DOCTYPE
+http://www.inkscape.org/\) -->
+http://purl.org/
+http://schema.org
+http://xml.org/sax/
+schemas.google.com
+schemas.microsoft.com
+semanticdesktop.org/onto
+http://nepomuk.kde.org/ontologies/
+Generator: DocBook XSL Stylesheets .*
+# Technically this is an issue, but this URL appears in all license headers and would hide the real issues.
+http://www.gnu.org/licenses/
+# Technically this is an issue, but this URL appears in all license headers and would hide the real issues.
+http://www.gnu.org/copyleft/
+# Technically this is an issue, but we refer to it a lot in headers, as we copy a lot from Qt
+http://www.qt-project.org/legal
+
diff --git a/kde-modules/httpcheck.py b/kde-modules/httpcheck.py
new file mode 100755
--- /dev/null
+++ b/kde-modules/httpcheck.py
@@ -0,0 +1,131 @@
+#!/usr/bin/env python3
+
+# Copyright 2019 Sandro Knauß
+# Copyright 2019 Volker Krause
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions
+# are met:
+#
+# 1. Redistributions of source code must retain the copyright
+# notice, this list of conditions and the following disclaimer.
+# 2. Redistributions in binary form must reproduce the copyright
+# notice, this list of conditions and the following disclaimer in the
+# documentation and/or other materials provided with the distribution.
+# 3. The name of the author may not be used to endorse or promote products
+# derived from this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
+# IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+# OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+# IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
+# INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
+# NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
+# THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+import os
+import re
+import sys
+
+BLACKLIST = [
+ r"HTTPSCHECK_IGNORE: true", # manual ignore a line.
+ r"http://www.kde.org/standards/kcfg/",
+ r"http://www.kde.org/standards/kxmlgui/",
+ r"http://www.freedesktop.org/standards/",
+ r"http://www.w3.org/",
+ r"xmlns",
+ r"rdf:resource",
+ r"!DOCTYPE",
+ r"http://www.inkscape.org/\) -->",
+ r"http://purl.org/",
+ r"http://schema.org",
+ r"http://xml.org/sax/",
+ r"schemas.google.com",
+ r"schemas.microsoft.com",
+ r"semanticdesktop.org/onto",
+ r"http://nepomuk.kde.org/ontologies/",
+ r"Generator: DocBook XSL Stylesheets .* ",
+ r"http://www.gnu.org/licenses/", # Technically this is an issue, but all license headers have this url in place and hiding the real issues.
+ r"http://www.gnu.org/copyleft/", # Technically this is an issue, but all license headers have this url in place and hiding the real issues.
+ r"http://www.qt-project.org/legal", # Technically this is an issue, but we refer it a lot in headers, as we copy a lot form Qt
+]
+
+class HTTPChecker:
+ def __init__(self, blacklist):
+ self.blacklist = blacklist
+ self.reBlacklist = list(map(lambda r:re.compile(r,re.I), blacklist))
+ self.matcher = re.compile(r'http://\S', re.I)
+
+ def checkFile(self, fpath):
+ findings = []
+ with open(fpath, 'r') as f:
+ if fpath.startswith('./'):
+ fpath = fpath[2:]
+ linenumber = 0
+ for line in f:
+ linenumber += 1
+ m = self.matcher.search(line)
+ if m:
+ if not any(map(lambda r: r.search(line), self.reBlacklist)):
+ print("{}:{}:\t{}".format(fpath,linenumber,line.strip()))
+ findings.append((linenumber, line))
+ return findings
+
+def loadBlacklist(path, ignoreAuto=False):
+ blacklist = []
+ try:
+ with open(path) as f:
+ comment = ""
+ for line in f:
+ m = re.match(r"^\s*#\s*(.*)", line)
+ if m:
+ comment = m.group(1).strip()
+ continue
+
+ if ignoreAuto and comment.startswith("Auto:"):
+ comment = ""
+ continue
+
+ if line.strip():
+ blacklist.append(line.strip())
+ comment = ""
+ except FileNotFoundError:
+ pass
+ return blacklist
+
+
+def getBlacklist(path, ignoreAuto=False):
+ blacklist = loadBlacklist(os.path.join(os.path.dirname(__file__),'blacklist.list'), ignoreAuto)
+ return blacklist + loadBlacklist(path, ignoreAuto)
+
+def main(path):
+ blacklist = getBlacklist(os.path.join(path, '.httpcheck-overwrite'))
+ checker = HTTPChecker(blacklist)
+ findings = {}
+ for dirpath, dirnames, filenames in os.walk(path):
+ parts = dirpath.split("/")
+ if any(map(lambda p: p in parts, ['.git', 'tests', 'autotests'])):
+ continue
+ for fname in filenames:
+ try:
+ fpath = os.path.join(dirpath, fname)
+ if fpath == os.path.join(path, '.httpcheck-overwrite'):
+ continue
+ issues = checker.checkFile(fpath)
+ if issues:
+ findings[fpath] = issues
+ except UnicodeDecodeError:
+ pass
+ if findings:
+ sys.exit(1)
+ else:
+ sys.exit(0)
+
+if __name__ == "__main__":
+ path = "."
+ if len(sys.argv) > 1:
+ path = sys.argv[1]
+ urls = main(path)
diff --git a/kde-modules/httpupdate.py b/kde-modules/httpupdate.py
new file mode 100755
--- /dev/null
+++ b/kde-modules/httpupdate.py
@@ -0,0 +1,247 @@
+#!/usr/bin/env python3
+
+# Copyright 2019 Sandro Knauß
+# Copyright 2019 Volker Krause
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions
+# are met:
+#
+# 1. Redistributions of source code must retain the copyright
+# notice, this list of conditions and the following disclaimer.
+# 2. Redistributions in binary form must reproduce the copyright
+# notice, this list of conditions and the following disclaimer in the
+# documentation and/or other materials provided with the distribution.
+# 3. The name of the author may not be used to endorse or promote products
+# derived from this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
+# IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+# OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+# IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
+# INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
+# NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
+# THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+import collections
+import dns.resolver
+import os
+import re
+import requests
+import ssl
+import sys
+import urllib.parse
+import urllib.request
+
+RESOLVER = dns.resolver.Resolver()
+RESOLVER.nameservers=["1.1.1.1", "8.8.8.8"]
+
+class HTTPChecker:
+ def __init__(self, blacklist):
+ self.blacklist = blacklist
+ self.reBlacklist = list(map(lambda r:re.compile(r,re.I), blacklist))
+ self.updater = re.compile(r'(https?://[^ \t\n"\'?<>*#()]+)', re.I)
+
+ def urls(self, fpath):
+ urls = set()
+ with open(fpath, 'r') as f:
+ linenumber = 0
+ for line in f:
+ linenumber += 1
+ m = self.updater.search(line)
+ if m:
+ if not any(map(lambda r: r.search(line), self.reBlacklist)):
+ urls.add(m.group(1))
+ return urls
+
+class Url:
+ def __init__(self, url):
+ self.url = url
+ m = re.match(r"^(https?)://([^/]+)((/.*)$|$)", self.url, re.I)
+ self.protocol = m.group(1)
+ self.domain = m.group(2)
+ self.path = m.group(3)
+ self.resolver = RESOLVER
+
+ def checkDns(self):
+ try:
+ self.resolver.query(self.domain, "A")
+ return True
+ except (dns.resolver.NoAnswer, dns.resolver.NXDOMAIN, dns.resolver.NoNameservers):
+ pass
+
+ try:
+ self.resolver.query(self.domain, "AAAA")
+ return True
+ except (dns.resolver.NoAnswer, dns.resolver.NXDOMAIN, dns.resolver.NoNameservers):
+ return False
+
+ def checkUrl(self):
+ try:
+ r = requests.get(self.url, timeout=5)
+ if r.status_code in (200, 400, 401, 403, 405, 406):
+ return True
+ except (ssl.SSLError):
+ return False
+ except (requests.ConnectionError, requests.Timeout):
+ pass
+ try:
+ u = urllib.request.urlopen(self.url, timeout=5)
+ if u.status == 200:
+ return True
+ except (ssl.SSLError):
+ return False
+ except (urllib.request.HTTPError, urllib.request.URLError):
+ return False
+ return False
+
+ @property
+ def secureUrl(self):
+ return "https://{}{}".format(self.domain, self.path)
+
+ def checkSecureUrl(self):
+ try:
+ r = requests.get(self.secureUrl, timeout=5)
+ if r.status_code in (200, 400, 401, 403, 405, 406):
+ return True
+ except (ssl.SSLError):
+ return False
+ except (requests.ConnectionError, requests.Timeout):
+ pass
+ try:
+ u = urllib.request.urlopen(self.secureUrl, timeout=5)
+ if u.status == 200:
+ return True
+ except (ssl.SSLError):
+ return False
+ except (urllib.request.HTTPError, urllib.request.URLError):
+ return False
+ return False
+
+ def check(self):
+ if not self.checkDns():
+ return "checkDns"
+
+ if not self.checkUrl():
+ return "checkUrl"
+
+ if self.protocol == "http" and not self.checkSecureUrl():
+ return "checkSecureUrl"
+
+ return True
+
+def loadBlacklist(path, ignoreAuto=False):
+ blacklist = []
+ try:
+ with open(path) as f:
+ comment = ""
+ for line in f:
+ m = re.match(r"^\s*#\s*(.*)", line)
+ if m:
+ comment = m.group(1).strip()
+ continue
+
+ if ignoreAuto and comment.startswith("Auto:"):
+ comment = ""
+ continue
+
+ if line.strip():
+ blacklist.append(line.strip())
+ comment = ""
+ except FileNotFoundError:
+ pass
+ return blacklist
+
+
+def getBlacklist(path, ignoreAuto=False):
+ blacklist = loadBlacklist(os.path.join(os.path.dirname(__file__),'blacklist.list'), ignoreAuto)
+ return blacklist + loadBlacklist(path, ignoreAuto)
+
+def main(path):
+ blacklist = getBlacklist(os.path.join(path, '.httpcheck-overwrite'), ignoreAuto=True)
+ checker = HTTPChecker(blacklist)
+ urls = collections.defaultdict(set)
+ for dirpath, dirnames, filenames in os.walk(path):
+ parts = dirpath.split("/")
+ if any(map(lambda p: p in parts, ['.git', 'tests', 'autotests'])):
+ continue
+ for fname in filenames:
+ try:
+ fpath = os.path.join(dirpath, fname)
+ if fpath == os.path.join(path, '.httpcheck-overwrite'):
+ continue
+ u = checker.urls(fpath)
+ for url in u:
+ urls[url].add(fpath)
+ except UnicodeDecodeError:
+ pass
+
+ manual_overwrites = []
+ try:
+ with open(os.path.join(path, '.httpcheck-overwrite')) as f:
+ comment = None
+ for line in f:
+ m = re.match(r"^\s*#\s*(.*)", line)
+ if m:
+ comment = m.group(1).strip()
+ continue
+
+ if comment and comment.startswith("Auto:"):
+ comment = None
+ continue
+
+ if line.strip():
+ manual_overwrites.append((line.strip(), comment))
+ comment = None
+
+ except FileNotFoundError:
+ pass
+
+ auto_overwrites = []
+ for url in sorted(urls):
+ u = Url(url)
+ updateResult = u.check()
+ if updateResult == True and u.protocol == "http":
+ for fpath in urls[url]:
+ print(f"updating {fpath}")
+ with open(fpath, 'r') as f:
+ content = f.read()
+ with open(fpath, 'w') as f:
+ f.write(re.sub(u.url.replace(".","\."),u.secureUrl, content, flags=re.I))
+ elif updateResult != True:
+ print(f"{u.url}: {updateResult}")
+ if u.protocol == "http":
+ if updateResult == "checkDns":
+ auto_overwrites.append((u.url.replace(".","\."), "Auto: No DNS response"))
+ if updateResult == "checkUrl":
+ auto_overwrites.append((u.url.replace(".","\."), "Auto: Url does not answer"))
+ if updateResult == "checkSecureUrl":
+ auto_overwrites.append((u.url.replace(".","\."), "Auto: No https alternative"))
+
+ if not manual_overwrites and not auto_overwrites:
+ return
+
+ with open(os.path.join(path, '.httpcheck-overwrite'), 'w') as ow:
+ if manual_overwrites:
+ ow.write("\n".join([formatOverwrite(i) for i in manual_overwrites]))
+ ow.write("\n")
+
+ if auto_overwrites:
+ ow.write("\n".join([formatOverwrite(i) for i in sorted(auto_overwrites)]))
+ ow.write("\n")
+
+def formatOverwrite(entry):
+ if entry[1]:
+ return f"# {entry[1]}\n{entry[0]}"
+ else:
+ return entry[0]
+
+
+if __name__ == "__main__":
+ path = "."
+ if len(sys.argv) > 1:
+ path = sys.argv[1]
+ urls = main(path)