diff --git a/CMakeLists.txt b/CMakeLists.txt
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -299,6 +299,7 @@
 option(KWIN_BUILD_DECORATIONS "Enable building of KWin decorations." ON)
 option(KWIN_BUILD_KCMS "Enable building of KWin configuration modules." ON)
 option(KWIN_BUILD_TABBOX "Enable building of KWin Tabbox functionality" ON)
+option(KWIN_BUILD_PERF "Build internal tools for performance analysis at runtime." ON)
 option(KWIN_BUILD_XRENDER_COMPOSITING "Enable building of KWin with XRender Compositing support" ON)
 cmake_dependent_option(KWIN_BUILD_ACTIVITIES "Enable building of KWin with kactivities support" ON "KF5Activities_FOUND" OFF)
 
@@ -494,6 +495,7 @@
     touch_hide_cursor_spy.cpp
     internal_client.cpp
     xwl/xwayland_interface.cpp
+    perf.cpp
    )
 
 include(ECMQtDeclareLoggingCategory)
@@ -531,6 +533,13 @@
     )
 endif()
 
+if(KWIN_BUILD_PERF)
+    set(
+        kwin_KDEINIT_SRCS ${kwin_KDEINIT_SRCS}
+        helpers/perf/ftrace_marker.cpp
+    )
+endif()
+
 if (HAVE_LINUX_VT_H)
         set(kwin_KDEINIT_SRCS
             ${kwin_KDEINIT_SRCS}
diff --git a/composite.h b/composite.h
--- a/composite.h
+++ b/composite.h
@@ -144,7 +144,7 @@
      * Continues the startup after Scene And Workspace are created
      */
     void startupWithWorkspace();
-    virtual void performCompositing();
+    virtual void performCompositing(bool force = false);
 
     virtual void configChanged();
 
@@ -157,30 +157,37 @@
 
     void setupX11Support();
 
-    void setCompositeTimer();
+    void setFallbackTimer();
     bool windowRepaintsPending() const;
 
     void releaseCompositorSelection();
     void deleteUnusedSupportProperties();
 
+    bool delayPerform();
+    void updatePaintTimes(qint64 time);
+
     State m_state;
 
-    QBasicTimer compositeTimer;
     CompositorSelectionOwner *m_selectionOwner;
     QTimer m_releaseSelectionTimer;
     QList<xcb_atom_t> m_unusedSupportProperties;
     QTimer m_unusedSupportPropertyTimer;
-    qint64 vBlankInterval, fpsInterval;
     QRegion repaints_region;
 
-    qint64 m_timeSinceLastVBlank;
-
     Scene *m_scene;
 
     bool m_bufferSwapPending;
     bool m_composeAtSwapCompletion;
 
     int m_framesToTestForSafety = 3;
+
+    QBasicTimer m_delayTimer;
+
+    // Durations of the most recent paints, in nanoseconds.
+    qint64 m_paintTimes[4] = {0};
+    size_t m_paintTimesIndex = 0;
+
+    QBasicTimer m_fallbackTimer;
     QElapsedTimer m_monotonicClock;
 };
 
@@ -272,7 +279,7 @@
 
 protected:
     void start() override;
-    void performCompositing() override;
+    void performCompositing(bool force) override;
 
 private:
     explicit X11Compositor(QObject *parent);
diff --git a/composite.cpp b/composite.cpp
--- a/composite.cpp
+++ b/composite.cpp
@@ -25,6 +25,7 @@
 #include "deleted.h"
 #include "effects.h"
 #include "overlaywindow.h"
+#include "perf.h"
 #include "platform.h"
 #include "scene.h"
 #include "screens.h"
@@ -122,9 +123,6 @@
     : QObject(workspace)
     , m_state(State::Off)
     , m_selectionOwner(NULL)
-    , vBlankInterval(0)
-    , fpsInterval(0)
-    , m_timeSinceLastVBlank(0)
     , m_scene(NULL)
     , m_bufferSwapPending(false)
     , m_composeAtSwapCompletion(false)
@@ -328,22 +326,8 @@
     Workspace::self()->markXStackingOrderAsDirty();
     Q_ASSERT(m_scene);
 
-    connect(workspace(), &Workspace::destroyed, this, [this] { compositeTimer.stop(); });
+    connect(workspace(), &Workspace::destroyed, this, [this] { m_fallbackTimer.stop(); });
     setupX11Support();
-    fpsInterval = options->maxFpsInterval();
-
-    if (m_scene->syncsToVBlank()) {
-        // If we do vsync, set the fps to the next multiple of the vblank rate.
-        vBlankInterval = milliToNano(1000) / currentRefreshRate();
-        fpsInterval = qMax((fpsInterval / vBlankInterval) * vBlankInterval, vBlankInterval);
-    } else {
-        // No vsync - DO NOT set "0", would cause div-by-zero segfaults.
-        vBlankInterval = milliToNano(1);
-    }
-
-    // This means "start now" - we don't have even a slight idea when the first vsync will occur.
-    m_timeSinceLastVBlank = fpsInterval - (options->vBlankTime() + 1);
-    scheduleRepaint();
 
     // Sets also the 'effects' pointer.
     kwinApp()->platform()->createEffectsHandler(this, m_scene);
@@ -389,8 +373,26 @@
 
 void Compositor::scheduleRepaint()
 {
-    if (!compositeTimer.isActive())
-        setCompositeTimer();
+    if (m_state != State::On) {
+        return;
+    }
+    if (!kwinApp()->platform()->areOutputsEnabled()) {
+        return;
+    }
+
+    // TODO: Make this distinction not on the question if there is a swap event but on whether
+    //       we render per screen? On X we get swap events but they are aligned with the "wrong"
+    //       screen if the primary/first one is not the one with the highest refresh rate.
+    //       But on the other hand the Present extension does not allow to sync with another
+    //       screen anyway.
+
+    if (m_scene->hasSwapEvent()) {
+        // TODO: If we don't call it back from the event loop we often crash on Wayland
+        //       in AnimationEffect::postPaintScreen. Why?
+        QTimer::singleShot(0, this, [this]() { performCompositing(); });
+    } else {
+        setFallbackTimer();
+    }
 }
 
 void Compositor::stop()
@@ -454,7 +456,7 @@
 
     delete m_scene;
     m_scene = NULL;
-    compositeTimer.stop();
+    m_fallbackTimer.stop();
     repaints_region = QRegion();
 
     m_state = State::Off;
@@ -574,10 +576,14 @@
 
 void Compositor::timerEvent(QTimerEvent *te)
 {
-    if (te->timerId() == compositeTimer.timerId()) {
+    if (te->timerId() == m_delayTimer.timerId()) {
+        m_delayTimer.stop(); // QBasicTimer repeats; the delay must fire only once
+        performCompositing(true); // forced, so this pass is not delayed a second time
+    } else if (te->timerId() == m_fallbackTimer.timerId()) {
         performCompositing();
-    } else
+    } else {
         QObject::timerEvent(te);
+    }
 }
 
 void Compositor::aboutToSwapBuffers()
@@ -593,30 +599,76 @@
     m_bufferSwapPending = false;
 
     emit bufferSwapCompleted();
+    Perf::ftrace(QStringLiteral("Swaped"));
 
     if (m_composeAtSwapCompletion) {
         m_composeAtSwapCompletion = false;
         performCompositing();
     }
 }
 
-void Compositor::performCompositing()
+void Compositor::updatePaintTimes(qint64 time)
+{
+    // Advances to the next slot of m_paintTimes (wrapping around) and stores the time there.
+    const size_t slotCount = sizeof(m_paintTimes) / sizeof(*m_paintTimes);
+
+    m_paintTimesIndex = (m_paintTimesIndex + 1) % slotCount;
+    m_paintTimes[m_paintTimesIndex] = time;
+}
+
+bool Compositor::delayPerform()
+{
+    // Postpones painting by the frame duration minus the average recent paint time. Returns
+    // true if the delay timer was armed, false if the caller should paint right away.
+    const qint64 rate = refreshRate();
+    const qint64 frameDuration = rate > 0 ? milliToNano(1000) / rate : 0;
+
+    qint64 paintSum = 0;
+    for (const qint64 time : m_paintTimes) {
+        paintSum += time;
+    }
+    const qint64 avgPaintTime = paintSum / qint64(sizeof(m_paintTimes) / sizeof(*m_paintTimes));
+
+    // Signed and clamped on purpose: painting slower than one frame (or an invalid refresh
+    // rate) must mean "no delay" and never wrap around. We keep at least 1 ms to paint.
+    const qint64 delay = nanoToMilli(qMax(frameDuration - avgPaintTime, qint64(0))) - 1;
+    if (delay <= 0) {
+        return false;
+    }
+
+    m_delayTimer.start(delay, this);
+    return true;
+}
+
+static ulong s_msc = 0;
+
+void Compositor::performCompositing(bool force)
 {
     // If a buffer swap is still pending, we return to the event loop and
     // continue processing events until the swap has completed.
     if (m_bufferSwapPending) {
         m_composeAtSwapCompletion = true;
-        compositeTimer.stop();
         return;
     }
 
     // If outputs are disabled, we return to the event loop and
     // continue processing events until the outputs are enabled again
     if (!kwinApp()->platform()->areOutputsEnabled()) {
-        compositeTimer.stop();
+        m_fallbackTimer.stop();
+        return;
+    }
+
+    if (m_delayTimer.isActive()) {
         return;
     }
 
+    if (!force && m_scene->hasSwapEvent()) {
+        // TODO: instead only check if vsynced? Currently not working without swap event
+        if (delayPerform()) {
+            return;
+        }
+    }
+
     // Create a list of all windows in the stacking order
     ToplevelList windows = Workspace::self()->xStackingOrder();
     ToplevelList damaged;
@@ -659,15 +711,17 @@
 
     if (repaints_region.isEmpty() && !windowRepaintsPending()) {
         m_scene->idle();
-        m_timeSinceLastVBlank = fpsInterval - (options->vBlankTime() + 1); // means "start now"
-        // Note: It would seem here we should undo suspended unredirect, but when scenes need
-        // it for some reason, e.g. transformations or translucency, the next pass that does not
-        // need this anymore and paints normally will also reset the suspended unredirect.
-        // Otherwise the window would not be painted normally anyway.
-        compositeTimer.stop();
+
+        // TODO: Should we start directly on next damage or can we just ignore the
+        //       additional latency of one frame and run the timer once more?
+//        m_timeSinceLastVBlank = fpsInterval - (options->vBlankTime() + 1); // means "start now"
+
+        m_fallbackTimer.stop();
         return;
     }
 
+    Perf::ftrace(s_msc, QStringLiteral("Paint"));
+
     // Skip windows that are not yet ready for being painted and if screen is locked skip windows
     // that are neither lockscreen nor inputmethod windows.
     //
@@ -692,7 +746,10 @@
     if (m_framesToTestForSafety > 0 && (m_scene->compositingType() & OpenGLCompositing)) {
         kwinApp()->platform()->createOpenGLSafePoint(Platform::OpenGLSafePoint::PreFrame);
     }
-    m_timeSinceLastVBlank = m_scene->paint(repaints, windows);
+
+    const qint64 paintDuration = m_scene->paint(repaints, windows);
+    updatePaintTimes(paintDuration);
+
     if (m_framesToTestForSafety > 0) {
         if (m_scene->compositingType() & OpenGLCompositing) {
             kwinApp()->platform()->createOpenGLSafePoint(Platform::OpenGLSafePoint::PostFrame);
@@ -715,13 +772,16 @@
 
     // Stop here to ensure *we* cause the next repaint schedule - not some effect
     // through m_scene->paint().
-    compositeTimer.stop();
+    m_fallbackTimer.stop();
+
+    Perf::ftrace(QStringLiteral("Paint"), s_msc);
+    s_msc++;
 
     // Trigger at least one more pass even if there would be nothing to paint, so that scene->idle()
     // is called the next time. If there would be nothing pending, it will not restart the timer and
     // scheduleRepaint() would restart it again somewhen later, called from functions that
     // would again add something pending.
-    if (m_bufferSwapPending && m_scene->syncsToVBlank()) {
+    if (m_bufferSwapPending) {
         m_composeAtSwapCompletion = true;
     } else {
         scheduleRepaint();
@@ -768,79 +828,16 @@
     return false;
 }
 
-void Compositor::setCompositeTimer()
+void Compositor::setFallbackTimer()
 {
-    if (m_state != State::On) {
+    if (m_fallbackTimer.isActive()) {
         return;
     }
 
-    // Don't start the timer if we're waiting for a swap event
-    if (m_bufferSwapPending && m_composeAtSwapCompletion)
-        return;
+    uint waitTime = 1000 / refreshRate();
 
-    // Don't start the timer if all outputs are disabled
-    if (!kwinApp()->platform()->areOutputsEnabled()) {
-        return;
-    }
-
-    uint waitTime = 1;
-
-    if (m_scene->blocksForRetrace()) {
-
-        // TODO: make vBlankTime dynamic?!
-        // It's required because glXWaitVideoSync will *likely* block a full frame if one enters
-        // a retrace pass which can last a variable amount of time, depending on the actual screen
-        // Now, my ooold 19" CRT can do such retrace so that 2ms are entirely sufficient,
-        // while another ooold 15" TFT requires about 6ms
-
-        qint64 padding = m_timeSinceLastVBlank;
-        if (padding > fpsInterval) {
-            // We're at low repaints or spent more time in painting than the user wanted to wait
-            // for that frame. Align to next vblank:
-            padding = vBlankInterval - (padding % vBlankInterval);
-        } else {
-            // Align to the next maxFps tick:
-            // "remaining time of the first vsync" + "time for the other vsyncs of the frame"
-            padding = ((vBlankInterval - padding % vBlankInterval) +
-                       (fpsInterval / vBlankInterval - 1) * vBlankInterval);
-        }
-
-        if (padding < options->vBlankTime()) {
-            // We'll likely miss this frame so we add one:
-            waitTime = nanoToMilli(padding + vBlankInterval - options->vBlankTime());
-        } else {
-            waitTime = nanoToMilli(padding - options->vBlankTime());
-        }
-    }
-    else { // w/o blocking vsync we just jump to the next demanded tick
-        if (fpsInterval > m_timeSinceLastVBlank) {
-            waitTime = nanoToMilli(fpsInterval - m_timeSinceLastVBlank);
-            if (!waitTime) {
-                // Will ensure we don't block out the eventloop - the system's just not faster ...
-                waitTime = 1;
-            }
-        }
-        /* else if (m_scene->syncsToVBlank() && m_timeSinceLastVBlank - fpsInterval < (vBlankInterval<<1)) {
-            // NOTICE - "for later" ------------------------------------------------------------------
-            // It can happen that we push two frames within one refresh cycle.
-            // Swapping will then block even with triple buffering when the GPU does not discard but
-            // queues frames
-            // now here's the mean part: if we take that as "OMG, we're late - next frame ASAP",
-            // there'll immediately be 2 frames in the pipe, swapping will block, we think we're
-            // late ... ewww
-            // so instead we pad to the clock again and add 2ms safety to ensure the pipe is really
-            // free
-            // NOTICE: obviously m_timeSinceLastVBlank can be too big because we're too slow as well
-            // So if this code was enabled, we'd needlessly half the framerate once more (15 instead of 30)
-            waitTime = nanoToMilli(vBlankInterval - (m_timeSinceLastVBlank - fpsInterval)%vBlankInterval) + 2;
-        }*/
-        else {
-            // "0" would be sufficient here, but the compositor isn't the WMs only task.
-            waitTime = 1;
-        }
-    }
     // Force 4fps minimum:
-    compositeTimer.start(qMin(waitTime, 250u), this);
+    m_fallbackTimer.start(qMin(waitTime, 250u), this);
 }
 
 bool Compositor::isActive()
@@ -887,7 +884,7 @@
     // TODO: This makes no sense on Wayland. First step would be to atleast
     //       set the refresh rate to the highest available one. Second step
     //       would be to not use a uniform value at all but per screen.
-    return KWin::currentRefreshRate();
+    return 60;
 }
 
 void WaylandCompositor::updateCompositeBlocking()
@@ -990,13 +987,13 @@
     m_xrrRefreshRate = KWin::currentRefreshRate();
     startupWithWorkspace();
 }
-void X11Compositor::performCompositing()
+void X11Compositor::performCompositing(bool force)
 {
     if (scene()->usesOverlayWindow() && !isOverlayWindowVisible()) {
         // Return since nothing is visible.
         return;
     }
-    Compositor::performCompositing();
+    Compositor::performCompositing(force);
 }
 
 bool X11Compositor::checkForOverlayWindow(WId w) const
diff --git a/config-kwin.h.cmake b/config-kwin.h.cmake
--- a/config-kwin.h.cmake
+++ b/config-kwin.h.cmake
@@ -1,6 +1,7 @@
 #cmakedefine KWIN_BUILD_DECORATIONS 1
 #cmakedefine KWIN_BUILD_TABBOX 1
 #cmakedefine KWIN_BUILD_ACTIVITIES 1
+#cmakedefine KWIN_BUILD_PERF 1
 #define KWIN_NAME "${KWIN_NAME}"
 #define KWIN_INTERNAL_NAME_X11 "${KWIN_INTERNAL_NAME_X11}"
 #define KWIN_CONFIG "${KWIN_NAME}rc"
diff --git a/data/org_kde_kwin.categories b/data/org_kde_kwin.categories
--- a/data/org_kde_kwin.categories
+++ b/data/org_kde_kwin.categories
@@ -20,3 +20,4 @@
 kwin_scene_xrender KWin XRender based compositor scene plugin DEFAULT_SEVERITY [CRITICAL] IDENTIFIER [KWIN_XRENDER]
 kwin_scene_qpainter KWin QPainter based compositor scene plugin DEFAULT_SEVERITY [CRITICAL] IDENTIFIER [KWIN_QPAINTER]
 kwin_scene_opengl KWin OpenGL based compositor scene plugins DEFAULT_SEVERITY [CRITICAL] IDENTIFIER [KWIN_OPENGL]
+kwin_perf KWin Performance measurement and debugging tools DEFAULT_SEVERITY [CRITICAL] IDENTIFIER [KWIN_PERF]
diff --git a/dbusinterface.h b/dbusinterface.h
--- a/dbusinterface.h
+++ b/dbusinterface.h
@@ -68,6 +68,7 @@
     QString supportInformation();
     Q_NOREPLY void unclutterDesktop();
     Q_NOREPLY void showDebugConsole();
+    bool enableFtrace(bool enable);
 
     QVariantMap queryWindowInfo();
     QVariantMap getWindowInfo(const QString &uuid);
diff --git a/dbusinterface.cpp b/dbusinterface.cpp
--- a/dbusinterface.cpp
+++ b/dbusinterface.cpp
@@ -28,6 +28,9 @@
 #include "atoms.h"
 #include "composite.h"
 #include "debug_console.h"
+#ifdef KWIN_BUILD_PERF
+#include "helpers/perf/ftrace_marker.h"
+#endif
 #include "main.h"
 #include "placement.h"
 #include "platform.h"
@@ -189,6 +192,19 @@
     console->show();
 }
 
+bool DBusInterface::enableFtrace(bool enable)
+{
+    // Switches Ftrace marking on or off. Returns false if the perf tools are not built in,
+    // if no marker instance exists or if the marker reports failure.
+#ifdef KWIN_BUILD_PERF
+    auto *marker = Perf::FtraceMarker::self();
+    return marker ? marker->setEnabled(enable) : false;
+#else
+    Q_UNUSED(enable)
+    return false;
+#endif
+}
+
 namespace {
 QVariantMap clientToVariantMap(const AbstractClient *c)
 {
diff --git a/platformsupport/scenes/opengl/swap_profiler.h b/helpers/perf/ftrace_marker.h
rename from platformsupport/scenes/opengl/swap_profiler.h
rename to helpers/perf/ftrace_marker.h
--- a/platformsupport/scenes/opengl/swap_profiler.h
+++ b/helpers/perf/ftrace_marker.h
@@ -2,8 +2,7 @@
  KWin - the KDE window manager
  This file is part of the KDE project.
 
-Copyright (C) 2006 Lubos Lunak <l.lunak@kde.org>
-Copyright (C) 2009, 2010, 2011 Martin Gräßlin <mgraesslin@kde.org>
+Copyright 2019 Roman Gilg <subdiff@gmail.com>
 
 This program is free software; you can redistribute it and/or modify
 it under the terms of the GNU General Public License as published by
@@ -18,36 +17,38 @@
 You should have received a copy of the GNU General Public License
 along with this program.  If not, see <http://www.gnu.org/licenses/>.
 *********************************************************************/
-#ifndef KWIN_SCENE_OPENGL_SWAP_PROFILER_H
-#define KWIN_SCENE_OPENGL_SWAP_PROFILER_H
+#pragma once
 
-#include <QElapsedTimer>
-#include <kwin_export.h>
+#include <kwinglobals.h>
+
+#include <QFile>
 
 namespace KWin
 {
+namespace Perf
+{
 
 /**
- * @short Profiler to detect whether we have triple buffering
- * The strategy is to start setBlocksForRetrace(false) but assume blocking and have the system prove that assumption wrong
+ * Provides an interface to mark the Ftrace output for debugging.
  */
-class KWIN_EXPORT SwapProfiler
+class FtraceMarker : public QObject
 {
+    Q_OBJECT
 public:
-    SwapProfiler();
-    void init();
-    void begin();
-    /**
-     * @return char being 'd' for double, 't' for triple (or more - but non-blocking) buffering and
-     * 0 (NOT '0') otherwise, so you can act on "if (char result = SwapProfiler::end()) { fooBar(); }
-     */
-    char end();
+    virtual ~FtraceMarker() = default;
+
+    bool setEnabled(bool enable);
+    void print(const QString &message);
+    void print(ulong ctx, const QString &message);
+    void print(const QString &message, ulong ctx);
+
 private:
-    QElapsedTimer m_timer;
-    qint64  m_time;
-    int m_counter;
+    bool findFile();
+
+    QFile *m_file = nullptr;
+
+    KWIN_SINGLETON(FtraceMarker)
 };
 
 }
-
-#endif
+}
diff --git a/helpers/perf/ftrace_marker.cpp b/helpers/perf/ftrace_marker.cpp
new file mode 100644
--- /dev/null
+++ b/helpers/perf/ftrace_marker.cpp
@@ -0,0 +1,135 @@
+/********************************************************************
+ KWin - the KDE window manager
+ This file is part of the KDE project.
+
+Copyright 2019 Roman Gilg <subdiff@gmail.com>
+
+This program is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 2 of the License, or
+(at your option) any later version.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with this program.  If not, see <http://www.gnu.org/licenses/>.
+*********************************************************************/
+#include "ftrace_marker.h"
+
+#include "utils.h"
+
+#include <QDir>
+#include <QFileInfo>
+
+namespace KWin
+{
+namespace Perf
+{
+
+static void writeFunctionEnabled(QFile *file, const QString &message)
+{
+    file->write(message.toLatin1());
+    file->flush(); // hand the message to the marker file right away instead of buffering it
+}
+// No-op counterpart used while marking is disabled; file may be null in that state.
+static void writeFunctionDisabled(QFile *file, const QString &message)
+{
+    Q_UNUSED(file)
+    Q_UNUSED(message)
+}
+// Currently active writer; FtraceMarker::setEnabled() switches it. File-local on purpose.
+static void (*s_writeFunction)(QFile *file, const QString &message) = writeFunctionDisabled;
+
+
+KWIN_SINGLETON_FACTORY(FtraceMarker)
+
+FtraceMarker::FtraceMarker(QObject *parent)
+    : QObject(parent)
+{
+    if (qEnvironmentVariableIsSet("KWIN_PERF_FTRACE")) {
+        qCDebug(KWIN_PERF) << "Ftrace marking initially enabled via environment variable";
+        setEnabled(true);
+    }
+}
+
+bool FtraceMarker::setEnabled(bool enable)
+{
+    // Returns whether marking ended up in the requested state. m_file is set only while enabled.
+    if ((m_file != nullptr) == enable) {
+        return true;
+    }
+    if (!enable) {
+        s_writeFunction = writeFunctionDisabled;
+        delete m_file;
+        m_file = nullptr;
+        return true;
+    }
+    if (!findFile()) {
+        qCWarning(KWIN_PERF) << "Ftrace marking not available. Try reenabling after issue is solved.";
+        return false;
+    }
+    s_writeFunction = writeFunctionEnabled;
+    return true;
+}
+
+void FtraceMarker::print(const QString &message)
+{
+    (*s_writeFunction)(m_file, message);
+}
+
+void FtraceMarker::print(ulong ctx, const QString &message)
+{
+    (*s_writeFunction)(m_file, message + QStringLiteral(" (begin_ctx=%1)").arg(ctx));
+}
+
+void FtraceMarker::print(const QString &message, ulong ctx)
+{
+    (*s_writeFunction)(m_file, message + QStringLiteral(" (end_ctx=%1)").arg(ctx));
+}
+
+bool FtraceMarker::findFile()
+{
+    // Locates the kernel's trace_marker file via /proc/mounts and opens it as m_file for writing.
+    QFile mountsFile("/proc/mounts");
+    if (!mountsFile.open(QIODevice::ReadOnly | QIODevice::Text)) {
+        qCWarning(KWIN_PERF) << "No acces to mounts file. Can not determine trace marker file location.";
+        return false;
+    }
+
+    // Builds the marker location from the mount point, i.e. the second field of a mounts line.
+    auto markerBelowMount = [](const QString &line, const QString &relativePath) {
+        const int start = line.indexOf(' ') + 1;
+        const int end = line.indexOf(' ', start);
+        return QFileInfo(QDir(line.mid(start, end - start)), relativePath);
+    };
+    QFileInfo markerFileInfo;
+    QTextStream mountsIn(&mountsFile);
+    for (QString line = mountsIn.readLine(); !line.isNull(); line = mountsIn.readLine()) {
+        if (line.startsWith("tracefs")) {
+            // A tracefs mount exposes the marker at its root; prefer it and stop searching.
+            markerFileInfo = markerBelowMount(line, QString::fromLatin1("trace_marker"));
+            break;
+        }
+        if (line.startsWith("debugfs")) {
+            // Fallback: below debugfs the tracing files live in the "tracing" subdirectory.
+            markerFileInfo = markerBelowMount(line, QString::fromLatin1("tracing/trace_marker"));
+        }
+    }
+    mountsFile.close();
+
+    const QString path = markerFileInfo.absoluteFilePath();
+    m_file = new QFile(path, this);
+    if (!m_file->open(QIODevice::WriteOnly)) {
+        qCWarning(KWIN_PERF) << "No access to trace marker file at:" << path;
+        delete m_file;
+        m_file = nullptr;
+        return false;
+    }
+    return true;
+}
+
+}
+}
diff --git a/main.cpp b/main.cpp
--- a/main.cpp
+++ b/main.cpp
@@ -37,6 +37,8 @@
 
 #include <kwineffects.h>
 
+#include "helpers/perf/ftrace_marker.h"
+
 // KDE
 #include <KAboutData>
 #include <KLocalizedString>
@@ -108,6 +110,10 @@
     , m_inputConfig()
     , m_operationMode(mode)
 {
+#ifdef KWIN_BUILD_PERF
+    Perf::FtraceMarker::create(this);
+#endif
+
     qRegisterMetaType<Options::WindowOperation>("Options::WindowOperation");
     qRegisterMetaType<KWin::EffectWindow*>();
     qRegisterMetaType<KWayland::Server::SurfaceInterface *>("KWayland::Server::SurfaceInterface *");
diff --git a/options.cpp b/options.cpp
--- a/options.cpp
+++ b/options.cpp
@@ -81,7 +81,7 @@
     // however, additional throttling prevents very high rates from taking place anyway
     else if (rate > 1000)
         rate = 1000;
-    qCDebug(KWIN_CORE) << "Vertical Refresh rate " << rate << "Hz (" << syncScreenName << ")";
+//    qCDebug(KWIN_CORE) << "Vertical Refresh rate " << rate << "Hz (" << syncScreenName << ")";
     return rate;
 }
 
diff --git a/org.kde.KWin.xml b/org.kde.KWin.xml
--- a/org.kde.KWin.xml
+++ b/org.kde.KWin.xml
@@ -45,5 +45,10 @@
         <arg type="s" direction="in"/>
         <arg type="a{sv}" direction="out"/>
     </method>
+    <method name="enableFtrace">
+        <annotation name="org.qtproject.QtDBus.QtTypeName.Out0" value="bool"/>
+        <arg type="b" direction="in"/>
+        <arg type="b" direction="out"/>
+    </method>
   </interface>
 </node>
diff --git a/platformsupport/scenes/opengl/swap_profiler.h b/perf.h
rename from platformsupport/scenes/opengl/swap_profiler.h
rename to perf.h
--- a/platformsupport/scenes/opengl/swap_profiler.h
+++ b/perf.h
@@ -2,8 +2,7 @@
  KWin - the KDE window manager
  This file is part of the KDE project.
 
-Copyright (C) 2006 Lubos Lunak <l.lunak@kde.org>
-Copyright (C) 2009, 2010, 2011 Martin Gräßlin <mgraesslin@kde.org>
+Copyright 2019 Roman Gilg <subdiff@gmail.com>
 
 This program is free software; you can redistribute it and/or modify
 it under the terms of the GNU General Public License as published by
@@ -18,36 +17,47 @@
 You should have received a copy of the GNU General Public License
 along with this program.  If not, see <http://www.gnu.org/licenses/>.
 *********************************************************************/
-#ifndef KWIN_SCENE_OPENGL_SWAP_PROFILER_H
-#define KWIN_SCENE_OPENGL_SWAP_PROFILER_H
+#pragma once
 
-#include <QElapsedTimer>
 #include <kwin_export.h>
 
+#include <QString>
+#include <QDebug>
+
 namespace KWin
 {
+namespace Perf
+{
 
 /**
- * @short Profiler to detect whether we have triple buffering
- * The strategy is to start setBlocksForRetrace(false) but assume blocking and have the system prove that assumption wrong
+ * Internal perf API for consumers
  */
-class KWIN_EXPORT SwapProfiler
+class KWIN_EXPORT PerfInterface
 {
+
 public:
-    SwapProfiler();
-    void init();
-    void begin();
-    /**
-     * @return char being 'd' for double, 't' for triple (or more - but non-blocking) buffering and
-     * 0 (NOT '0') otherwise, so you can act on "if (char result = SwapProfiler::end()) { fooBar(); }
-     */
-    char end();
-private:
-    QElapsedTimer m_timer;
-    qint64  m_time;
-    int m_counter;
+    static void ftrace(const QString &message);
+    static void ftrace(ulong ctx, const QString &message);
+    static void ftrace(const QString &message, ulong ctx);
 };
 
+inline
+void ftrace(const QString &message)
+{
+    PerfInterface::ftrace(message);
+}
+
+inline
+void ftrace(ulong ctx, const QString &message)
+{
+    PerfInterface::ftrace(ctx, message);
+}
+
+inline
+void ftrace(const QString &message, ulong ctx)
+{
+    PerfInterface::ftrace(message, ctx);
 }
 
-#endif
+}
+}
diff --git a/platformsupport/scenes/opengl/swap_profiler.h b/perf.cpp
copy from platformsupport/scenes/opengl/swap_profiler.h
copy to perf.cpp
--- a/platformsupport/scenes/opengl/swap_profiler.h
+++ b/perf.cpp
@@ -2,8 +2,7 @@
  KWin - the KDE window manager
  This file is part of the KDE project.
 
-Copyright (C) 2006 Lubos Lunak <l.lunak@kde.org>
-Copyright (C) 2009, 2010, 2011 Martin Gräßlin <mgraesslin@kde.org>
+Copyright 2019 Roman Gilg <subdiff@gmail.com>
 
 This program is free software; you can redistribute it and/or modify
 it under the terms of the GNU General Public License as published by
@@ -18,36 +17,50 @@
 You should have received a copy of the GNU General Public License
 along with this program.  If not, see <http://www.gnu.org/licenses/>.
 *********************************************************************/
-#ifndef KWIN_SCENE_OPENGL_SWAP_PROFILER_H
-#define KWIN_SCENE_OPENGL_SWAP_PROFILER_H
+#include "perf.h"
 
-#include <QElapsedTimer>
-#include <kwin_export.h>
+#include <config-kwin.h>
+
+#ifdef KWIN_BUILD_PERF
+#include "helpers/perf/ftrace_marker.h"
+#endif
 
 namespace KWin
 {
-
-/**
- * @short Profiler to detect whether we have triple buffering
- * The strategy is to start setBlocksForRetrace(false) but assume blocking and have the system prove that assumption wrong
- */
-class KWIN_EXPORT SwapProfiler
+namespace Perf
 {
-public:
-    SwapProfiler();
-    void init();
-    void begin();
-    /**
-     * @return char being 'd' for double, 't' for triple (or more - but non-blocking) buffering and
-     * 0 (NOT '0') otherwise, so you can act on "if (char result = SwapProfiler::end()) { fooBar(); }
-     */
-    char end();
-private:
-    QElapsedTimer m_timer;
-    qint64  m_time;
-    int m_counter;
-};
 
+#ifdef KWIN_BUILD_PERF
+void PerfInterface::ftrace(const QString &message)
+{
+    FtraceMarker::self()->print(message);
 }
+void PerfInterface::ftrace(ulong ctx, const QString &message)
+{
+    FtraceMarker::self()->print(ctx, message);
+}
+void PerfInterface::ftrace(const QString &message, ulong ctx)
+{
+    FtraceMarker::self()->print(message, ctx);
+}
+
+#else
 
+void PerfInterface::ftrace(const QString &message)
+{
+    Q_UNUSED(message)
+}
+void PerfInterface::ftrace(ulong ctx, const QString &message)
+{
+    Q_UNUSED(message)
+    Q_UNUSED(ctx)
+}
+void PerfInterface::ftrace(const QString &message, ulong ctx)
+{
+    Q_UNUSED(message)
+    Q_UNUSED(ctx)
+}
 #endif
+
+}
+}
diff --git a/platformsupport/scenes/opengl/CMakeLists.txt b/platformsupport/scenes/opengl/CMakeLists.txt
--- a/platformsupport/scenes/opengl/CMakeLists.txt
+++ b/platformsupport/scenes/opengl/CMakeLists.txt
@@ -2,7 +2,6 @@
     abstract_egl_backend.cpp
     backend.cpp
     linux_dmabuf.cpp
-    swap_profiler.cpp
     texture.cpp
 )
 
diff --git a/platformsupport/scenes/opengl/backend.h b/platformsupport/scenes/opengl/backend.h
--- a/platformsupport/scenes/opengl/backend.h
+++ b/platformsupport/scenes/opengl/backend.h
@@ -85,6 +85,7 @@
     virtual bool makeCurrent() = 0;
     virtual void doneCurrent() = 0;
     virtual bool usesOverlayWindow() const = 0;
+    virtual bool hasSwapEvent() const { return true; }
     /**
      * Whether the rendering needs to be split per screen.
      * Default implementation returns @c false.
@@ -124,25 +125,6 @@
     bool isFailed() const {
         return m_failed;
     }
-    /**
-     * @brief Whether the Backend provides VSync.
-     *
-     * Currently only the GLX backend can provide VSync.
-     *
-     * @return bool @c true if VSync support is available, @c false otherwise
-     */
-    bool syncsToVBlank() const {
-        return m_syncsToVBlank;
-    }
-    /**
-     * @brief Whether VSync blocks execution until the screen is in the retrace
-     *
-     * Case for waitVideoSync and non triple buffering buffer swaps
-     *
-     */
-    bool blocksForRetrace() const {
-        return m_blocksForRetrace;
-    }
     /**
      * @brief Whether the backend uses direct rendering.
      *
@@ -211,26 +193,6 @@
      * @param reason The reason why the initialization failed.
      */
     void setFailed(const QString &reason);
-    /**
-     * @brief Sets whether the backend provides VSync.
-     *
-     * Should be called by the concrete subclass once it is determined whether VSync is supported.
-     * If the subclass does not call this method, the backend defaults to @c false.
-     * @param enabled @c true if VSync support available, @c false otherwise.
-     */
-    void setSyncsToVBlank(bool enabled) {
-        m_syncsToVBlank = enabled;
-    }
-    /**
-     * @brief Sets whether the VSync iplementation blocks
-     *
-     * Should be called by the concrete subclass once it is determined how VSync works.
-     * If the subclass does not call this method, the backend defaults to @c false.
-     * @param enabled @c true if VSync blocks, @c false otherwise.
-     */
-    void setBlocksForRetrace(bool enabled) {
-        m_blocksForRetrace = enabled;
-    }
     /**
      * @brief Sets whether the OpenGL context is direct.
      *
@@ -283,14 +245,6 @@
     }
 
 private:
-    /**
-     * @brief Whether VSync is available and used, defaults to @c false.
-     */
-    bool m_syncsToVBlank;
-    /**
-     * @brief Whether present() will block execution until the next vertical retrace @c false.
-     */
-    bool m_blocksForRetrace;
     /**
      * @brief Whether direct rendering is used, defaults to @c false.
      */
diff --git a/platformsupport/scenes/opengl/backend.cpp b/platformsupport/scenes/opengl/backend.cpp
--- a/platformsupport/scenes/opengl/backend.cpp
+++ b/platformsupport/scenes/opengl/backend.cpp
@@ -30,9 +30,7 @@
 {
 
 OpenGLBackend::OpenGLBackend()
-    : m_syncsToVBlank(false)
-    , m_blocksForRetrace(false)
-    , m_directRendering(false)
+    : m_directRendering(false)
     , m_haveBufferAge(false)
     , m_failed(false)
 {
diff --git a/platformsupport/scenes/opengl/swap_profiler.cpp b/platformsupport/scenes/opengl/swap_profiler.cpp
deleted file mode 100644
--- a/platformsupport/scenes/opengl/swap_profiler.cpp
+++ /dev/null
@@ -1,57 +0,0 @@
-/********************************************************************
- KWin - the KDE window manager
- This file is part of the KDE project.
-
-Copyright (C) 2006 Lubos Lunak <l.lunak@kde.org>
-Copyright (C) 2009, 2010, 2011 Martin Gräßlin <mgraesslin@kde.org>
-
-This program is free software; you can redistribute it and/or modify
-it under the terms of the GNU General Public License as published by
-the Free Software Foundation; either version 2 of the License, or
-(at your option) any later version.
-
-This program is distributed in the hope that it will be useful,
-but WITHOUT ANY WARRANTY; without even the implied warranty of
-MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License
-along with this program.  If not, see <http://www.gnu.org/licenses/>.
-*********************************************************************/
-#include "swap_profiler.h"
-#include <logging.h>
-
-namespace KWin
-{
-
-SwapProfiler::SwapProfiler()
-{
-    init();
-}
-
-void SwapProfiler::init()
-{
-    m_time = 2 * 1000*1000; // we start with a long time mean of 2ms ...
-    m_counter = 0;
-}
-
-void SwapProfiler::begin()
-{
-    m_timer.start();
-}
-
-char SwapProfiler::end()
-{
-    // .. and blend in actual values.
-    // this way we prevent extremes from killing our long time mean
-    m_time = (10*m_time + m_timer.nsecsElapsed())/11;
-    if (++m_counter > 500) {
-        const bool blocks = m_time > 1000 * 1000; // 1ms, i get ~250µs and ~7ms w/o triple buffering...
-        qCDebug(KWIN_OPENGL) << "Triple buffering detection:" << QString(blocks ? QStringLiteral("NOT available") : QStringLiteral("Available")) <<
-                        " - Mean block time:" << m_time/(1000.0*1000.0) << "ms";
-        return blocks ? 'd' : 't';
-    }
-    return 0;
-}
-
-}
diff --git a/plugins/platforms/drm/egl_gbm_backend.cpp b/plugins/platforms/drm/egl_gbm_backend.cpp
--- a/plugins/platforms/drm/egl_gbm_backend.cpp
+++ b/plugins/platforms/drm/egl_gbm_backend.cpp
@@ -42,7 +42,6 @@
 {
     // Egl is always direct rendering
     setIsDirectRendering(true);
-    setSyncsToVBlank(true);
     connect(m_backend, &DrmBackend::outputAdded, this, &EglGbmBackend::createOutput);
     connect(m_backend, &DrmBackend::outputRemoved, this,
         [this] (DrmOutput *output) {
diff --git a/plugins/platforms/drm/egl_stream_backend.cpp b/plugins/platforms/drm/egl_stream_backend.cpp
--- a/plugins/platforms/drm/egl_stream_backend.cpp
+++ b/plugins/platforms/drm/egl_stream_backend.cpp
@@ -86,7 +86,6 @@
     : AbstractEglBackend(), m_backend(b)
 {
     setIsDirectRendering(true);
-    setSyncsToVBlank(true);
     connect(m_backend, &DrmBackend::outputAdded, this, &EglStreamBackend::createOutput);
     connect(m_backend, &DrmBackend::outputRemoved, this,
         [this] (DrmOutput *output) {
diff --git a/plugins/platforms/hwcomposer/egl_hwcomposer_backend.cpp b/plugins/platforms/hwcomposer/egl_hwcomposer_backend.cpp
--- a/plugins/platforms/hwcomposer/egl_hwcomposer_backend.cpp
+++ b/plugins/platforms/hwcomposer/egl_hwcomposer_backend.cpp
@@ -30,8 +30,6 @@
 {
     // EGL is always direct rendering
     setIsDirectRendering(true);
-    setSyncsToVBlank(true);
-    setBlocksForRetrace(true);
 }
 
 EglHwcomposerBackend::~EglHwcomposerBackend()
diff --git a/plugins/platforms/x11/common/eglonxbackend.h b/plugins/platforms/x11/common/eglonxbackend.h
--- a/plugins/platforms/x11/common/eglonxbackend.h
+++ b/plugins/platforms/x11/common/eglonxbackend.h
@@ -20,7 +20,6 @@
 #ifndef KWIN_EGL_ON_X_BACKEND_H
 #define KWIN_EGL_ON_X_BACKEND_H
 #include "abstract_egl_backend.h"
-#include "swap_profiler.h"
 
 #include <xcb/xcb.h>
 
@@ -82,7 +81,6 @@
     xcb_window_t m_renderingWindow = XCB_WINDOW_NONE;
     bool m_havePlatformBase = false;
     bool m_x11TextureFromPixmapSupported = true;
-    SwapProfiler m_swapProfiler;
     friend class EglTexture;
 };
 
diff --git a/plugins/platforms/x11/common/eglonxbackend.cpp b/plugins/platforms/x11/common/eglonxbackend.cpp
--- a/plugins/platforms/x11/common/eglonxbackend.cpp
+++ b/plugins/platforms/x11/common/eglonxbackend.cpp
@@ -73,19 +73,13 @@
     setIsDirectRendering(true);
 }
 
-static bool gs_tripleBufferUndetected = true;
-static bool gs_tripleBufferNeedsDetection = false;
-
 EglOnXBackend::~EglOnXBackend()
 {
     if (isFailed() && m_overlayWindow) {
         m_overlayWindow->destroy();
     }
     cleanup();
 
-    gs_tripleBufferUndetected = true;
-    gs_tripleBufferNeedsDetection = false;
-
     if (m_overlayWindow) {
         if (overlayWindow()->window()) {
             overlayWindow()->destroy();
@@ -127,35 +121,20 @@
         }
     }
 
-    setSyncsToVBlank(false);
-    setBlocksForRetrace(false);
-    gs_tripleBufferNeedsDetection = false;
-    m_swapProfiler.init();
     if (surfaceHasSubPost) {
         qCDebug(KWIN_CORE) << "EGL implementation and surface support eglPostSubBufferNV, let's use it";
 
-        if (options->glPreferBufferSwap() != Options::NoSwapEncourage) {
-            // check if swap interval 1 is supported
-            EGLint val;
-            eglGetConfigAttrib(eglDisplay(), config(), EGL_MAX_SWAP_INTERVAL, &val);
-            if (val >= 1) {
-                if (eglSwapInterval(eglDisplay(), 1)) {
-                    qCDebug(KWIN_CORE) << "Enabled v-sync";
-                    setSyncsToVBlank(true);
-                    const QByteArray tripleBuffer = qgetenv("KWIN_TRIPLE_BUFFER");
-                    if (!tripleBuffer.isEmpty()) {
-                        setBlocksForRetrace(qstrcmp(tripleBuffer, "0") == 0);
-                        gs_tripleBufferUndetected = false;
-                    }
-                    gs_tripleBufferNeedsDetection = gs_tripleBufferUndetected;
-                }
-            } else {
-                qCWarning(KWIN_CORE) << "Cannot enable v-sync as max. swap interval is" << val;
+        // check if swap interval 1 is supported
+        EGLint val;
+        eglGetConfigAttrib(eglDisplay(), config(), EGL_MAX_SWAP_INTERVAL, &val);
+        if (val >= 1) {
+            if (eglSwapInterval(eglDisplay(), 1)) {
+                qCDebug(KWIN_CORE) << "Enabled v-sync";
             }
         } else {
-            // disable v-sync
-            eglSwapInterval(eglDisplay(), 0);
+            qCWarning(KWIN_CORE) << "Cannot enable v-sync as max. swap interval is" << val;
         }
+
     } else {
         /* In the GLX backend, we fall back to using glCopyPixels if we have no extension providing support for partial screen updates.
          * However, that does not work in EGL - glCopyPixels with glDrawBuffer(GL_FRONT); does nothing.
@@ -343,32 +322,8 @@
     const bool fullRepaint = supportsBufferAge() || (damage == screenGeometry);
 
     if (fullRepaint || !surfaceHasSubPost) {
-        if (gs_tripleBufferNeedsDetection) {
-            eglWaitGL();
-            m_swapProfiler.begin();
-        }
         // the entire screen changed, or we cannot do partial updates (which implies we enabled surface preservation)
         eglSwapBuffers(eglDisplay(), surface);
-        if (gs_tripleBufferNeedsDetection) {
-            eglWaitGL();
-            if (char result = m_swapProfiler.end()) {
-                gs_tripleBufferUndetected = gs_tripleBufferNeedsDetection = false;
-                if (result == 'd' && GLPlatform::instance()->driver() == Driver_NVidia) {
-                    // TODO this is a workaround, we should get __GL_YIELD set before libGL checks it
-                    if (qstrcmp(qgetenv("__GL_YIELD"), "USLEEP")) {
-                        options->setGlPreferBufferSwap(0);
-                        eglSwapInterval(eglDisplay(), 0);
-                        result = 0; // hint proper behavior
-                        qCWarning(KWIN_CORE) << "\nIt seems you are using the nvidia driver without triple buffering\n"
-                                          "You must export __GL_YIELD=\"USLEEP\" to prevent large CPU overhead on synced swaps\n"
-                                          "Preferably, enable the TripleBuffer Option in the xorg.conf Device\n"
-                                          "For this reason, the tearing prevention has been disabled.\n"
-                                          "See https://bugs.kde.org/show_bug.cgi?id=322060\n";
-                    }
-                }
-                setBlocksForRetrace(result == 'd');
-            }
-        }
         if (supportsBufferAge()) {
             eglQuerySurface(eglDisplay(), surface, EGL_BUFFER_AGE_EXT, &m_bufferAge);
         }
@@ -399,15 +354,6 @@
 {
     QRegion repaint;
 
-    if (gs_tripleBufferNeedsDetection) {
-        // the composite timer floors the repaint frequency. This can pollute our triple buffering
-        // detection because the glXSwapBuffers call for the new frame has to wait until the pending
-        // one scanned out.
-        // So we compensate for that by waiting an extra milisecond to give the driver the chance to
-        // fllush the buffer queue
-        usleep(1000);
-    }
-
     present();
 
     if (supportsBufferAge())
@@ -440,15 +386,9 @@
 
     setLastDamage(renderedRegion);
 
-    if (!blocksForRetrace()) {
-        // This also sets lastDamage to empty which prevents the frame from
-        // being posted again when prepareRenderingFrame() is called.
-        present();
-    } else {
-        // Make sure that the GPU begins processing the command stream
-        // now and not the next time prepareRenderingFrame() is called.
-        glFlush();
-    }
+    // This also sets lastDamage to empty which prevents the frame from
+    // being posted again when prepareRenderingFrame() is called.
+    present();
 
     if (m_overlayWindow && overlayWindow()->window())  // show the window only after the first pass,
         overlayWindow()->show();   // since that pass may take long
diff --git a/plugins/platforms/x11/standalone/glxbackend.h b/plugins/platforms/x11/standalone/glxbackend.h
--- a/plugins/platforms/x11/standalone/glxbackend.h
+++ b/plugins/platforms/x11/standalone/glxbackend.h
@@ -21,7 +21,6 @@
 #define KWIN_GLX_BACKEND_H
 #include "backend.h"
 #include "texture.h"
-#include "swap_profiler.h"
 #include "x11eventfilter.h"
 
 #include <xcb/glx.h>
@@ -32,10 +31,6 @@
 namespace KWin
 {
 
-// GLX_MESA_swap_interval
-using glXSwapIntervalMESA_func = int (*)(unsigned int interval);
-extern glXSwapIntervalMESA_func glXSwapIntervalMESA;
-
 class FBConfigInfo
 {
 public:
@@ -47,9 +42,6 @@
 };
 
 
-// ------------------------------------------------------------------
-
-
 class SwapEventFilter : public X11EventFilter
 {
 public:
@@ -78,20 +70,24 @@
     void doneCurrent() override;
     OverlayWindow* overlayWindow() const override;
     bool usesOverlayWindow() const override;
+    bool hasSwapEvent() const override;
     void init() override;
 
 protected:
     void present() override;
 
 private:
-    bool initBuffer();
     bool checkVersion();
+
+    bool initBuffer();
     void initExtensions();
-    void waitSync();
     bool initRenderingContext();
     bool initFbConfig();
     void initVisualDepthHashTable();
-    void setSwapInterval(int interval);
+
+    void swap();
+    void copy();
+
     Display *display() const {
         return m_x11Display;
     }
@@ -111,15 +107,12 @@
     QHash<xcb_visualid_t, int> m_visualDepthHash;
     std::unique_ptr<SwapEventFilter> m_swapEventFilter;
     int m_bufferAge;
+
+    bool m_haveOMLSyncControl = false;
     bool m_haveMESACopySubBuffer = false;
-    bool m_haveMESASwapControl = false;
-    bool m_haveEXTSwapControl = false;
-    bool m_haveSGISwapControl = false;
     bool m_haveINTELSwapEvent = false;
-    bool haveSwapInterval = false;
-    bool haveWaitSync = false;
+
     Display *m_x11Display;
-    SwapProfiler m_swapProfiler;
     friend class GlxTexture;
 };
 
diff --git a/plugins/platforms/x11/standalone/glxbackend.cpp b/plugins/platforms/x11/standalone/glxbackend.cpp
--- a/plugins/platforms/x11/standalone/glxbackend.cpp
+++ b/plugins/platforms/x11/standalone/glxbackend.cpp
@@ -30,6 +30,7 @@
 #include "options.h"
 #include "overlaywindow.h"
 #include "composite.h"
+#include "perf.h"
 #include "platform.h"
 #include "scene.h"
 #include "screens.h"
@@ -99,36 +100,22 @@
     return false;
 }
 
-
-// -----------------------------------------------------------------------
-
-
-
 GlxBackend::GlxBackend(Display *display)
     : OpenGLBackend()
     , m_overlayWindow(kwinApp()->platform()->createOverlayWindow())
     , window(None)
     , fbconfig(NULL)
     , glxWindow(None)
     , ctx(nullptr)
     , m_bufferAge(0)
-    , haveSwapInterval(false)
     , m_x11Display(display)
 {
-     // Ensures calls to glXSwapBuffers will always block until the next
-     // retrace when using the proprietary NVIDIA driver. This must be
-     // set before libGL.so is loaded.
-     setenv("__GL_MaxFramesAllowed", "1", true);
-
      // Force initialization of GLX integration in the Qt's xcb backend
      // to make it call XESetWireToEvent callbacks, which is required
      // by Mesa when using DRI2.
      QOpenGLContext::supportsThreadedOpenGL();
 }
 
-static bool gs_tripleBufferUndetected = true;
-static bool gs_tripleBufferNeedsDetection = false;
-
 GlxBackend::~GlxBackend()
 {
     if (isFailed()) {
@@ -139,9 +126,6 @@
     cleanupGL();
     doneCurrent();
 
-    gs_tripleBufferUndetected = true;
-    gs_tripleBufferNeedsDetection = false;
-
     if (ctx)
         glXDestroyContext(display(), ctx);
 
@@ -172,7 +156,6 @@
 #endif
     return ret;
 }
-glXSwapIntervalMESA_func glXSwapIntervalMESA;
 
 void GlxBackend::init()
 {
@@ -183,14 +166,6 @@
     }
 
     initExtensions();
-
-    // resolve glXSwapIntervalMESA if available
-    if (hasExtension(QByteArrayLiteral("GLX_MESA_swap_control"))) {
-        glXSwapIntervalMESA = (glXSwapIntervalMESA_func) getProcAddress("glXSwapIntervalMESA");
-    } else {
-        glXSwapIntervalMESA = nullptr;
-    }
-
     initVisualDepthHashTable();
 
     if (!initBuffer()) {
@@ -213,60 +188,29 @@
     initGL(&getProcAddress);
 
     // Check whether certain features are supported
+    m_haveOMLSyncControl    = hasExtension(QByteArrayLiteral("GLX_OML_sync_control"));
     m_haveMESACopySubBuffer = hasExtension(QByteArrayLiteral("GLX_MESA_copy_sub_buffer"));
-    m_haveMESASwapControl   = hasExtension(QByteArrayLiteral("GLX_MESA_swap_control"));
-    m_haveEXTSwapControl    = hasExtension(QByteArrayLiteral("GLX_EXT_swap_control"));
-    m_haveSGISwapControl    = hasExtension(QByteArrayLiteral("GLX_SGI_swap_control"));
+
     // only enable Intel swap event if env variable is set, see BUG 342582
     m_haveINTELSwapEvent    = hasExtension(QByteArrayLiteral("GLX_INTEL_swap_event"))
                                 && qgetenv("KWIN_USE_INTEL_SWAP_EVENT") == QByteArrayLiteral("1");
 
-    if (m_haveINTELSwapEvent) {
+    qCDebug(KWIN_X11STANDALONE) << "GlxBackend::init: OML_sync_control" << m_haveOMLSyncControl // categorized like the file's other debug output
+                                << "MESA_copy_sub_buffer" << m_haveMESACopySubBuffer << "INTEL_swap_event" << m_haveINTELSwapEvent;
+
+    if (hasSwapEvent()) {
         m_swapEventFilter = std::make_unique<SwapEventFilter>(window, glxWindow);
         glXSelectEvent(display(), glxWindow, GLX_BUFFER_SWAP_COMPLETE_INTEL_MASK);
     }
 
-    haveSwapInterval = m_haveMESASwapControl || m_haveEXTSwapControl || m_haveSGISwapControl;
-
     setSupportsBufferAge(false);
-
     if (hasExtension(QByteArrayLiteral("GLX_EXT_buffer_age"))) {
         const QByteArray useBufferAge = qgetenv("KWIN_USE_BUFFER_AGE");
-
-        if (useBufferAge != "0")
+        if (useBufferAge != "0") {
             setSupportsBufferAge(true);
+        }
     }
 
-    setSyncsToVBlank(false);
-    setBlocksForRetrace(false);
-    haveWaitSync = false;
-    gs_tripleBufferNeedsDetection = false;
-    m_swapProfiler.init();
-    const bool wantSync = options->glPreferBufferSwap() != Options::NoSwapEncourage;
-    if (wantSync && glXIsDirect(display(), ctx)) {
-        if (haveSwapInterval) { // glXSwapInterval is preferred being more reliable
-            setSwapInterval(1);
-            setSyncsToVBlank(true);
-            const QByteArray tripleBuffer = qgetenv("KWIN_TRIPLE_BUFFER");
-            if (!tripleBuffer.isEmpty()) {
-                setBlocksForRetrace(qstrcmp(tripleBuffer, "0") == 0);
-                gs_tripleBufferUndetected = false;
-            }
-            gs_tripleBufferNeedsDetection = gs_tripleBufferUndetected;
-        } else if (hasExtension(QByteArrayLiteral("GLX_SGI_video_sync"))) {
-            unsigned int sync;
-            if (glXGetVideoSyncSGI(&sync) == 0 && glXWaitVideoSyncSGI(1, 0, &sync) == 0) {
-                setSyncsToVBlank(true);
-                setBlocksForRetrace(true);
-                haveWaitSync = true;
-            } else
-                qCWarning(KWIN_X11STANDALONE) << "NO VSYNC! glXSwapInterval is not supported, glXWaitVideoSync is supported but broken";
-        } else
-            qCWarning(KWIN_X11STANDALONE) << "NO VSYNC! neither glSwapInterval nor glXWaitVideoSync are supported";
-    } else {
-        // disable v-sync (if possible)
-        setSwapInterval(0);
-    }
     if (glPlatform->isVirtualBox()) {
         // VirtualBox does not support glxQueryDrawable
         // this should actually be in kwinglutils_funcs, but QueryDrawable seems not to be provided by an extension
@@ -279,6 +223,11 @@
     qCDebug(KWIN_X11STANDALONE) << "Direct rendering:" << isDirectRendering();
 }
 
+bool GlxBackend::hasSwapEvent() const // override of OpenGLBackend::hasSwapEvent()
+{
+    return m_haveINTELSwapEvent; // set in init(): GLX_INTEL_swap_event present and KWIN_USE_INTEL_SWAP_EVENT == "1"
+}
+
 bool GlxBackend::checkVersion()
 {
     int major, minor;
@@ -459,7 +408,8 @@
     if (count > 0)
         XFree(configs);
 
-    std::stable_sort(candidates.begin(), candidates.end(), [](const FBConfig &left, const FBConfig &right) {
+    std::stable_sort(candidates.begin(), candidates.end(),
+                     [](const FBConfig &left, const FBConfig &right) {
         if (left.depth < right.depth)
             return true;
 
@@ -483,8 +433,10 @@
         glXGetFBConfigAttrib(display(), fbconfig, GLX_STENCIL_SIZE, &stencil);
         glXGetFBConfigAttrib(display(), fbconfig, GLX_FRAMEBUFFER_SRGB_CAPABLE_ARB, &srgb);
 
-        qCDebug(KWIN_X11STANDALONE, "Choosing GLXFBConfig %#x X visual %#x depth %d RGBA %d:%d:%d:%d ZS %d:%d sRGB: %d",
-                fbconfig_id, visual_id, visualDepth(visual_id), red, green, blue, alpha, depth, stencil, srgb);
+        qCDebug(KWIN_X11STANDALONE,
+                "Choosing GLXFBConfig %#x X visual %#x depth %d RGBA %d:%d:%d:%d ZS %d:%d sRGB: %d",
+                fbconfig_id, visual_id, visualDepth(visual_id),
+                red, green, blue, alpha, depth, stencil, srgb);
     }
 
     if (fbconfig == nullptr) {
@@ -672,127 +624,71 @@
     return info;
 }
 
-void GlxBackend::setSwapInterval(int interval)
+QRegion GlxBackend::prepareRenderingFrame() // returns the damage history for the current buffer age, or an empty region
 {
-    if (m_haveEXTSwapControl)
-        glXSwapIntervalEXT(display(), glxWindow, interval);
-    else if (m_haveMESASwapControl)
-        glXSwapIntervalMESA(interval);
-    else if (m_haveSGISwapControl)
-        glXSwapIntervalSGI(interval);
+    const auto repaint = supportsBufferAge() ? accumulatedDamageHistory(m_bufferAge) : QRegion(); // empty region when buffer age is unsupported
+
+    startRenderTimer();
+//    glXWaitX(); -- NOTE(review): the previous implementation called this here; confirm dropping it is intended, then delete this line
+
+    return repaint;
 }
 
-void GlxBackend::waitSync()
+void GlxBackend::swap() // full buffer swap; called from present()
 {
-    // NOTE that vsync has no effect with indirect rendering
-    if (haveWaitSync) {
-        uint sync;
-#if 0
-        // TODO: why precisely is this important?
-        // the sync counter /can/ perform multiple steps during glXGetVideoSync & glXWaitVideoSync
-        // but this only leads to waiting for two frames??!?
-        glXGetVideoSync(&sync);
-        glXWaitVideoSync(2, (sync + 1) % 2, &sync);
-#else
-        glXWaitVideoSyncSGI(1, 0, &sync);
-#endif
+    if (hasSwapEvent()) { // only when the INTEL swap event is in use (see hasSwapEvent())
+        Compositor::self()->aboutToSwapBuffers(); // notify the compositor before the swap is issued
+    }
+    glXSwapBuffers(display(), glxWindow);
+    if (supportsBufferAge()) { // refresh m_bufferAge, which prepareRenderingFrame() reads
+        glXQueryDrawable(display(), glxWindow, GLX_BACK_BUFFER_AGE_EXT, (GLuint *) &m_bufferAge);
     }
 }
 
-void GlxBackend::present()
+void GlxBackend::copy() // pushes the rects in lastDamage() to the window without glXSwapBuffers; called from present()
 {
-    if (lastDamage().isEmpty())
-        return;
-
-    const QSize &screenSize = screens()->size();
-    const QRegion displayRegion(0, 0, screenSize.width(), screenSize.height());
-    const bool fullRepaint = supportsBufferAge() || (lastDamage() == displayRegion);
-
-    if (fullRepaint) {
-        if (m_haveINTELSwapEvent)
-            Compositor::self()->aboutToSwapBuffers();
-
-        if (haveSwapInterval) {
-            if (gs_tripleBufferNeedsDetection) {
-                glXWaitGL();
-                m_swapProfiler.begin();
-            }
-            glXSwapBuffers(display(), glxWindow);
-            if (gs_tripleBufferNeedsDetection) {
-                glXWaitGL();
-                if (char result = m_swapProfiler.end()) {
-                    gs_tripleBufferUndetected = gs_tripleBufferNeedsDetection = false;
-                    setBlocksForRetrace(result == 'd');
-                }
-            }
-        } else {
-            waitSync();
-            glXSwapBuffers(display(), glxWindow);
-        }
-        if (supportsBufferAge()) {
-            glXQueryDrawable(display(), glxWindow, GLX_BACK_BUFFER_AGE_EXT, (GLuint *) &m_bufferAge);
-        }
-    } else if (m_haveMESACopySubBuffer) {
+    if (m_haveMESACopySubBuffer) { // GLX_MESA_copy_sub_buffer was detected in init()
+        // Optimized copy is possible.
         for (const QRect &r : lastDamage()) {
             // convert to OpenGL coordinates
-            int y = screenSize.height() - r.y() - r.height();
+            int y = screens()->size().height() - r.y() - r.height(); // NOTE(review): screens()->size() is loop-invariant and could be hoisted
             glXCopySubBufferMESA(display(), glxWindow, r.x(), y, r.width(), r.height());
         }
-    } else { // Copy Pixels (horribly slow on Mesa)
-        glDrawBuffer(GL_FRONT);
-        copyPixels(lastDamage());
-        glDrawBuffer(GL_BACK);
-    }
-
-    setLastDamage(QRegion());
-    if (!supportsBufferAge()) {
-        glXWaitGL();
-        XFlush(display());
+        return; // done; skip the copyPixels fallback below
     }
-}
-
-void GlxBackend::screenGeometryChanged(const QSize &size)
-{
-    doneCurrent();
 
-    XMoveResizeWindow(display(), window, 0, 0, size.width(), size.height());
-    overlayWindow()->setup(window);
-    Xcb::sync();
-
-    makeCurrent();
-    glViewport(0, 0, size.width(), size.height());
-
-    // The back buffer contents are now undefined
-    m_bufferAge = 0;
-}
-
-SceneOpenGLTexturePrivate *GlxBackend::createBackendTexture(SceneOpenGLTexture *texture)
-{
-    return new GlxTexture(texture, this);
+    // Copy Pixels (horribly slow on Mesa)
+    glDrawBuffer(GL_FRONT); // target the front buffer for the copy
+    copyPixels(lastDamage());
+    glDrawBuffer(GL_BACK); // switch the draw buffer back to GL_BACK afterwards
 }
 
-QRegion GlxBackend::prepareRenderingFrame()
+void GlxBackend::present() // shows lastDamage() on screen via swap() or copy(), then clears it
 {
-    QRegion repaint;
-
-    if (gs_tripleBufferNeedsDetection) {
-        // the composite timer floors the repaint frequency. This can pollute our triple buffering
-        // detection because the glXSwapBuffers call for the new frame has to wait until the pending
-        // one scanned out.
-        // So we compensate for that by waiting an extra milisecond to give the driver the chance to
-        // fllush the buffer queue
-        usleep(1000);
+    Perf::ftrace(QStringLiteral("presentA")); // trace marker: entry, emitted even when there is nothing to present
+    if (lastDamage().isEmpty()) { // nothing was rendered since the last present()
+        return;
     }
 
-    present();
-
-    if (supportsBufferAge())
-        repaint = accumulatedDamageHistory(m_bufferAge);
+    const QSize &screenSize = screens()->size();
+    const bool isFullRepaint = (lastDamage() == // true when the damage is exactly the whole-screen rectangle
+                                QRegion(0, 0, screenSize.width(), screenSize.height()));
+
+    Perf::ftrace(QStringLiteral("presentB")); // trace marker: right before swap()/copy()
+    if (supportsBufferAge() || isFullRepaint) {
+        // Swap is possible because of full repaint or buffer age support.
+        // TODO: Why is buffer age so important for that? We can also swap without it.
+        swap();
+    } else {
+        copy();
+    }
 
-    startRenderTimer();
-    glXWaitX();
+    setLastDamage(QRegion()); // empty damage makes a repeated present() return early above
 
-    return repaint;
+//    if (!supportsBufferAge()) { -- NOTE(review): disabled flush carried over from the old present(); restore or delete before merging
+//        glXWaitGL();
+//        XFlush(display());
+//    }
 }
 
 void GlxBackend::endRenderingFrame(const QRegion &renderedRegion, const QRegion &damagedRegion)
@@ -816,22 +712,33 @@
 
     setLastDamage(renderedRegion);
 
-    if (!blocksForRetrace()) {
-        // This also sets lastDamage to empty which prevents the frame from
-        // being posted again when prepareRenderingFrame() is called.
-        present();
-    } else {
-        // Make sure that the GPU begins processing the command stream
-        // now and not the next time prepareRenderingFrame() is called.
-        glFlush();
-    }
-
-    if (overlayWindow()->window())  // show the window only after the first pass,
-        overlayWindow()->show();   // since that pass may take long
+    // Show the window only after the first pass,
+    // since that pass may take long.
+    if (overlayWindow()->window())
+        overlayWindow()->show();
 
     // Save the damaged region to history
     if (supportsBufferAge())
         addToDamageHistory(damagedRegion);
+
+    present();
+
+//    qDebug() << "GlxBackend::endRenderingFrame END";
+}
+
+void GlxBackend::screenGeometryChanged(const QSize &size) // resizes the X window and GL viewport to size (definition moved, body unchanged)
+{
+    doneCurrent(); // bracketed with makeCurrent() below around the X window reconfiguration
+
+    XMoveResizeWindow(display(), window, 0, 0, size.width(), size.height()); // window placed at 0,0 with the new size
+    overlayWindow()->setup(window);
+    Xcb::sync();
+
+    makeCurrent();
+    glViewport(0, 0, size.width(), size.height());
+
+    // The back buffer contents are now undefined
+    m_bufferAge = 0; // this value is what prepareRenderingFrame() passes to accumulatedDamageHistory()
 }
 
 bool GlxBackend::makeCurrent()
@@ -859,6 +766,11 @@
     return true;
 }
 
+SceneOpenGLTexturePrivate *GlxBackend::createBackendTexture(SceneOpenGLTexture *texture) // definition moved, body unchanged
+{
+    return new GlxTexture(texture, this); // raw heap allocation; ownership presumably passes to the caller - TODO confirm
+}
+
 /********************************************************
  * GlxTexture
  *******************************************************/
diff --git a/plugins/scenes/opengl/scene_opengl.h b/plugins/scenes/opengl/scene_opengl.h
--- a/plugins/scenes/opengl/scene_opengl.h
+++ b/plugins/scenes/opengl/scene_opengl.h
@@ -53,8 +53,7 @@
     void screenGeometryChanged(const QSize &size) override;
     OverlayWindow *overlayWindow() const override;
     bool usesOverlayWindow() const override;
-    bool blocksForRetrace() const override;
-    bool syncsToVBlank() const override;
+    bool hasSwapEvent() const override;
     bool makeOpenGLContextCurrent() override;
     void doneOpenGLContextCurrent() override;
     Decoration::Renderer *createDecorationRenderer(Decoration::DecoratedClientImpl *impl) override;
diff --git a/plugins/scenes/opengl/scene_opengl.cpp b/plugins/scenes/opengl/scene_opengl.cpp
--- a/plugins/scenes/opengl/scene_opengl.cpp
+++ b/plugins/scenes/opengl/scene_opengl.cpp
@@ -524,14 +524,9 @@
     return m_backend->overlayWindow();
 }
 
-bool SceneOpenGL::syncsToVBlank() const
+bool SceneOpenGL::hasSwapEvent() const // overrides Scene::hasSwapEvent()
 {
-    return m_backend->syncsToVBlank();
-}
-
-bool SceneOpenGL::blocksForRetrace() const
-{
-    return m_backend->blocksForRetrace();
+    return m_backend->hasSwapEvent(); // answer comes from the OpenGL backend in use
 }
 
 void SceneOpenGL::idle()
diff --git a/scene.h b/scene.h
--- a/scene.h
+++ b/scene.h
@@ -145,8 +145,7 @@
     enum ImageFilterType { ImageFilterFast, ImageFilterGood };
     // there's nothing to paint (adjust time_diff later)
     virtual void idle();
-    virtual bool blocksForRetrace() const;
-    virtual bool syncsToVBlank() const;
+    virtual bool hasSwapEvent() const;
     virtual OverlayWindow* overlayWindow() const = 0;
 
     virtual bool makeOpenGLContextCurrent();
diff --git a/scene.cpp b/scene.cpp
--- a/scene.cpp
+++ b/scene.cpp
@@ -623,12 +623,7 @@
     Q_UNUSED(opaqueFullscreen);
 }
 
-bool Scene::blocksForRetrace() const
-{
-    return false;
-}
-
-bool Scene::syncsToVBlank() const
+bool Scene::hasSwapEvent() const // base default is false; SceneOpenGL overrides this to ask its backend
 {
     return false;
 }
diff --git a/utils.h b/utils.h
--- a/utils.h
+++ b/utils.h
@@ -40,6 +40,7 @@
 #include <climits>
 Q_DECLARE_LOGGING_CATEGORY(KWIN_CORE)
 Q_DECLARE_LOGGING_CATEGORY(KWIN_VIRTUALKEYBOARD)
+Q_DECLARE_LOGGING_CATEGORY(KWIN_PERF)
 namespace KWin
 {
 
diff --git a/utils.cpp b/utils.cpp
--- a/utils.cpp
+++ b/utils.cpp
@@ -48,6 +48,7 @@
 
 Q_LOGGING_CATEGORY(KWIN_CORE, "kwin_core", QtCriticalMsg)
 Q_LOGGING_CATEGORY(KWIN_VIRTUALKEYBOARD, "kwin_virtualkeyboard", QtCriticalMsg)
+Q_LOGGING_CATEGORY(KWIN_PERF, "kwin_perf", QtCriticalMsg)
 namespace KWin
 {
 
diff --git a/workspace.cpp b/workspace.cpp
--- a/workspace.cpp
+++ b/workspace.cpp
@@ -1381,6 +1381,12 @@
     support.append(yes);
 #else
     support.append(no);
+#endif
+    support.append(QStringLiteral("KWIN_BUILD_PERF: "));
+#ifdef KWIN_BUILD_PERF
+    support.append(yes);
+#else
+    support.append(no);
 #endif
     support.append(QStringLiteral("HAVE_DRM: "));
 #if HAVE_DRM
@@ -1591,11 +1597,6 @@
             }
 
             support.append(QStringLiteral("OpenGL 2 Shaders are used\n"));
-            support.append(QStringLiteral("Painting blocks for vertical retrace: "));
-            if (m_compositor->scene()->blocksForRetrace())
-                support.append(QStringLiteral(" yes\n"));
-            else
-                support.append(QStringLiteral(" no\n"));
             break;
         }
         case XRenderCompositing: