diff --git a/CMakeLists.txt b/CMakeLists.txt
index 8cc25bd..b7fa622 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -1,93 +1,93 @@
 if (CMAKE_VERSION VERSION_LESS "2.8.12")
     cmake_minimum_required(VERSION 2.8.9)
     set(HEAPTRACK_BUILD_GUI OFF)
 else()
     cmake_minimum_required(VERSION 2.8.12)
 endif()
 
 project(heaptrack)
 enable_testing()
 
 if(NOT CMAKE_BUILD_TYPE)
   message(STATUS "Setting build type to 'RelWithDebInfo' as none was specified.")
   set(CMAKE_BUILD_TYPE RelWithDebInfo CACHE STRING "Choose the type of build." FORCE)
 endif()
 
 set(HEAPTRACK_VERSION_MAJOR 1)
 set(HEAPTRACK_VERSION_MINOR 0)
 set(HEAPTRACK_VERSION_PATCH 0)
 set(HEAPTRACK_LIB_VERSION 1.0.0)
 set(HEAPTRACK_LIB_SOVERSION 1)
-set(HEAPTRACK_FILE_FORMAT_VERSION 1)
+set(HEAPTRACK_FILE_FORMAT_VERSION 2)
 
 set(CMAKE_MODULE_PATH ${CMAKE_MODULE_PATH} ${CMAKE_CURRENT_SOURCE_DIR}/cmake)
 
 find_package(Libunwind REQUIRED)
 find_package(Boost 1.41.0 REQUIRED COMPONENTS iostreams program_options)
 find_package(Threads REQUIRED)
 find_package(ZLIB REQUIRED)
 include(FeatureSummary)
 
 option(
   HEAPTRACK_BUILD_GUI
   "Disable this option to skip building the Qt5 / KF5 based GUI for heaptrack."
   On
 )
 
 if(HEAPTRACK_BUILD_GUI)
     find_package(Qt5 5.2.0 NO_MODULE OPTIONAL_COMPONENTS Widgets)
     find_package(ECM 1.0.0 NO_MODULE)
     if(Qt5_FOUND AND ECM_FOUND)
         set(CMAKE_MODULE_PATH ${CMAKE_MODULE_PATH} ${ECM_MODULE_PATH} ${ECM_KDE_MODULE_DIR})
             find_package(KF5 OPTIONAL_COMPONENTS CoreAddons I18n ItemModels ThreadWeaver ConfigWidgets KIO)
             find_package(KChart "2.6.0")
             set_package_properties(KChart PROPERTIES TYPE RECOMMENDED PURPOSE "Required for the heaptrack_gui executable. Get it from the kdiagram module.")
     endif()
 endif()
 
 set(CMAKE_INSTALL_RPATH_USE_LINK_PATH TRUE)
 
 set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=c++11 -Wall -Wpedantic")
 
 include (CheckCXXSourceCompiles)
 check_cxx_source_compiles(
     "#include <unordered_map>
     #include <atomic>
     thread_local int tls;
     int main() { return 0; }"
     HAVE_CXX11_SUPPORT)
 
 if (NOT HAVE_CXX11_SUPPORT)
     message(FATAL_ERROR "Your compiler is too old and does not support the required C++11 features.")
 endif()
 
 check_cxx_source_compiles(
     "#include <stdio_ext.h>
     #include <fcntl.h>
     #include <dlfcn.h>
     #include <link.h>
     int main() { return 0; }"
     HAVE_LINUX_HEADERS)
 
 if (NOT HAVE_LINUX_HEADERS)
     message(FATAL_ERROR "You are missing some Linux headers required to compile heaptrack.")
 endif()
 
 set(BIN_INSTALL_DIR "bin")
 set(LIB_SUFFIX "" CACHE STRING "Define suffix of directory name (32/64)")
 set(LIB_INSTALL_DIR "lib${LIB_SUFFIX}")
 set(LIBEXEC_INSTALL_DIR "${LIB_INSTALL_DIR}/heaptrack/libexec")
 
 file(RELATIVE_PATH LIBEXEC_REL_PATH
     "${CMAKE_INSTALL_PREFIX}/${BIN_INSTALL_DIR}"
     "${CMAKE_INSTALL_PREFIX}/${LIBEXEC_INSTALL_DIR}")
 
 file(RELATIVE_PATH LIB_REL_PATH
     "${CMAKE_INSTALL_PREFIX}/${BIN_INSTALL_DIR}"
     "${CMAKE_INSTALL_PREFIX}/${LIB_INSTALL_DIR}/heaptrack")
 
 add_subdirectory(3rdparty)
 add_subdirectory(src)
 add_subdirectory(tests)
 
 feature_summary(WHAT ALL FATAL_ON_MISSING_REQUIRED_PACKAGES)
diff --git a/src/analyze/accumulatedtracedata.cpp b/src/analyze/accumulatedtracedata.cpp
index dd0e31e..1323897 100644
--- a/src/analyze/accumulatedtracedata.cpp
+++ b/src/analyze/accumulatedtracedata.cpp
@@ -1,649 +1,669 @@
 /*
  * Copyright 2015-2017 Milian Wolff <mail@milianw.de>
  *
  * This library is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
  * This library is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
  * License along with this library; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA
  */
 
 #include "accumulatedtracedata.h"
 
 #include <algorithm>
 #include <cassert>
 #include <iostream>
 #include <memory>
 
 #include <boost/algorithm/string/predicate.hpp>
 #include <boost/iostreams/filter/gzip.hpp>
 #include <boost/iostreams/filtering_stream.hpp>
 
 #include "util/config.h"
 #include "util/linereader.h"
 #include "util/pointermap.h"
 
 #ifdef __GNUC__
 #define POTENTIALLY_UNUSED __attribute__((unused))
 #else
 #define POTENTIALLY_UNUSED
 #endif
 
 using namespace std;
 
 namespace {
 
 template <typename Base>
 bool operator>>(LineReader& reader, Index<Base>& index)
 {
     return reader.readHex(index.index);
 }
 
 template <typename Base>
 ostream& operator<<(ostream& out, const Index<Base> index)
 {
     out << index.index;
     return out;
 }
 }
 
 AccumulatedTraceData::AccumulatedTraceData()
 {
     instructionPointers.reserve(16384);
     traces.reserve(65536);
     strings.reserve(4096);
     allocations.reserve(16384);
     stopIndices.reserve(4);
     opNewIpIndices.reserve(16);
 }
 
 const string& AccumulatedTraceData::stringify(const StringIndex stringId) const
 {
     if (!stringId || stringId.index > strings.size()) {
         static const string empty;
         return empty;
     } else {
         return strings.at(stringId.index - 1);
     }
 }
 
 string AccumulatedTraceData::prettyFunction(const string& function) const
 {
     if (!shortenTemplates) {
         return function;
     }
     string ret;
     ret.reserve(function.size());
     int depth = 0;
     for (size_t i = 0; i < function.size(); ++i) {
         const auto c = function[i];
         if ((c == '<' || c == '>') && ret.size() >= 8) {
             // don't get confused by C++ operators
             const char* cmp = "operator";
             if (ret.back() == c) {
                 // skip second angle bracket for operator<< or operator>>
                 if (c == '<') {
                     cmp = "operator<";
                 } else {
                     cmp = "operator>";
                 }
             }
             if (boost::algorithm::ends_with(ret, cmp)) {
                 ret.push_back(c);
                 continue;
             }
         }
         if (c == '<') {
             ++depth;
             if (depth == 1) {
                 ret.push_back(c);
             }
         } else if (c == '>') {
             --depth;
         }
         if (depth) {
             continue;
         }
         ret.push_back(c);
     }
     return ret;
 }
 
 bool AccumulatedTraceData::read(const string& inputFile)
 {
     const bool isCompressed = boost::algorithm::ends_with(inputFile, ".gz");
     ifstream file(inputFile, isCompressed ? ios_base::in | ios_base::binary : ios_base::in);
 
     if (!file.is_open()) {
         cerr << "Failed to open heaptrack log file: " << inputFile << endl;
         return false;
     }
 
     boost::iostreams::filtering_istream in;
     if (isCompressed) {
         in.push(boost::iostreams::gzip_decompressor());
     }
     in.push(file);
     return read(in);
 }
 
 bool AccumulatedTraceData::read(istream& in)
 {
     LineReader reader;
     int64_t timeStamp = 0;
 
     vector<string> opNewStrings = {
         // 64 bit
         "operator new(unsigned long)", "operator new[](unsigned long)",
         // 32 bit
         "operator new(unsigned int)", "operator new[](unsigned int)",
     };
     vector<StringIndex> opNewStrIndices;
     opNewStrIndices.reserve(opNewStrings.size());
 
     vector<string> stopStrings = {"main", "__libc_start_main", "__static_initialization_and_destruction_0"};
 
     const bool reparsing = totalTime != 0;
     m_maxAllocationTraceIndex.index = 0;
     totalCost = {};
     peakTime = 0;
     systemInfo = {};
     peakRSS = 0;
     allocations.clear();
     uint fileVersion = 0;
 
     // required for backwards compatibility
     // newer versions handle this in heaptrack_interpret already
     AllocationInfoSet allocationInfoSet;
     PointerMap pointers;
     // in older files, this contains the pointer address, in newer formats
     // it holds the allocation info index. both can be used to find temporary
     // allocations, i.e. when a deallocation follows with the same data
     uint64_t lastAllocationPtr = 0;
 
     while (reader.getLine(in)) {
         if (reader.mode() == 's') {
             if (reparsing) {
                 continue;
             }
             strings.push_back(reader.line().substr(2));
             StringIndex index;
             index.index = strings.size();
 
             auto opNewIt = find(opNewStrings.begin(), opNewStrings.end(), strings.back());
             if (opNewIt != opNewStrings.end()) {
                 opNewStrIndices.push_back(index);
                 opNewStrings.erase(opNewIt);
             } else {
                 auto stopIt = find(stopStrings.begin(), stopStrings.end(), strings.back());
                 if (stopIt != stopStrings.end()) {
                     stopIndices.push_back(index);
                     stopStrings.erase(stopIt);
                 }
             }
         } else if (reader.mode() == 't') {
             if (reparsing) {
                 continue;
             }
             TraceNode node;
             reader >> node.ipIndex;
             reader >> node.parentIndex;
             // skip operator new and operator new[] at the beginning of traces
             while (find(opNewIpIndices.begin(), opNewIpIndices.end(), node.ipIndex) != opNewIpIndices.end()) {
                 node = findTrace(node.parentIndex);
             }
             traces.push_back(node);
         } else if (reader.mode() == 'i') {
             if (reparsing) {
                 continue;
             }
             InstructionPointer ip;
             reader >> ip.instructionPointer;
             reader >> ip.moduleIndex;
-            reader >> ip.functionIndex;
-            reader >> ip.fileIndex;
-            reader >> ip.line;
+            auto readFrame = [&reader](Frame* frame) {
+                return (reader >> frame->functionIndex)
+                    && (reader >> frame->fileIndex)
+                    && (reader >> frame->line);
+            };
+            if (readFrame(&ip.frame)) {
+                Frame inlinedFrame;
+                while (readFrame(&inlinedFrame)) {
+                    ip.inlined.push_back(inlinedFrame);
+                }
+            }
+
             instructionPointers.push_back(ip);
-            if (find(opNewStrIndices.begin(), opNewStrIndices.end(), ip.functionIndex) != opNewStrIndices.end()) {
+            if (find(opNewStrIndices.begin(), opNewStrIndices.end(), ip.frame.functionIndex) != opNewStrIndices.end()) {
                 IpIndex index;
                 index.index = instructionPointers.size();
                 opNewIpIndices.push_back(index);
             }
         } else if (reader.mode() == '+') {
             AllocationInfo info;
             AllocationIndex allocationIndex;
             if (fileVersion >= 1) {
                 if (!(reader >> allocationIndex.index)) {
                     cerr << "failed to parse line: " << reader.line() << endl;
                     continue;
                 } else if (allocationIndex.index >= allocationInfos.size()) {
                     cerr << "allocation index out of bounds: " << allocationIndex.index
                          << ", maximum is: " << allocationInfos.size() << endl;
                     continue;
                 }
                 info = allocationInfos[allocationIndex.index];
                 lastAllocationPtr = allocationIndex.index;
             } else { // backwards compatibility
                 uint64_t ptr = 0;
                 if (!(reader >> info.size) || !(reader >> info.traceIndex) || !(reader >> ptr)) {
                     cerr << "failed to parse line: " << reader.line() << endl;
                     continue;
                 }
                 if (allocationInfoSet.add(info.size, info.traceIndex, &allocationIndex)) {
                     allocationInfos.push_back(info);
                 }
                 pointers.addPointer(ptr, allocationIndex);
                 lastAllocationPtr = ptr;
             }
 
             auto& allocation = findAllocation(info.traceIndex);
             allocation.leaked += info.size;
             allocation.allocated += info.size;
             ++allocation.allocations;
             if (allocation.leaked > allocation.peak) {
                 allocation.peak = allocation.leaked;
             }
 
             ++totalCost.allocations;
             totalCost.allocated += info.size;
             totalCost.leaked += info.size;
             if (totalCost.leaked > totalCost.peak) {
                 totalCost.peak = totalCost.leaked;
                 peakTime = timeStamp;
             }
 
             handleAllocation(info, allocationIndex);
         } else if (reader.mode() == '-') {
             AllocationIndex allocationInfoIndex;
             bool temporary = false;
             if (fileVersion >= 1) {
                 if (!(reader >> allocationInfoIndex.index)) {
                     cerr << "failed to parse line: " << reader.line() << endl;
                     continue;
                 }
                 temporary = lastAllocationPtr == allocationInfoIndex.index;
             } else { // backwards compatibility
                 uint64_t ptr = 0;
                 if (!(reader >> ptr)) {
                     cerr << "failed to parse line: " << reader.line() << endl;
                     continue;
                 }
                 auto taken = pointers.takePointer(ptr);
                 if (!taken.second) {
                     // happens when we attached to a running application
                     continue;
                 }
                 allocationInfoIndex = taken.first;
                 temporary = lastAllocationPtr == ptr;
             }
             lastAllocationPtr = 0;
 
             const auto& info = allocationInfos[allocationInfoIndex.index];
             auto& allocation = findAllocation(info.traceIndex);
             allocation.leaked -= info.size;
             totalCost.leaked -= info.size;
             if (temporary) {
                 ++allocation.temporary;
                 ++totalCost.temporary;
             }
         } else if (reader.mode() == 'a') {
             if (reparsing) {
                 continue;
             }
             AllocationInfo info;
             if (!(reader >> info.size) || !(reader >> info.traceIndex)) {
                 cerr << "failed to parse line: " << reader.line() << endl;
                 continue;
             }
             allocationInfos.push_back(info);
         } else if (reader.mode() == '#') {
             // comment or empty line
             continue;
         } else if (reader.mode() == 'c') {
             int64_t newStamp = 0;
             if (!(reader >> newStamp)) {
                 cerr << "Failed to read time stamp: " << reader.line() << endl;
                 continue;
             }
             handleTimeStamp(timeStamp, newStamp);
             timeStamp = newStamp;
         } else if (reader.mode() == 'R') { // RSS timestamp
             int64_t rss = 0;
             reader >> rss;
             if (rss > peakRSS) {
                 peakRSS = rss;
             }
         } else if (reader.mode() == 'X') {
             handleDebuggee(reader.line().c_str() + 2);
         } else if (reader.mode() == 'A') {
             totalCost = {};
             fromAttached = true;
         } else if (reader.mode() == 'v') {
             uint heaptrackVersion = 0;
             reader >> heaptrackVersion;
             if (!(reader >> fileVersion) && heaptrackVersion == 0x010200) {
                 // backwards compatibility: before the 1.0.0, I actually
                 // bumped the version to 0x010200 already and used that
                 // as file version. This is what we now consider v1 of the
                 // file format
                 fileVersion = 1;
             }
             if (fileVersion > HEAPTRACK_FILE_FORMAT_VERSION) {
                 cerr << "The data file has version " << hex << fileVersion << " and was written by heaptrack version "
                      << hex << heaptrackVersion << ")\n"
                      << "This is not compatible with this build of heaptrack (version " << hex << HEAPTRACK_VERSION
                      << "), which can read file format version " << hex << HEAPTRACK_FILE_FORMAT_VERSION << " and below"
                      << endl;
                 return false;
             }
         } else if (reader.mode() == 'I') { // system information
             reader >> systemInfo.pageSize;
             reader >> systemInfo.pages;
         } else {
             cerr << "failed to parse line: " << reader.line() << endl;
         }
     }
 
     if (!reparsing) {
         totalTime = timeStamp + 1;
     }
 
     handleTimeStamp(timeStamp, totalTime);
 
     return true;
 }
 
 namespace { // helpers for diffing
 
 template <typename IndexT, typename SortF>
 vector<IndexT> sortedIndices(size_t numIndices, SortF sorter)
 {
     vector<IndexT> indices;
     indices.resize(numIndices);
     for (size_t i = 0; i < numIndices; ++i) {
         indices[i].index = (i + 1);
     }
     sort(indices.begin(), indices.end(), sorter);
     return indices;
 }
 
 vector<StringIndex> remapStrings(vector<string>& lhs, const vector<string>& rhs)
 {
     unordered_map<string, StringIndex> stringRemapping;
     StringIndex stringIndex;
     {
         stringRemapping.reserve(lhs.size());
         for (const auto& string : lhs) {
             ++stringIndex.index;
             stringRemapping.insert(make_pair(string, stringIndex));
         }
     }
 
     vector<StringIndex> map;
     {
         map.reserve(rhs.size() + 1);
         map.push_back({});
         for (const auto& string : rhs) {
             auto it = stringRemapping.find(string);
             if (it == stringRemapping.end()) {
                 ++stringIndex.index;
                 lhs.push_back(string);
                 map.push_back(stringIndex);
             } else {
                 map.push_back(it->second);
             }
         }
     }
     return map;
 }
 
 template <typename T>
 inline const T& identity(const T& t)
 {
     return t;
 }
 
 template <typename IpMapper>
 int compareTraceIndices(const TraceIndex& lhs, const AccumulatedTraceData& lhsData, const TraceIndex& rhs,
                         const AccumulatedTraceData& rhsData, IpMapper ipMapper)
 {
     if (!lhs && !rhs) {
         return 0;
     } else if (lhs && !rhs) {
         return 1;
     } else if (rhs && !lhs) {
         return -1;
     } else if (&lhsData == &rhsData && lhs == rhs) {
         // fast-path if both indices are equal and we compare the same data
         return 0;
     }
 
     const auto& lhsTrace = lhsData.findTrace(lhs);
     const auto& rhsTrace = rhsData.findTrace(rhs);
 
     const int parentComparsion =
         compareTraceIndices(lhsTrace.parentIndex, lhsData, rhsTrace.parentIndex, rhsData, ipMapper);
     if (parentComparsion != 0) {
         return parentComparsion;
     } // else fall-through to below, parents are equal
 
     const auto& lhsIp = lhsData.findIp(lhsTrace.ipIndex);
     const auto& rhsIp = ipMapper(rhsData.findIp(rhsTrace.ipIndex));
     if (lhsIp.equalWithoutAddress(rhsIp)) {
         return 0;
     }
     return lhsIp.compareWithoutAddress(rhsIp) ? -1 : 1;
 }
 
 POTENTIALLY_UNUSED void printTrace(const AccumulatedTraceData& data, TraceIndex index)
 {
     do {
         const auto trace = data.findTrace(index);
         const auto& ip = data.findIp(trace.ipIndex);
         cerr << index << " (" << trace.ipIndex << ", " << trace.parentIndex << ")" << '\t'
-             << data.stringify(ip.functionIndex) << " in " << data.stringify(ip.moduleIndex) << " at "
-             << data.stringify(ip.fileIndex) << ':' << ip.line << '\n';
+             << data.stringify(ip.frame.functionIndex) << " in " << data.stringify(ip.moduleIndex) << " at "
+             << data.stringify(ip.frame.fileIndex) << ':' << ip.frame.line << '\n';
+        for (const auto& inlined : ip.inlined) {
+            cerr << '\t' << data.stringify(inlined.functionIndex) << " at "
+                 << data.stringify(inlined.fileIndex) << ':' << inlined.line << '\n';
+        }
         index = trace.parentIndex;
     } while (index);
     cerr << "---\n";
 }
 }
 
 void AccumulatedTraceData::diff(const AccumulatedTraceData& base)
 {
     totalCost -= base.totalCost;
     totalTime -= base.totalTime;
     peakRSS -= base.peakRSS;
     systemInfo.pages -= base.systemInfo.pages;
     systemInfo.pageSize -= base.systemInfo.pageSize;
 
     // step 1: sort trace indices of allocations for efficient lookup
     // step 2: while at it, also merge equal allocations
     vector<TraceIndex> allocationTraceNodes;
     allocationTraceNodes.reserve(allocations.size());
     for (auto it = allocations.begin(); it != allocations.end();) {
         const auto& allocation = *it;
         auto sortedIt =
             lower_bound(allocationTraceNodes.begin(), allocationTraceNodes.end(), allocation.traceIndex,
                         [this](const TraceIndex& lhs, const TraceIndex& rhs) -> bool {
                             return compareTraceIndices(lhs, *this, rhs, *this, identity<InstructionPointer>) < 0;
                         });
         if (sortedIt == allocationTraceNodes.end()
             || compareTraceIndices(allocation.traceIndex, *this, *sortedIt, *this, identity<InstructionPointer>) != 0) {
             allocationTraceNodes.insert(sortedIt, allocation.traceIndex);
             ++it;
         } else if (*sortedIt != allocation.traceIndex) {
             findAllocation(*sortedIt) += allocation;
             it = allocations.erase(it);
         } else {
             ++it;
         }
     }
 
     // step 3: map string indices from rhs to lhs data
 
     const auto& stringMap = remapStrings(strings, base.strings);
     auto remapString = [&stringMap](StringIndex& index) {
         if (index) {
             index.index = stringMap[index.index].index;
         }
     };
-    auto remapIp = [&remapString](InstructionPointer ip) -> InstructionPointer {
+    auto remapFrame = [&remapString](Frame frame) -> Frame {
+        remapString(frame.functionIndex);
+        remapString(frame.fileIndex);
+        return frame;
+    };
+    auto remapIp = [&remapString, &remapFrame](InstructionPointer ip) -> InstructionPointer {
         remapString(ip.moduleIndex);
-        remapString(ip.functionIndex);
-        remapString(ip.fileIndex);
+        remapFrame(ip.frame);
+        for (auto& inlined : ip.inlined) {
+            inlined = remapFrame(inlined);
+        }
         return ip;
     };
 
     // step 4: iterate over rhs data and find matching traces
     //         if no match is found, copy the data over
 
     auto sortedIps = sortedIndices<IpIndex>(instructionPointers.size(), [this](const IpIndex& lhs, const IpIndex& rhs) {
         return findIp(lhs).compareWithoutAddress(findIp(rhs));
     });
 
     // map an IpIndex from the rhs data into the lhs data space, or copy the data
     // if it does not exist yet
     auto remapIpIndex = [&sortedIps, this, &base, &remapIp](IpIndex rhsIndex) -> IpIndex {
         if (!rhsIndex) {
             return rhsIndex;
         }
 
         const auto& rhsIp = base.findIp(rhsIndex);
         const auto& lhsIp = remapIp(rhsIp);
 
         auto it = lower_bound(sortedIps.begin(), sortedIps.end(), lhsIp,
                               [this](const IpIndex& lhs, const InstructionPointer& rhs) {
                                   return findIp(lhs).compareWithoutAddress(rhs);
                               });
         if (it != sortedIps.end() && findIp(*it).equalWithoutAddress(lhsIp)) {
             return *it;
         }
 
         instructionPointers.push_back(lhsIp);
 
         IpIndex ret;
         ret.index = instructionPointers.size();
         sortedIps.insert(it, ret);
 
         return ret;
     };
 
     // copy the rhs trace index and the data it references into the lhs data,
     // recursively
     function<TraceIndex(TraceIndex)> copyTrace = [this, &base, remapIpIndex,
                                                   &copyTrace](TraceIndex rhsIndex) -> TraceIndex {
         if (!rhsIndex) {
             return rhsIndex;
         }
 
         // new location, add it
         const auto& rhsTrace = base.findTrace(rhsIndex);
 
         TraceNode node;
         node.parentIndex = copyTrace(rhsTrace.parentIndex);
         node.ipIndex = remapIpIndex(rhsTrace.ipIndex);
 
         traces.push_back(node);
         TraceIndex ret;
         ret.index = traces.size();
 
         return ret;
     };
 
     // find an equivalent trace or copy the data over if it does not exist yet
     // a trace is equivalent if the complete backtrace has equal
     // InstructionPointer
     // data while ignoring the actual pointer address
     auto remapTrace = [&base, &allocationTraceNodes, this, remapIp, copyTrace](TraceIndex rhsIndex) -> TraceIndex {
         if (!rhsIndex) {
             return rhsIndex;
         }
 
         auto it = lower_bound(allocationTraceNodes.begin(), allocationTraceNodes.end(), rhsIndex,
                               [&base, this, remapIp](const TraceIndex& lhs, const TraceIndex& rhs) -> bool {
                                   return compareTraceIndices(lhs, *this, rhs, base, remapIp) < 0;
                               });
 
         if (it != allocationTraceNodes.end() && compareTraceIndices(*it, *this, rhsIndex, base, remapIp) == 0) {
             return *it;
         }
 
         TraceIndex ret = copyTrace(rhsIndex);
         allocationTraceNodes.insert(it, ret);
         return ret;
     };
 
     for (const auto& rhsAllocation : base.allocations) {
         const auto lhsTrace = remapTrace(rhsAllocation.traceIndex);
         assert(remapIp(base.findIp(base.findTrace(rhsAllocation.traceIndex).ipIndex))
                    .equalWithoutAddress(findIp(findTrace(lhsTrace).ipIndex)));
         findAllocation(lhsTrace) -= rhsAllocation;
     }
 
     // step 5: remove allocations that don't show any differences
     //         note that when there are differences in the backtraces,
     //         we can still end up with merged backtraces that have a total
     //         of 0, but different "tails" of different origin with non-zero cost
     allocations.erase(remove_if(allocations.begin(), allocations.end(),
                                 [](const Allocation& allocation) -> bool { return allocation == AllocationData(); }),
                       allocations.end());
 }
 
 Allocation& AccumulatedTraceData::findAllocation(const TraceIndex traceIndex)
 {
     if (traceIndex < m_maxAllocationTraceIndex) {
         // only need to search when the trace index is previously known
         auto it = lower_bound(allocations.begin(), allocations.end(), traceIndex,
                               [](const Allocation& allocation, const TraceIndex traceIndex) -> bool {
                                   return allocation.traceIndex < traceIndex;
                               });
         if (it == allocations.end() || it->traceIndex != traceIndex) {
             Allocation allocation;
             allocation.traceIndex = traceIndex;
             it = allocations.insert(it, allocation);
         }
         return *it;
     } else if (traceIndex == m_maxAllocationTraceIndex && !allocations.empty()) {
         // reuse the last allocation
         assert(allocations.back().traceIndex == traceIndex);
     } else {
         // actually a new allocation
         Allocation allocation;
         allocation.traceIndex = traceIndex;
         allocations.push_back(allocation);
         m_maxAllocationTraceIndex = traceIndex;
     }
     return allocations.back();
 }
 
 InstructionPointer AccumulatedTraceData::findIp(const IpIndex ipIndex) const
 {
     if (!ipIndex || ipIndex.index > instructionPointers.size()) {
         return {};
     } else {
         return instructionPointers[ipIndex.index - 1];
     }
 }
 
 TraceNode AccumulatedTraceData::findTrace(const TraceIndex traceIndex) const
 {
     if (!traceIndex || traceIndex.index > traces.size()) {
         return {};
     } else {
         return traces[traceIndex.index - 1];
     }
 }
 
 bool AccumulatedTraceData::isStopIndex(const StringIndex index) const
 {
     return find(stopIndices.begin(), stopIndices.end(), index) != stopIndices.end();
 }
diff --git a/src/analyze/accumulatedtracedata.h b/src/analyze/accumulatedtracedata.h
index 913bb3e..d842e86 100644
--- a/src/analyze/accumulatedtracedata.h
+++ b/src/analyze/accumulatedtracedata.h
@@ -1,140 +1,158 @@
 /*
  * Copyright 2015-2017 Milian Wolff <mail@milianw.de>
  *
  * This library is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
  * This library is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
  * License along with this library; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA
  */
 
 #ifndef ACCUMULATEDTRACEDATA_H
 #define ACCUMULATEDTRACEDATA_H
 
 #include <iosfwd>
 #include <tuple>
 #include <vector>
 
 #include <fstream>
 #include <map>
 #include <unordered_map>
 #include <unordered_set>
 
 #include <boost/functional/hash.hpp>
 
 #include "allocationdata.h"
 #include "util/indices.h"
 
-struct InstructionPointer
+struct Frame
 {
-    uint64_t instructionPointer = 0;
-    ModuleIndex moduleIndex;
     FunctionIndex functionIndex;
     FileIndex fileIndex;
     int line = 0;
 
+    bool operator==(const Frame& rhs) const
+    {
+        return std::tie(functionIndex, fileIndex, line)
+            == std::tie(rhs.functionIndex, rhs.fileIndex, rhs.line);
+    }
+
+    bool operator<(const Frame& rhs) const
+    {
+        return std::tie(functionIndex, fileIndex, line)
+             < std::tie(rhs.functionIndex, rhs.fileIndex, rhs.line);
+    }
+};
+
+struct InstructionPointer
+{
+    uint64_t instructionPointer = 0;
+    ModuleIndex moduleIndex;
+    Frame frame;
+    std::vector<Frame> inlined;
+
     bool compareWithoutAddress(const InstructionPointer& other) const
     {
-        return std::tie(moduleIndex, functionIndex, fileIndex, line)
-            < std::tie(other.moduleIndex, other.functionIndex, other.fileIndex, other.line);
+        return std::tie(moduleIndex, frame)
+             < std::tie(other.moduleIndex, other.frame);
     }
 
     bool equalWithoutAddress(const InstructionPointer& other) const
     {
-        return std::tie(moduleIndex, functionIndex, fileIndex, line)
-            == std::tie(other.moduleIndex, other.functionIndex, other.fileIndex, other.line);
+        return std::tie(moduleIndex, frame)
+            == std::tie(other.moduleIndex, other.frame);
     }
 };
 
 struct TraceNode
 {
     IpIndex ipIndex;
     TraceIndex parentIndex;
 };
 
 struct Allocation : public AllocationData
 {
     // backtrace entry point
     TraceIndex traceIndex;
 };
 
 /**
  * Information for a single call to an allocation function.
  */
 struct AllocationInfo
 {
     uint64_t size = 0;
     TraceIndex traceIndex;
     bool operator==(const AllocationInfo& rhs) const
     {
         return rhs.traceIndex == traceIndex && rhs.size == size;
     }
 };
 
 struct AccumulatedTraceData
 {
     AccumulatedTraceData();
     virtual ~AccumulatedTraceData() = default;
 
     virtual void handleTimeStamp(int64_t oldStamp, int64_t newStamp) = 0;
     virtual void handleAllocation(const AllocationInfo& info, const AllocationIndex index) = 0;
     virtual void handleDebuggee(const char* command) = 0;
 
     const std::string& stringify(const StringIndex stringId) const;
 
     std::string prettyFunction(const std::string& function) const;
 
     bool read(const std::string& inputFile);
     bool read(std::istream& in);
 
     void diff(const AccumulatedTraceData& base);
 
     bool shortenTemplates = false;
     bool fromAttached = false;
 
     std::vector<Allocation> allocations;
     AllocationData totalCost;
     int64_t totalTime = 0;
     int64_t peakTime = 0;
     int64_t peakRSS = 0;
 
     struct SystemInfo
     {
         int64_t pages = 0;
         int64_t pageSize = 0;
     };
     SystemInfo systemInfo;
 
     // our indices are sequentially increasing thus a new allocation can only ever
     // occur with an index larger than any other we encountered so far
     // this can be used to our advantage in speeding up the findAllocation calls.
     TraceIndex m_maxAllocationTraceIndex;
 
     Allocation& findAllocation(const TraceIndex traceIndex);
 
     InstructionPointer findIp(const IpIndex ipIndex) const;
 
     TraceNode findTrace(const TraceIndex traceIndex) const;
 
     bool isStopIndex(const StringIndex index) const;
 
     // indices of functions that should stop the backtrace, e.g. main or static
     // initialization
     std::vector<StringIndex> stopIndices;
     std::vector<InstructionPointer> instructionPointers;
     std::vector<TraceNode> traces;
     std::vector<std::string> strings;
     std::vector<IpIndex> opNewIpIndices;
 
     std::vector<AllocationInfo> allocationInfos;
 };
 
 #endif // ACCUMULATEDTRACEDATA_H
diff --git a/src/analyze/gui/parser.cpp b/src/analyze/gui/parser.cpp
index 5862c59..c3e2c00 100644
--- a/src/analyze/gui/parser.cpp
+++ b/src/analyze/gui/parser.cpp
@@ -1,600 +1,613 @@
 /*
  * Copyright 2015-2017 Milian Wolff <mail@milianw.de>
  *
  * This library is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
  * This library is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
  * License along with this library; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA
  */
 
 #include "parser.h"
 
 #include <KLocalizedString>
 #include <ThreadWeaver/ThreadWeaver>
 
 #include <QDebug>
 
 #include "analyze/accumulatedtracedata.h"
 
 #include <future>
 #include <tuple>
 #include <vector>
 
 using namespace std;
 
 namespace {
 
 // TODO: use QString directly
 struct StringCache
 {
-    QString func(const InstructionPointer& ip) const
+    QString func(const Frame& frame) const
     {
-        if (ip.functionIndex) {
+        if (frame.functionIndex) {
             // TODO: support removal of template arguments
-            return stringify(ip.functionIndex);
+            return stringify(frame.functionIndex);
         } else {
             return unresolvedFunctionName();
         }
     }
 
-    QString file(const InstructionPointer& ip) const
+    QString file(const Frame& frame) const
     {
-        if (ip.fileIndex) {
-            return stringify(ip.fileIndex);
+        if (frame.fileIndex) {
+            return stringify(frame.fileIndex);
         } else {
             return {};
         }
     }
 
     QString module(const InstructionPointer& ip) const
     {
         return stringify(ip.moduleIndex);
     }
 
     QString stringify(const StringIndex index) const
     {
         if (!index || index.index > m_strings.size()) {
             return {};
         } else {
             return m_strings.at(index.index - 1);
         }
     }
 
     LocationData::Ptr location(const IpIndex& index, const InstructionPointer& ip) const
     {
         // first try a fast index-based lookup
         auto& location = m_locationsMap[index];
         if (!location) {
-            // slow-path, look for interned location
-            // note that we can get the same locatoin for different IPs
-            LocationData data = {func(ip), file(ip), module(ip), ip.line};
-            auto it = lower_bound(m_locations.begin(), m_locations.end(), data);
-            if (it != m_locations.end() && **it == data) {
-                // we got the location already from a different ip, cache it
-                location = *it;
-            } else {
-                // completely new location, cache it in both containers
-                auto interned = make_shared<LocationData>(data);
-                m_locations.insert(it, interned);
-                location = interned;
-            }
+            return frameLocation(ip.frame, ip.moduleIndex);
+        }
+        return location;
+    }
+
+    LocationData::Ptr frameLocation(const Frame& frame, const ModuleIndex& moduleIndex) const
+    {
+        LocationData::Ptr location;
+        // slow-path, look for interned location
+        // note that we can get the same location for different IPs
+        LocationData data = {func(frame), file(frame), stringify(moduleIndex), frame.line};
+        auto it = lower_bound(m_locations.begin(), m_locations.end(), data);
+        if (it != m_locations.end() && **it == data) {
+            // we got the location already from a different ip, cache it
+            location = *it;
+        } else {
+            // completely new location, cache it in both containers
+            auto interned = make_shared<LocationData>(data);
+            m_locations.insert(it, interned);
+            location = interned;
         }
         return location;
     }
 
     void update(const vector<string>& strings)
     {
         transform(strings.begin() + m_strings.size(), strings.end(), back_inserter(m_strings),
                   [](const string& str) { return QString::fromStdString(str); });
     }
 
     vector<QString> m_strings;
     mutable vector<LocationData::Ptr> m_locations;
     mutable QHash<IpIndex, LocationData::Ptr> m_locationsMap;
 
     bool diffMode = false;
 };
 
 struct ChartMergeData
 {
     IpIndex ip;
     qint64 consumed;
     qint64 allocations;
     qint64 allocated;
     qint64 temporary;
     bool operator<(const IpIndex rhs) const
     {
         return ip < rhs;
     }
 };
 
 const uint64_t MAX_CHART_DATAPOINTS = 500; // TODO: make this configurable via the GUI
 
 struct ParserData final : public AccumulatedTraceData
 {
     ParserData()
     {
     }
 
     void updateStringCache()
     {
         stringCache.update(strings);
     }
 
     void prepareBuildCharts()
     {
         if (stringCache.diffMode) {
             return;
         }
         consumedChartData.rows.reserve(MAX_CHART_DATAPOINTS);
         allocatedChartData.rows.reserve(MAX_CHART_DATAPOINTS);
         allocationsChartData.rows.reserve(MAX_CHART_DATAPOINTS);
         temporaryChartData.rows.reserve(MAX_CHART_DATAPOINTS);
         // start off with null data at the origin
         consumedChartData.rows.push_back({});
         allocatedChartData.rows.push_back({});
         allocationsChartData.rows.push_back({});
         temporaryChartData.rows.push_back({});
         // index 0 indicates the total row
         consumedChartData.labels[0] = i18n("total");
         allocatedChartData.labels[0] = i18n("total");
         allocationsChartData.labels[0] = i18n("total");
         temporaryChartData.labels[0] = i18n("total");
 
         buildCharts = true;
         maxConsumedSinceLastTimeStamp = 0;
         vector<ChartMergeData> merged;
         merged.reserve(instructionPointers.size());
         // merge the allocation cost by instruction pointer
         // TODO: aggregate by function instead?
         // TODO: traverse the merged call stack up until the first fork
         for (const auto& alloc : allocations) {
             const auto ip = findTrace(alloc.traceIndex).ipIndex;
             auto it = lower_bound(merged.begin(), merged.end(), ip);
             if (it == merged.end() || it->ip != ip) {
                 it = merged.insert(it, {ip, 0, 0, 0, 0});
             }
             it->consumed += alloc.peak; // we want to track the top peaks in the chart
             it->allocated += alloc.allocated;
             it->allocations += alloc.allocations;
             it->temporary += alloc.temporary;
         }
         // find the top hot spots for the individual data members and remember their
         // IP and store the label
         auto findTopChartEntries = [&](qint64 ChartMergeData::*member, int LabelIds::*label, ChartData* data) {
             sort(merged.begin(), merged.end(),
                  [=](const ChartMergeData& left, const ChartMergeData& right) { return left.*member > right.*member; });
             for (size_t i = 0; i < min(size_t(ChartRows::MAX_NUM_COST - 1), merged.size()); ++i) {
                 const auto& alloc = merged[i];
                 if (!(alloc.*member)) {
                     break;
                 }
                 const auto ip = alloc.ip;
                 (labelIds[ip].*label) = i + 1;
-                const auto function = stringCache.func(findIp(ip));
+                const auto function = stringCache.func(findIp(ip).frame);
                 data->labels[i + 1] = function;
             }
         };
         findTopChartEntries(&ChartMergeData::consumed, &LabelIds::consumed, &consumedChartData);
         findTopChartEntries(&ChartMergeData::allocated, &LabelIds::allocated, &allocatedChartData);
         findTopChartEntries(&ChartMergeData::allocations, &LabelIds::allocations, &allocationsChartData);
         findTopChartEntries(&ChartMergeData::temporary, &LabelIds::temporary, &temporaryChartData);
     }
 
     void handleTimeStamp(int64_t /*oldStamp*/, int64_t newStamp)
     {
         if (!buildCharts || stringCache.diffMode) {
             return;
         }
         maxConsumedSinceLastTimeStamp = max(maxConsumedSinceLastTimeStamp, totalCost.leaked);
         const int64_t diffBetweenTimeStamps = totalTime / MAX_CHART_DATAPOINTS;
         if (newStamp != totalTime && newStamp - lastTimeStamp < diffBetweenTimeStamps) {
             return;
         }
         const auto nowConsumed = maxConsumedSinceLastTimeStamp;
         maxConsumedSinceLastTimeStamp = 0;
         lastTimeStamp = newStamp;
 
         // create the rows
         auto createRow = [](int64_t timeStamp, int64_t totalCost) {
             ChartRows row;
             row.timeStamp = timeStamp;
             row.cost[0] = totalCost;
             return row;
         };
         auto consumed = createRow(newStamp, nowConsumed);
         auto allocated = createRow(newStamp, totalCost.allocated);
         auto allocs = createRow(newStamp, totalCost.allocations);
         auto temporary = createRow(newStamp, totalCost.temporary);
 
         // if the cost is non-zero and the ip corresponds to a hotspot function
         // selected in the labels,
         // we add the cost to the rows column
         auto addDataToRow = [](int64_t cost, int labelId, ChartRows* rows) {
             if (!cost || labelId == -1) {
                 return;
             }
             rows->cost[labelId] += cost;
         };
         for (const auto& alloc : allocations) {
             const auto ip = findTrace(alloc.traceIndex).ipIndex;
             auto it = labelIds.constFind(ip);
             if (it == labelIds.constEnd()) {
                 continue;
             }
             const auto& labelIds = *it;
             addDataToRow(alloc.leaked, labelIds.consumed, &consumed);
             addDataToRow(alloc.allocated, labelIds.allocated, &allocated);
             addDataToRow(alloc.allocations, labelIds.allocations, &allocs);
             addDataToRow(alloc.temporary, labelIds.temporary, &temporary);
         }
         // add the rows for this time stamp
         consumedChartData.rows << consumed;
         allocatedChartData.rows << allocated;
         allocationsChartData.rows << allocs;
         temporaryChartData.rows << temporary;
     }
 
     void handleAllocation(const AllocationInfo& info, const AllocationIndex index)
     {
         maxConsumedSinceLastTimeStamp = max(maxConsumedSinceLastTimeStamp, totalCost.leaked);
 
         if (index.index == allocationInfoCounter.size()) {
             allocationInfoCounter.push_back({info, 1});
         } else {
             ++allocationInfoCounter[index.index].allocations;
         }
     }
 
     void handleDebuggee(const char* command)
     {
         debuggee = command;
     }
 
     string debuggee;
 
     struct CountedAllocationInfo
     {
         AllocationInfo info;
         int64_t allocations;
         bool operator<(const CountedAllocationInfo& rhs) const
         {
             return tie(info.size, allocations) < tie(rhs.info.size, rhs.allocations);
         }
     };
     vector<CountedAllocationInfo> allocationInfoCounter;
 
     ChartData consumedChartData;
     ChartData allocationsChartData;
     ChartData allocatedChartData;
     ChartData temporaryChartData;
     // here we store the indices into ChartRows::cost for those IpIndices that
     // are within the top hotspots. This way, we can do one hash lookup in the
     // handleTimeStamp function instead of three when we'd store this data
     // in a per-ChartData hash.
     struct LabelIds
     {
         int consumed = -1;
         int allocations = -1;
         int allocated = -1;
         int temporary = -1;
     };
     QHash<IpIndex, LabelIds> labelIds;
     int64_t maxConsumedSinceLastTimeStamp = 0;
     int64_t lastTimeStamp = 0;
 
     StringCache stringCache;
 
     bool buildCharts = false;
 };
 
 void setParents(QVector<RowData>& children, const RowData* parent)
 {
     for (auto& row : children) {
         row.parent = parent;
         setParents(row.children, &row);
     }
 }
 
 TreeData mergeAllocations(const ParserData& data)
 {
     TreeData topRows;
+    auto addRow = [](TreeData* rows, const LocationData::Ptr& location, const Allocation& cost) -> TreeData* {
+        auto it = lower_bound(rows->begin(), rows->end(), location);
+        if (it != rows->end() && it->location == location) {
+            it->cost += cost;
+        } else {
+            it = rows->insert(it, {cost, location, nullptr, {}});
+        }
+        return &it->children;
+    };
     // merge allocations, leave parent pointers invalid (their location may change)
     for (const auto& allocation : data.allocations) {
         auto traceIndex = allocation.traceIndex;
         auto rows = &topRows;
         while (traceIndex) {
             const auto& trace = data.findTrace(traceIndex);
             const auto& ip = data.findIp(trace.ipIndex);
             auto location = data.stringCache.location(trace.ipIndex, ip);
-
-            auto it = lower_bound(rows->begin(), rows->end(), location);
-            if (it != rows->end() && it->location == location) {
-                it->cost += allocation;
-            } else {
-                it = rows->insert(it, {allocation, location, nullptr, {}});
+            rows = addRow(rows, location, allocation);
+            for (const auto& inlined : ip.inlined) {
+                auto inlinedLocation = data.stringCache.frameLocation(inlined, ip.moduleIndex);
+                rows = addRow(rows, inlinedLocation, allocation);
             }
-            if (data.isStopIndex(ip.functionIndex)) {
+            if (data.isStopIndex(ip.frame.functionIndex)) {
                 break;
             }
             traceIndex = trace.parentIndex;
-            rows = &it->children;
         }
     }
     // now set the parents, the data is constant from here on
     setParents(topRows, nullptr);
 
     return topRows;
 }
 
 RowData* findByLocation(const RowData& row, QVector<RowData>* data)
 {
     for (int i = 0; i < data->size(); ++i) {
         if (data->at(i).location == row.location) {
             return data->data() + i;
         }
     }
     return nullptr;
 }
 
 AllocationData buildTopDown(const TreeData& bottomUpData, TreeData* topDownData)
 {
     AllocationData totalCost;
     for (const auto& row : bottomUpData) {
         // recurse and find the cost attributed to children
         const auto childCost = buildTopDown(row.children, topDownData);
         if (childCost != row.cost) {
             // this row is (partially) a leaf
             const auto cost = row.cost - childCost;
 
             // bubble up the parent chain to build a top-down tree
             auto node = &row;
             auto stack = topDownData;
             while (node) {
                 auto data = findByLocation(*node, stack);
                 if (!data) {
                     // create an empty top-down item for this bottom-up node
                     *stack << RowData{{}, node->location, nullptr, {}};
                     data = &stack->back();
                 }
                 // always use the leaf node's cost and propagate that one up the chain
                 // otherwise we'd count the cost of some nodes multiple times
                 data->cost += cost;
                 stack = &data->children;
                 node = node->parent;
             }
         }
         totalCost += row.cost;
     }
     return totalCost;
 }
 
 QVector<RowData> toTopDownData(const QVector<RowData>& bottomUpData)
 {
     QVector<RowData> topRows;
     buildTopDown(bottomUpData, &topRows);
     // now set the parents, the data is constant from here on
     setParents(topRows, nullptr);
     return topRows;
 }
 
 AllocationData buildCallerCallee(const TreeData& bottomUpData, CallerCalleeRows* callerCalleeData)
 {
     AllocationData totalCost;
     for (const auto& row : bottomUpData) {
         // recurse to find a leaf
         const auto childCost = buildCallerCallee(row.children, callerCalleeData);
         if (childCost != row.cost) {
             // this row is (partially) a leaf
             const auto cost = row.cost - childCost;
 
             // leaf node found, bubble up the parent chain to add cost for all frames
             // to the caller/callee data. this is done top-down since we must not count
             // symbols more than once in the caller-callee data
             QSet<LocationData::Ptr> recursionGuard;
 
             auto node = &row;
             while (node) {
                 const auto& location = node->location;
                 if (!recursionGuard.contains(location)) { // aggregate caller-callee data
                     auto it = lower_bound(callerCalleeData->begin(), callerCalleeData->end(), location,
                         [](const CallerCalleeData& lhs, const LocationData::Ptr& rhs) { return lhs.location < rhs; });
                     if (it == callerCalleeData->end() || it->location != location) {
                         it = callerCalleeData->insert(it, {{}, {}, location});
                     }
                     it->inclusiveCost += cost;
                     if (!node->parent) {
                         it->selfCost += cost;
                     }
                     recursionGuard.insert(location);
                 }
                 node = node->parent;
             }
         }
         totalCost += row.cost;
     }
     return totalCost;
 }
 
 CallerCalleeRows toCallerCalleeData(const QVector<RowData>& bottomUpData, bool diffMode)
 {
     CallerCalleeRows callerCalleeRows;
 
     buildCallerCallee(bottomUpData, &callerCalleeRows);
 
     if (diffMode) {
         // remove rows without cost
         callerCalleeRows.erase(remove_if(callerCalleeRows.begin(), callerCalleeRows.end(),
                                          [](const CallerCalleeData& data) -> bool {
                                              return data.inclusiveCost == AllocationData()
                                                  && data.selfCost == AllocationData();
                                          }),
                                callerCalleeRows.end());
     }
 
     return callerCalleeRows;
 }
 
 struct MergedHistogramColumnData
 {
     LocationData::Ptr location;
     int64_t allocations;
     bool operator<(const LocationData::Ptr& rhs) const
     {
         return location < rhs;
     }
 };
 
 HistogramData buildSizeHistogram(ParserData& data)
 {
     HistogramData ret;
     if (data.allocationInfoCounter.empty()) {
         return ret;
     }
     sort(data.allocationInfoCounter.begin(), data.allocationInfoCounter.end());
     const auto totalLabel = i18n("total");
     HistogramRow row;
     const pair<uint64_t, QString> buckets[] = {{8, i18n("0B to 8B")},
                                                {16, i18n("9B to 16B")},
                                                {32, i18n("17B to 32B")},
                                                {64, i18n("33B to 64B")},
                                                {128, i18n("65B to 128B")},
                                                {256, i18n("129B to 256B")},
                                                {512, i18n("257B to 512B")},
                                                {1024, i18n("512B to 1KB")},
                                                {numeric_limits<uint64_t>::max(), i18n("more than 1KB")}};
     uint bucketIndex = 0;
     row.size = buckets[bucketIndex].first;
     row.sizeLabel = buckets[bucketIndex].second;
     vector<MergedHistogramColumnData> columnData;
     columnData.reserve(128);
     auto insertColumns = [&]() {
         sort(columnData.begin(), columnData.end(),
              [](const MergedHistogramColumnData& lhs, const MergedHistogramColumnData& rhs) {
                  return lhs.allocations > rhs.allocations;
              });
         // -1 to account for total row
         for (size_t i = 0; i < min(columnData.size(), size_t(HistogramRow::NUM_COLUMNS - 1)); ++i) {
             const auto& column = columnData[i];
             row.columns[i + 1] = {column.allocations, column.location};
         }
     };
     for (const auto& info : data.allocationInfoCounter) {
         if (info.info.size > row.size) {
             insertColumns();
             columnData.clear();
             ret << row;
             ++bucketIndex;
             row.size = buckets[bucketIndex].first;
             row.sizeLabel = buckets[bucketIndex].second;
             row.columns[0] = {info.allocations, {}};
         } else {
             row.columns[0].allocations += info.allocations;
         }
         const auto ipIndex = data.findTrace(info.info.traceIndex).ipIndex;
         const auto ip = data.findIp(ipIndex);
         const auto location = data.stringCache.location(ipIndex, ip);
         auto it = lower_bound(columnData.begin(), columnData.end(), location);
         if (it == columnData.end() || it->location != location) {
             columnData.insert(it, {location, info.allocations});
         } else {
             it->allocations += info.allocations;
         }
     }
     insertColumns();
     ret << row;
     return ret;
 }
 }
 
 Parser::Parser(QObject* parent)
     : QObject(parent)
 {
     qRegisterMetaType<SummaryData>();
 }
 
 Parser::~Parser() = default;
 
 void Parser::parse(const QString& path, const QString& diffBase)
 {
     using namespace ThreadWeaver;
     stream() << make_job([this, path, diffBase]() {
         const auto stdPath = path.toStdString();
         auto data = make_shared<ParserData>();
         emit progressMessageAvailable(i18n("parsing data..."));
 
         if (!diffBase.isEmpty()) {
             ParserData diffData;
             auto readBase =
                 async(launch::async, [&diffData, diffBase]() { return diffData.read(diffBase.toStdString()); });
             if (!data->read(stdPath)) {
                 emit failedToOpen(path);
                 return;
             }
             if (!readBase.get()) {
                 emit failedToOpen(diffBase);
                 return;
             }
             data->diff(diffData);
             data->stringCache.diffMode = true;
         } else if (!data->read(stdPath)) {
             emit failedToOpen(path);
             return;
         }
 
         data->updateStringCache();
 
         emit summaryAvailable({QString::fromStdString(data->debuggee), data->totalCost, data->totalTime, data->peakTime,
                                data->peakRSS * data->systemInfo.pageSize,
                                data->systemInfo.pages * data->systemInfo.pageSize, data->fromAttached});
 
         emit progressMessageAvailable(i18n("merging allocations..."));
         // merge allocations before modifying the data again
         const auto mergedAllocations = mergeAllocations(*data);
         emit bottomUpDataAvailable(mergedAllocations);
 
         // also calculate the size histogram
         emit progressMessageAvailable(i18n("building size histogram..."));
         const auto sizeHistogram = buildSizeHistogram(*data);
         emit sizeHistogramDataAvailable(sizeHistogram);
         // now data can be modified again for the chart data evaluation
 
         const auto diffMode = data->stringCache.diffMode;
         emit progressMessageAvailable(i18n("building charts..."));
         auto parallel = new Collection;
         *parallel << make_job([this, mergedAllocations]() {
             const auto topDownData = toTopDownData(mergedAllocations);
             emit topDownDataAvailable(topDownData);
         }) << make_job([this, mergedAllocations, diffMode]() {
             const auto callerCalleeData = toCallerCalleeData(mergedAllocations, diffMode);
             emit callerCalleeDataAvailable(callerCalleeData);
         });
         if (!data->stringCache.diffMode) {
             // only build charts when we are not diffing
             *parallel << make_job([this, data, stdPath]() {
                 // this mutates data, and thus anything running in parallel must
                 // not access data
                 data->prepareBuildCharts();
                 data->read(stdPath);
                 emit consumedChartDataAvailable(data->consumedChartData);
                 emit allocationsChartDataAvailable(data->allocationsChartData);
                 emit allocatedChartDataAvailable(data->allocatedChartData);
                 emit temporaryChartDataAvailable(data->temporaryChartData);
             });
         }
 
         auto sequential = new Sequence;
         *sequential << parallel << make_job([this]() { emit finished(); });
 
         stream() << sequential;
     });
 }
diff --git a/src/analyze/print/heaptrack_print.cpp b/src/analyze/print/heaptrack_print.cpp
index 79c6c4a..15a077e 100644
--- a/src/analyze/print/heaptrack_print.cpp
+++ b/src/analyze/print/heaptrack_print.cpp
@@ -1,747 +1,771 @@
 /*
  * Copyright 2014-2016 Milian Wolff <mail@milianw.de>
  *
  * This library is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
  * This library is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
  * License along with this library; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA
  */
 
 /**
  * @file heaptrack_print.cpp
  *
  * @brief Evaluate and print the collected heaptrack data.
  */
 
 #include <boost/program_options.hpp>
 
 #include "analyze/accumulatedtracedata.h"
 
 #include <future>
 #include <iomanip>
 #include <iostream>
 
 #include "util/config.h"
 
 using namespace std;
 namespace po = boost::program_options;
 
 namespace {
 
 /**
  * Merged allocation information by instruction pointer outside of alloc funcs
  */
 struct MergedAllocation : public AllocationData
 {
     // individual backtraces
     std::vector<Allocation> traces;
     // location
     IpIndex ipIndex;
 };
 
 class formatBytes
 {
 public:
     formatBytes(int64_t bytes)
         : m_bytes(bytes)
     {
     }
 
     friend ostream& operator<<(ostream& out, const formatBytes data);
 
 private:
     int64_t m_bytes;
 };
 
 ostream& operator<<(ostream& out, const formatBytes data)
 {
     if (data.m_bytes < 0) {
         // handle negative values
         return out << '-' << formatBytes(-data.m_bytes);
     }
     if (data.m_bytes < 1000) {
         // no fancy formatting for plain byte values, esp. no .00 factions
         return out << data.m_bytes << 'B';
     }
 
     static const auto units = {"B", "KB", "MB", "GB", "TB"};
     auto unit = units.begin();
     size_t i = 0;
     double bytes = data.m_bytes;
     while (i < units.size() - 1 && bytes > 1000.) {
         bytes /= 1000.;
         ++i;
         ++unit;
     }
     return out << fixed << setprecision(2) << bytes << *unit;
 }
 
 struct Printer final : public AccumulatedTraceData
 {
     void finalize()
     {
         filterAllocations();
         mergedAllocations = mergeAllocations(allocations);
     }
 
     void mergeAllocation(vector<MergedAllocation>* mergedAllocations, const Allocation& allocation) const
     {
         const auto trace = findTrace(allocation.traceIndex);
         const auto traceIp = findIp(trace.ipIndex);
         auto it = lower_bound(mergedAllocations->begin(), mergedAllocations->end(), traceIp,
                               [this](const MergedAllocation& allocation, const InstructionPointer traceIp) -> bool {
                                   // Compare meta data without taking the instruction pointer address into account.
                                   // This is useful since sometimes, esp. when we lack debug symbols, the same
                                   // function allocates memory at different IP addresses which is pretty useless
                                   // information most of the time
                                   // TODO: make this configurable, but on-by-default
                                   const auto allocationIp = findIp(allocation.ipIndex);
                                   return allocationIp.compareWithoutAddress(traceIp);
                               });
         if (it == mergedAllocations->end() || !findIp(it->ipIndex).equalWithoutAddress(traceIp)) {
             MergedAllocation merged;
             merged.ipIndex = trace.ipIndex;
             it = mergedAllocations->insert(it, merged);
         }
         it->traces.push_back(allocation);
     }
 
     // merge allocations so that different traces that point to the same
     // instruction pointer at the end where the allocation function is
     // called are combined
     vector<MergedAllocation> mergeAllocations(const vector<Allocation>& allocations) const
     {
         // TODO: merge deeper traces, i.e. A,B,C,D and A,B,C,F
         //       should be merged to A,B,C: D & F
         //       currently the below will only merge it to: A: B,C,D & B,C,F
         vector<MergedAllocation> ret;
         ret.reserve(allocations.size());
         for (const Allocation& allocation : allocations) {
             if (allocation.traceIndex) {
                 mergeAllocation(&ret, allocation);
             }
         }
         for (MergedAllocation& merged : ret) {
             for (const Allocation& allocation : merged.traces) {
                 merged.allocated += allocation.allocated;
                 merged.allocations += allocation.allocations;
                 merged.leaked += allocation.leaked;
                 merged.peak += allocation.peak;
                 merged.temporary += allocation.temporary;
             }
         }
         return ret;
     }
 
     void filterAllocations()
     {
         if (filterBtFunction.empty()) {
             return;
         }
         allocations.erase(remove_if(allocations.begin(), allocations.end(),
                                     [&](const Allocation& allocation) -> bool {
                                         auto node = findTrace(allocation.traceIndex);
                                         while (node.ipIndex) {
                                             const auto& ip = findIp(node.ipIndex);
-                                            if (isStopIndex(ip.functionIndex)) {
+                                            if (isStopIndex(ip.frame.functionIndex)) {
                                                 break;
                                             }
-                                            if (stringify(ip.functionIndex).find(filterBtFunction) != string::npos) {
+                                            auto matchFunction = [this](const Frame& frame) {
+                                                return stringify(frame.functionIndex).find(filterBtFunction) != string::npos;
+                                            };
+                                            if (matchFunction(ip.frame)) {
                                                 return false;
                                             }
+                                            for (const auto& inlined : ip.inlined) {
+                                                if (matchFunction(inlined)) {
+                                                    return false;
+                                                }
+                                            }
                                             node = findTrace(node.parentIndex);
                                         };
                                         return true;
                                     }),
                           allocations.end());
     }
 
     void printIndent(ostream& out, size_t indent, const char* indentString = "  ") const
     {
         while (indent--) {
             out << indentString;
         }
     }
 
     void printIp(const IpIndex ip, ostream& out, const size_t indent = 0) const
     {
         printIp(findIp(ip), out, indent);
     }
 
     void printIp(const InstructionPointer& ip, ostream& out, const size_t indent = 0, bool flameGraph = false) const
     {
         printIndent(out, indent);
 
-        if (ip.functionIndex) {
-            out << prettyFunction(stringify(ip.functionIndex));
+        if (ip.frame.functionIndex) {
+            out << prettyFunction(stringify(ip.frame.functionIndex));
         } else {
             out << "0x" << hex << ip.instructionPointer << dec;
         }
 
         if (flameGraph) {
             // only print the file name but nothing else
-            if (ip.fileIndex) {
-                const auto& file = stringify(ip.fileIndex);
+            auto printFile = [this, &out](FileIndex fileIndex) {
+                const auto& file = stringify(fileIndex);
                 auto idx = file.find_last_of('/') + 1;
                 out << " (" << file.substr(idx) << ")";
+            };
+            if (ip.frame.fileIndex) {
+                printFile(ip.frame.fileIndex);
             }
             out << ';';
+            for (const auto& inlined : ip.inlined) {
+                out << prettyFunction(stringify(inlined.functionIndex));
+                printFile(inlined.fileIndex);
+                out << ';';
+            }
             return;
         }
 
         out << '\n';
         printIndent(out, indent + 1);
 
-        if (ip.fileIndex) {
-            out << "at " << stringify(ip.fileIndex) << ':' << ip.line << '\n';
+        if (ip.frame.fileIndex) {
+            out << "at " << stringify(ip.frame.fileIndex) << ':' << ip.frame.line << '\n';
             printIndent(out, indent + 1);
         }
 
         if (ip.moduleIndex) {
             out << "in " << stringify(ip.moduleIndex);
         } else {
             out << "in ??";
         }
         out << '\n';
+
+        for (const auto& inlined : ip.inlined) {
+            printIndent(out, indent);
+            out << prettyFunction(stringify(inlined.functionIndex)) << '\n';
+            printIndent(out, indent + 1);
+            out << "at " << stringify(inlined.fileIndex) << ':' << inlined.line << '\n';
+        }
     }
 
     void printBacktrace(const TraceIndex traceIndex, ostream& out, const size_t indent = 0,
                         bool skipFirst = false) const
     {
         if (!traceIndex) {
             out << "  ??";
             return;
         }
         printBacktrace(findTrace(traceIndex), out, indent, skipFirst);
     }
 
     void printBacktrace(TraceNode node, ostream& out, const size_t indent = 0, bool skipFirst = false) const
     {
         while (node.ipIndex) {
             const auto& ip = findIp(node.ipIndex);
             if (!skipFirst) {
                 printIp(ip, out, indent);
             }
             skipFirst = false;
 
-            if (isStopIndex(ip.functionIndex)) {
+            if (isStopIndex(ip.frame.functionIndex)) {
                 break;
             }
 
             node = findTrace(node.parentIndex);
         };
     }
 
     /**
      * recursive top-down printer in the format
      *
      * func1;func2 (file);func2 (file);
      */
     void printFlamegraph(TraceNode node, ostream& out) const
     {
         if (!node.ipIndex) {
             return;
         }
 
         const auto& ip = findIp(node.ipIndex);
 
-        if (!isStopIndex(ip.functionIndex)) {
+        if (!isStopIndex(ip.frame.functionIndex)) {
             printFlamegraph(findTrace(node.parentIndex), out);
         }
         printIp(ip, out, 0, true);
     }
 
     template <typename T, typename LabelPrinter, typename SubLabelPrinter>
     void printAllocations(T AllocationData::*member, LabelPrinter label, SubLabelPrinter sublabel)
     {
         if (mergeBacktraces) {
             printMerged(member, label, sublabel);
         } else {
             printUnmerged(member, label);
         }
     }
 
     template <typename T, typename LabelPrinter, typename SubLabelPrinter>
     void printMerged(T AllocationData::*member, LabelPrinter label, SubLabelPrinter sublabel)
     {
         auto sortOrder = [member](const AllocationData& l, const AllocationData& r) {
             return std::abs(l.*member) > std::abs(r.*member);
         };
         sort(mergedAllocations.begin(), mergedAllocations.end(), sortOrder);
         for (size_t i = 0; i < min(peakLimit, mergedAllocations.size()); ++i) {
             auto& allocation = mergedAllocations[i];
             if (!(allocation.*member)) {
                 break;
             }
             label(allocation);
             printIp(allocation.ipIndex, cout);
 
             sort(allocation.traces.begin(), allocation.traces.end(), sortOrder);
             int64_t handled = 0;
             for (size_t j = 0; j < min(subPeakLimit, allocation.traces.size()); ++j) {
                 const auto& trace = allocation.traces[j];
                 sublabel(trace);
                 handled += trace.*member;
                 printBacktrace(trace.traceIndex, cout, 2, true);
             }
             if (allocation.traces.size() > subPeakLimit) {
                 cout << "  and ";
                 if (member == &AllocationData::allocations) {
                     cout << (allocation.*member - handled);
                 } else {
                     cout << formatBytes(allocation.*member - handled);
                 }
                 cout << " from " << (allocation.traces.size() - subPeakLimit) << " other places\n";
             }
             cout << '\n';
         }
     }
 
     template <typename T, typename LabelPrinter>
     void printUnmerged(T AllocationData::*member, LabelPrinter label)
     {
         sort(allocations.begin(), allocations.end(),
              [member](const Allocation& l, const Allocation& r) { return std::abs(l.*member) > std::abs(r.*member); });
         for (size_t i = 0; i < min(peakLimit, allocations.size()); ++i) {
             const auto& allocation = allocations[i];
             if (!(allocation.*member)) {
                 break;
             }
             label(allocation);
             printBacktrace(allocation.traceIndex, cout, 1);
             cout << '\n';
         }
         cout << endl;
     }
 
     void writeMassifHeader(const char* command)
     {
         // write massif header
         massifOut << "desc: heaptrack\n"
                   << "cmd: " << command << '\n'
                   << "time_unit: s\n";
     }
 
     void writeMassifSnapshot(size_t timeStamp, bool isLast)
     {
         if (!lastMassifPeak) {
             lastMassifPeak = totalCost.leaked;
             massifAllocations = allocations;
         }
         massifOut << "#-----------\n"
                   << "snapshot=" << massifSnapshotId << '\n'
                   << "#-----------\n"
                   << "time=" << (0.001 * timeStamp) << '\n'
                   << "mem_heap_B=" << lastMassifPeak << '\n'
                   << "mem_heap_extra_B=0\n"
                   << "mem_stacks_B=0\n";
 
         if (massifDetailedFreq && (isLast || !(massifSnapshotId % massifDetailedFreq))) {
             massifOut << "heap_tree=detailed\n";
             const size_t threshold = double(lastMassifPeak) * massifThreshold * 0.01;
             writeMassifBacktrace(massifAllocations, lastMassifPeak, threshold, IpIndex());
         } else {
             massifOut << "heap_tree=empty\n";
         }
 
         ++massifSnapshotId;
         lastMassifPeak = 0;
     }
 
     void writeMassifBacktrace(const vector<Allocation>& allocations, size_t heapSize, size_t threshold,
                               const IpIndex& location, size_t depth = 0)
     {
         int64_t skippedLeaked = 0;
         size_t numAllocs = 0;
         size_t skipped = 0;
         auto mergedAllocations = mergeAllocations(allocations);
         sort(mergedAllocations.begin(), mergedAllocations.end(),
              [](const MergedAllocation& l, const MergedAllocation& r) { return l.leaked > r.leaked; });
 
         const auto ip = findIp(location);
 
         // skip anything below main
-        const bool shouldStop = isStopIndex(ip.functionIndex);
+        const bool shouldStop = isStopIndex(ip.frame.functionIndex);
         if (!shouldStop) {
             for (auto& merged : mergedAllocations) {
                 if (merged.leaked < 0) {
                     // list is sorted, so we can bail out now - these entries are
                     // uninteresting for massif
                     break;
                 }
 
                 // skip items below threshold
                 if (static_cast<size_t>(merged.leaked) >= threshold) {
                     ++numAllocs;
                     // skip the first level of the backtrace, otherwise we'd endlessly
                     // recurse
                     for (auto& alloc : merged.traces) {
                         alloc.traceIndex = findTrace(alloc.traceIndex).parentIndex;
                     }
                 } else {
                     ++skipped;
                     skippedLeaked += merged.leaked;
                 }
             }
         }
 
+        // TODO: write inlined frames out to massif files
         printIndent(massifOut, depth, " ");
         massifOut << 'n' << (numAllocs + (skipped ? 1 : 0)) << ": " << heapSize;
         if (!depth) {
             massifOut << " (heap allocation functions) malloc/new/new[], "
                          "--alloc-fns, etc.\n";
         } else {
             massifOut << " 0x" << hex << ip.instructionPointer << dec << ": ";
-            if (ip.functionIndex) {
-                massifOut << stringify(ip.functionIndex);
+            if (ip.frame.functionIndex) {
+                massifOut << stringify(ip.frame.functionIndex);
             } else {
                 massifOut << "???";
             }
 
             massifOut << " (";
-            if (ip.fileIndex) {
-                massifOut << stringify(ip.fileIndex) << ':' << ip.line;
+            if (ip.frame.fileIndex) {
+                massifOut << stringify(ip.frame.fileIndex) << ':' << ip.frame.line;
             } else if (ip.moduleIndex) {
                 massifOut << stringify(ip.moduleIndex);
             } else {
                 massifOut << "???";
             }
             massifOut << ")\n";
         }
 
         auto writeSkipped = [&] {
             if (skipped) {
                 printIndent(massifOut, depth, " ");
                 massifOut << " n0: " << skippedLeaked << " in " << skipped << " places, all below massif's threshold ("
                           << massifThreshold << ")\n";
                 skipped = 0;
             }
         };
 
         if (!shouldStop) {
             for (const auto& merged : mergedAllocations) {
                 if (merged.leaked > 0 && static_cast<size_t>(merged.leaked) >= threshold) {
                     if (skippedLeaked > merged.leaked) {
                         // manually inject this entry to keep the output sorted
                         writeSkipped();
                     }
                     writeMassifBacktrace(merged.traces, merged.leaked, threshold, merged.ipIndex, depth + 1);
                 }
             }
             writeSkipped();
         }
     }
 
     void handleAllocation(const AllocationInfo& info, const AllocationIndex /*index*/) override
     {
         if (printHistogram) {
             ++sizeHistogram[info.size];
         }
 
         if (totalCost.leaked > 0 && static_cast<size_t>(totalCost.leaked) > lastMassifPeak && massifOut.is_open()) {
             massifAllocations = allocations;
             lastMassifPeak = totalCost.leaked;
         }
     }
 
     void handleTimeStamp(int64_t /*oldStamp*/, int64_t newStamp) override
     {
         if (massifOut.is_open()) {
             writeMassifSnapshot(newStamp, newStamp == totalTime);
         }
     }
 
     void handleDebuggee(const char* command) override
     {
         cout << "Debuggee command was: " << command << endl;
         if (massifOut.is_open()) {
             writeMassifHeader(command);
         }
     }
 
     bool printHistogram = false;
     bool mergeBacktraces = true;
 
     vector<MergedAllocation> mergedAllocations;
 
     std::map<uint64_t, uint64_t> sizeHistogram;
 
     uint64_t massifSnapshotId = 0;
     uint64_t lastMassifPeak = 0;
     vector<Allocation> massifAllocations;
     ofstream massifOut;
     double massifThreshold = 1;
     uint64_t massifDetailedFreq = 1;
 
     string filterBtFunction;
     size_t peakLimit = 10;
     size_t subPeakLimit = 5;
 };
 }
 
 int main(int argc, char** argv)
 {
     po::options_description desc("Options", 120, 60);
     desc.add_options()("file,f", po::value<string>(), "The heaptrack data file to print.")(
         "diff,d", po::value<string>()->default_value({}), "Find the differences to this file.")(
         "shorten-templates,t", po::value<bool>()->default_value(true)->implicit_value(true),
         "Shorten template identifiers.")("merge-backtraces,m",
                                          po::value<bool>()->default_value(true)->implicit_value(true),
                                          "Merge backtraces.\nNOTE: the merged peak consumption is not correct.")(
         "print-peaks,p", po::value<bool>()->default_value(true)->implicit_value(true),
         "Print backtraces to top allocators, sorted by peak consumption.")(
         "print-allocators,a", po::value<bool>()->default_value(true)->implicit_value(true),
         "Print backtraces to top allocators, sorted by number of calls to "
         "allocation functions.")("print-temporary,T", po::value<bool>()->default_value(true)->implicit_value(true),
                                  "Print backtraces to top allocators, sorted by number of temporary "
                                  "allocations.")("print-leaks,l",
                                                  po::value<bool>()->default_value(false)->implicit_value(true),
                                                  "Print backtraces to leaked memory allocations.")(
         "print-overall-allocated,o", po::value<bool>()->default_value(false)->implicit_value(true),
         "Print top overall allocators, ignoring memory frees.")(
         "peak-limit,n", po::value<size_t>()->default_value(10)->implicit_value(10),
         "Limit the number of reported peaks.")("sub-peak-limit,s",
                                                po::value<size_t>()->default_value(5)->implicit_value(5),
                                                "Limit the number of reported backtraces of merged peak locations.")(
         "print-histogram,H", po::value<string>()->default_value(string()),
         "Path to output file where an allocation size histogram will be written "
         "to.")("print-flamegraph,F", po::value<string>()->default_value(string()),
                "Path to output file where a flame-graph compatible stack file will be "
                "written to.\n"
                "To visualize the resulting file, use flamegraph.pl from "
                "https://github.com/brendangregg/FlameGraph:\n"
                "  heaptrack_print heaptrack.someapp.PID.gz -F stacks.txt\n"
                "  # optionally pass --reverse to flamegraph.pl\n"
                "  flamegraph.pl --title \"heaptrack: allocations\" --colors mem \\\n"
                "    --countname allocations < stacks.txt > heaptrack.someapp.PID.svg\n"
                "  [firefox|chromium] heaptrack.someapp.PID.svg\n")(
         "print-massif,M", po::value<string>()->default_value(string()),
         "Path to output file where a massif compatible data file will be written "
         "to.")("massif-threshold", po::value<double>()->default_value(1.),
                "Percentage of current memory usage, below which allocations are "
                "aggregated into a 'below threshold' entry.\n"
                "This is only used in the massif output file so far.\n")(
         "massif-detailed-freq", po::value<size_t>()->default_value(2),
         "Frequency of detailed snapshots in the massif output file. Increase "
         "this to reduce the file size.\n"
         "You can set the value to zero to disable detailed snapshots.\n")(
         "filter-bt-function", po::value<string>()->default_value(string()),
         "Only print allocations where the backtrace contains the given "
         "function.")("help,h", "Show this help message.")("version,v", "Displays version information.");
     po::positional_options_description p;
     p.add("file", -1);
 
     po::variables_map vm;
     try {
         po::store(po::command_line_parser(argc, argv).options(desc).positional(p).run(), vm);
         if (vm.count("help")) {
             cout << "heaptrack_print - analyze heaptrack data files.\n"
                  << "\n"
                  << "heaptrack is a heap memory profiler which records information\n"
                  << "about calls to heap allocation functions such as malloc, "
                     "operator new etc. pp.\n"
                  << "This print utility can then be used to analyze the generated "
                     "data files.\n\n"
                  << desc << endl;
             return 0;
         } else if (vm.count("version")) {
             cout << "heaptrack_print " << HEAPTRACK_VERSION_STRING << endl;
             return 0;
         }
         po::notify(vm);
     } catch (const po::error& error) {
         cerr << "ERROR: " << error.what() << endl << endl << desc << endl;
         return 1;
     }
 
     if (!vm.count("file")) {
         // NOTE: stay backwards compatible to old boost 1.41 available in RHEL 6
         //       otherwise, we could simplify this by setting the file option
         //       as ->required() using the new 1.42 boost API
         cerr << "ERROR: the option '--file' is required but missing\n\n" << desc << endl;
         return 1;
     }
 
     Printer data;
 
     const auto inputFile = vm["file"].as<string>();
     const auto diffFile = vm["diff"].as<string>();
     data.shortenTemplates = vm["shorten-templates"].as<bool>();
     data.mergeBacktraces = vm["merge-backtraces"].as<bool>();
     data.filterBtFunction = vm["filter-bt-function"].as<string>();
     data.peakLimit = vm["peak-limit"].as<size_t>();
     data.subPeakLimit = vm["sub-peak-limit"].as<size_t>();
     const string printHistogram = vm["print-histogram"].as<string>();
     data.printHistogram = !printHistogram.empty();
     const string printFlamegraph = vm["print-flamegraph"].as<string>();
     const string printMassif = vm["print-massif"].as<string>();
     if (!printMassif.empty()) {
         data.massifOut.open(printMassif, ios_base::out);
         if (!data.massifOut.is_open()) {
             cerr << "Failed to open massif output file \"" << printMassif << "\"." << endl;
             return 1;
         }
         data.massifThreshold = vm["massif-threshold"].as<double>();
         data.massifDetailedFreq = vm["massif-detailed-freq"].as<size_t>();
     }
     const bool printLeaks = vm["print-leaks"].as<bool>();
     const bool printOverallAlloc = vm["print-overall-allocated"].as<bool>();
     const bool printPeaks = vm["print-peaks"].as<bool>();
     const bool printAllocs = vm["print-allocators"].as<bool>();
     const bool printTemporary = vm["print-temporary"].as<bool>();
 
     cout << "reading file \"" << inputFile << "\" - please wait, this might take some time..." << endl;
 
     if (!diffFile.empty()) {
         cout << "reading diff file \"" << diffFile << "\" - please wait, this might take some time..." << endl;
         Printer diffData;
         auto diffRead = async(launch::async, [&diffData, diffFile]() { return diffData.read(diffFile); });
 
         if (!data.read(inputFile) || !diffRead.get()) {
             return 1;
         }
 
         data.diff(diffData);
     } else if (!data.read(inputFile)) {
         return 1;
     }
 
     data.finalize();
 
     cout << "finished reading file, now analyzing data:\n" << endl;
 
     if (printAllocs) {
         // sort by amount of allocations
         cout << "MOST CALLS TO ALLOCATION FUNCTIONS\n";
         data.printAllocations(&AllocationData::allocations,
                               [](const AllocationData& data) {
                                   cout << data.allocations << " calls to allocation functions with "
                                        << formatBytes(data.peak) << " peak consumption from\n";
                               },
                               [](const AllocationData& data) {
                                   cout << data.allocations << " calls with " << formatBytes(data.peak)
                                        << " peak consumption from:\n";
                               });
         cout << endl;
     }
 
     if (printOverallAlloc) {
         cout << "MOST BYTES ALLOCATED OVER TIME (ignoring deallocations)\n";
         data.printAllocations(&AllocationData::allocated,
                               [](const AllocationData& data) {
                                   cout << formatBytes(data.allocated) << " allocated over " << data.allocations
                                        << " calls from\n";
                               },
                               [](const AllocationData& data) {
                                   cout << formatBytes(data.allocated) << " allocated over " << data.allocations
                                        << " calls from:\n";
                               });
         cout << endl;
     }
 
     if (printPeaks) {
         /// FIXME: find a way to merge this without breaking temporal dependency.
         /// I.e. a given function could be called N times from different places
         /// and allocate M bytes each, but free it thereafter.
         /// Then the below would give a wrong total peak size of N * M instead
         /// of just N!
         cout << "PEAK MEMORY CONSUMERS\n";
         if (data.mergeBacktraces) {
             cout << "\nWARNING - the data below is not an accurate calculation of"
                     " the total peak consumption and can easily be wrong.\n"
                     " For an accurate overview, disable backtrace merging.\n";
         }
 
         data.printAllocations(&AllocationData::peak,
                               [](const AllocationData& data) {
                                   cout << formatBytes(data.peak) << " peak memory consumed over " << data.allocations
                                        << " calls from\n";
                               },
                               [](const AllocationData& data) {
                                   cout << formatBytes(data.peak) << " consumed over " << data.allocations
                                        << " calls from:\n";
                               });
     }
 
     if (printLeaks) {
         // sort by amount of leaks
         cout << "MEMORY LEAKS\n";
         data.printAllocations(&AllocationData::leaked,
                               [](const AllocationData& data) {
                                   cout << formatBytes(data.leaked) << " leaked over " << data.allocations
                                        << " calls from\n";
                               },
                               [](const AllocationData& data) {
                                   cout << formatBytes(data.leaked) << " leaked over " << data.allocations
                                        << " calls from:\n";
                               });
         cout << endl;
     }
 
     if (printTemporary) {
         // sort by amount of temporary allocations
         cout << "MOST TEMPORARY ALLOCATIONS\n";
         data.printAllocations(&AllocationData::temporary,
                               [](const AllocationData& data) {
                                   cout << data.temporary << " temporary allocations of " << data.allocations
                                        << " allocations in total (" << setprecision(2)
                                        << (float(data.temporary) * 100.f / data.allocations) << "%) from\n";
                               },
                               [](const AllocationData& data) {
                                   cout << data.temporary << " temporary allocations of " << data.allocations
                                        << " allocations in total (" << setprecision(2)
                                        << (float(data.temporary) * 100.f / data.allocations) << "%) from:\n";
                               });
         cout << endl;
     }
 
     const double totalTimeS = 0.001 * data.totalTime;
     cout << "total runtime: " << fixed << totalTimeS << "s.\n"
          << "bytes allocated in total (ignoring deallocations): " << formatBytes(data.totalCost.allocated) << " ("
          << formatBytes(data.totalCost.allocated / totalTimeS) << "/s)" << '\n'
          << "calls to allocation functions: " << data.totalCost.allocations << " ("
          << int64_t(data.totalCost.allocations / totalTimeS) << "/s)\n"
          << "temporary memory allocations: " << data.totalCost.temporary << " ("
          << int64_t(data.totalCost.temporary / totalTimeS) << "/s)\n"
          << "peak heap memory consumption: " << formatBytes(data.totalCost.peak) << '\n'
          << "peak RSS (including heaptrack overhead): " << formatBytes(data.peakRSS * data.systemInfo.pageSize) << '\n'
          << "total memory leaked: " << formatBytes(data.totalCost.leaked) << '\n';
 
     if (!printHistogram.empty()) {
         ofstream histogram(printHistogram, ios_base::out);
         if (!histogram.is_open()) {
             cerr << "Failed to open histogram output file \"" << printHistogram << "\"." << endl;
         } else {
             for (auto entry : data.sizeHistogram) {
                 histogram << entry.first << '\t' << entry.second << '\n';
             }
         }
     }
 
     if (!printFlamegraph.empty()) {
         ofstream flamegraph(printFlamegraph, ios_base::out);
         if (!flamegraph.is_open()) {
             cerr << "Failed to open flamegraph output file \"" << printFlamegraph << "\"." << endl;
         } else {
             for (const auto& allocation : data.allocations) {
                 if (!allocation.traceIndex) {
                     flamegraph << "??";
                 } else {
                     data.printFlamegraph(data.findTrace(allocation.traceIndex), flamegraph);
                 }
                 flamegraph << ' ' << allocation.allocations << '\n';
             }
         }
     }
 
     return 0;
 }
diff --git a/src/interpret/heaptrack_interpret.cpp b/src/interpret/heaptrack_interpret.cpp
index 68feac5..eab1100 100644
--- a/src/interpret/heaptrack_interpret.cpp
+++ b/src/interpret/heaptrack_interpret.cpp
@@ -1,445 +1,486 @@
 /*
  * Copyright 2014-2017 Milian Wolff <mail@milianw.de>
  *
  * This library is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
  * This library is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
  * License along with this library; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA
  */
 
 /**
  * @file heaptrack_interpret.cpp
  *
  * @brief Interpret raw heaptrack data and add Dwarf based debug information.
  */
 
 #include <algorithm>
 #include <cinttypes>
 #include <iostream>
 #include <sstream>
 #include <stdio_ext.h>
 #include <tuple>
 #include <unordered_map>
 #include <vector>
 
 #include <cxxabi.h>
 
 #include <boost/algorithm/string/predicate.hpp>
 
 #include "libbacktrace/backtrace.h"
 #include "libbacktrace/internal.h"
 #include "util/linereader.h"
 #include "util/pointermap.h"
 
 #include <unistd.h>
 
 using namespace std;
 
 namespace {
 
 string demangle(const char* function)
 {
     if (!function) {
         return {};
     } else if (function[0] != '_' || function[1] != 'Z') {
         return {function};
     }
 
     string ret;
     int status = 0;
     char* demangled = abi::__cxa_demangle(function, 0, 0, &status);
     if (demangled) {
         ret = demangled;
         free(demangled);
     }
     return ret;
 }
 
-struct AddressInformation
+struct Frame
 {
+    Frame(string function = {}, string file = {}, int line = 0)
+        : function(function)
+        , file(file)
+        , line(line)
+    {}
+
+    bool isValid() const
+    {
+        return !function.empty();
+    }
+
     string function;
     string file;
-    int line = 0;
+    int line;
+};
+
+struct AddressInformation
+{
+    Frame frame;
+    vector<Frame> inlined;
+};
+
+struct ResolvedFrame
+{
+    ResolvedFrame(size_t functionIndex = 0, size_t fileIndex = 0, int line = 0)
+        : functionIndex(functionIndex)
+        , fileIndex(fileIndex)
+        , line(line)
+    {}
+    size_t functionIndex;
+    size_t fileIndex;
+    int line;
+};
+
+struct ResolvedIP
+{
+    size_t moduleIndex = 0;
+    ResolvedFrame frame;
+    vector<ResolvedFrame> inlined;
 };
 
 struct Module
 {
     Module(uintptr_t addressStart, uintptr_t addressEnd, backtrace_state* backtraceState, size_t moduleIndex)
         : addressStart(addressStart)
         , addressEnd(addressEnd)
         , moduleIndex(moduleIndex)
         , backtraceState(backtraceState)
     {
     }
 
     AddressInformation resolveAddress(uintptr_t address) const
     {
         AddressInformation info;
         if (!backtraceState) {
             return info;
         }
 
+        // try to find frame information from debug information
         backtrace_pcinfo(backtraceState, address,
                          [](void* data, uintptr_t /*addr*/, const char* file, int line, const char* function) -> int {
+                             Frame frame(demangle(function), file ? file : "", line);
                              auto info = reinterpret_cast<AddressInformation*>(data);
-                             info->function = demangle(function);
-                             info->file = file ? file : "";
-                             info->line = line;
+                             if (!info->frame.isValid()) {
+                                info->frame = frame;
+                             } else {
+                                info->inlined.push_back(frame);
+                             }
                              return 0;
                          },
                          [](void* /*data*/, const char* /*msg*/, int /*errnum*/) {}, &info);
 
-        if (info.function.empty()) {
+        // no debug information available? try to fallback on the symbol table information
+        if (!info.frame.isValid()) {
             backtrace_syminfo(
                 backtraceState, address,
                 [](void* data, uintptr_t /*pc*/, const char* symname, uintptr_t /*symval*/, uintptr_t /*symsize*/) {
                     if (symname) {
-                        reinterpret_cast<AddressInformation*>(data)->function = demangle(symname);
+                        reinterpret_cast<AddressInformation*>(data)->frame.function = demangle(symname);
                     }
                 },
                 [](void* /*data*/, const char* msg, int errnum) {
                     cerr << "Module backtrace error (code " << errnum << "): " << msg << endl;
                 },
                 &info);
         }
 
         return info;
     }
 
     bool operator<(const Module& module) const
     {
         return tie(addressStart, addressEnd, moduleIndex)
             < tie(module.addressStart, module.addressEnd, module.moduleIndex);
     }
 
     bool operator!=(const Module& module) const
     {
         return tie(addressStart, addressEnd, moduleIndex)
             != tie(module.addressStart, module.addressEnd, module.moduleIndex);
     }
 
     uintptr_t addressStart;
     uintptr_t addressEnd;
     size_t moduleIndex;
     backtrace_state* backtraceState;
 };
 
-struct ResolvedIP
-{
-    size_t moduleIndex = 0;
-    size_t fileIndex = 0;
-    size_t functionIndex = 0;
-    int line = 0;
-};
-
 struct AccumulatedTraceData
 {
     AccumulatedTraceData()
     {
         m_modules.reserve(256);
         m_backtraceStates.reserve(64);
         m_internedData.reserve(4096);
         m_encounteredIps.reserve(32768);
     }
 
     ~AccumulatedTraceData()
     {
         fprintf(stdout, "# strings: %zu\n# ips: %zu\n", m_internedData.size(), m_encounteredIps.size());
     }
 
     ResolvedIP resolve(const uintptr_t ip)
     {
         if (m_modulesDirty) {
             // sort by addresses, required for binary search below
             sort(m_modules.begin(), m_modules.end());
 
 #ifndef NDEBUG
             for (size_t i = 0; i < m_modules.size(); ++i) {
                 const auto& m1 = m_modules[i];
                 for (size_t j = i + 1; j < m_modules.size(); ++j) {
                     if (i == j) {
                         continue;
                     }
                     const auto& m2 = m_modules[j];
                     if ((m1.addressStart <= m2.addressStart && m1.addressEnd > m2.addressStart)
                         || (m1.addressStart < m2.addressEnd && m1.addressEnd >= m2.addressEnd)) {
                         cerr << "OVERLAPPING MODULES: " << hex << m1.moduleIndex << " (" << m1.addressStart << " to "
                              << m1.addressEnd << ") and " << m1.moduleIndex << " (" << m2.addressStart << " to "
                              << m2.addressEnd << ")\n"
                              << dec;
                     } else if (m2.addressStart >= m1.addressEnd) {
                         break;
                     }
                 }
             }
 #endif
 
             m_modulesDirty = false;
         }
 
+        auto resolveFrame = [this](const Frame& frame)
+        {
+            return ResolvedFrame{intern(frame.function), intern(frame.file), frame.line};
+        };
+
         ResolvedIP data;
         // find module for this instruction pointer
         auto module =
             lower_bound(m_modules.begin(), m_modules.end(), ip,
                         [](const Module& module, const uintptr_t ip) -> bool { return module.addressEnd < ip; });
         if (module != m_modules.end() && module->addressStart <= ip && module->addressEnd >= ip) {
             data.moduleIndex = module->moduleIndex;
             const auto info = module->resolveAddress(ip);
-            data.fileIndex = intern(info.file);
-            data.functionIndex = intern(info.function);
-            data.line = info.line;
+            data.frame = resolveFrame(info.frame);
+            std::transform(info.inlined.begin(), info.inlined.end(), std::back_inserter(data.inlined),
+                           resolveFrame);
         }
         return data;
     }
 
     size_t intern(const string& str, std::string* internedString = nullptr)
     {
         if (str.empty()) {
             return 0;
         }
 
         auto it = m_internedData.find(str);
         if (it != m_internedData.end()) {
             if (internedString) {
                 *internedString = it->first;
             }
             return it->second;
         }
         const size_t id = m_internedData.size() + 1;
         it = m_internedData.insert(it, make_pair(str, id));
         if (internedString) {
             *internedString = it->first;
         }
         fprintf(stdout, "s %s\n", str.c_str());
         return id;
     }
 
     void addModule(backtrace_state* backtraceState, const size_t moduleIndex, const uintptr_t addressStart,
                    const uintptr_t addressEnd)
     {
         m_modules.emplace_back(addressStart, addressEnd, backtraceState, moduleIndex);
         m_modulesDirty = true;
     }
 
     void clearModules()
     {
         // TODO: optimize this, reuse modules that are still valid
         m_modules.clear();
         m_modulesDirty = true;
     }
 
     size_t addIp(const uintptr_t instructionPointer)
     {
         if (!instructionPointer) {
             return 0;
         }
 
         auto it = m_encounteredIps.find(instructionPointer);
         if (it != m_encounteredIps.end()) {
             return it->second;
         }
 
         const size_t ipId = m_encounteredIps.size() + 1;
         m_encounteredIps.insert(it, make_pair(instructionPointer, ipId));
 
         const auto ip = resolve(instructionPointer);
         fprintf(stdout, "i %zx %zx", instructionPointer, ip.moduleIndex);
-        if (ip.functionIndex || ip.fileIndex) {
-            fprintf(stdout, " %zx", ip.functionIndex);
-            if (ip.fileIndex) {
-                fprintf(stdout, " %zx %x", ip.fileIndex, ip.line);
+        if (ip.frame.functionIndex || ip.frame.fileIndex) {
+            fprintf(stdout, " %zx", ip.frame.functionIndex);
+            if (ip.frame.fileIndex) {
+                fprintf(stdout, " %zx %x", ip.frame.fileIndex, ip.frame.line);
+                for (const auto& inlined : ip.inlined) {
+                    fprintf(stdout, " %zx %zx %x", inlined.functionIndex, inlined.fileIndex, inlined.line);
+                }
             }
         }
         fputc('\n', stdout);
         return ipId;
     }
 
     std::string findDebugFile(const std::string& input) const
     {
         // TODO: also try to find a debug file by build-id
         // TODO: also lookup in (user-configurable) debug path
         std::string file = input + ".debug";
         if (access(file.c_str(), R_OK) == 0) {
             return file;
         } else {
             return input;
         }
     }
 
     /**
      * Prevent the same file from being initialized multiple times.
      * This drastically cuts the memory consumption down
      */
     backtrace_state* findBacktraceState(const std::string& originalFileName, uintptr_t addressStart)
     {
         if (boost::algorithm::starts_with(originalFileName, "linux-vdso.so")) {
             // prevent warning, since this will always fail
             return nullptr;
         }
 
         auto it = m_backtraceStates.find(originalFileName);
         if (it != m_backtraceStates.end()) {
             return it->second;
         }
 
         const auto fileName = findDebugFile(originalFileName);
 
         struct CallbackData
         {
             const char* fileName;
         };
         CallbackData data = {fileName.c_str()};
 
         auto errorHandler = [](void* rawData, const char* msg, int errnum) {
             auto data = reinterpret_cast<const CallbackData*>(rawData);
             cerr << "Failed to create backtrace state for module " << data->fileName << ": " << msg << " / "
                  << strerror(errnum) << " (error code " << errnum << ")" << endl;
         };
 
         auto state = backtrace_create_state(data.fileName, /* we are single threaded, so: not thread safe */ false,
                                             errorHandler, &data);
 
         if (state) {
             const int descriptor = backtrace_open(data.fileName, errorHandler, &data, nullptr);
             if (descriptor >= 1) {
                 int foundSym = 0;
                 int foundDwarf = 0;
                 auto ret = elf_add(state, descriptor, addressStart, errorHandler, &data, &state->fileline_fn, &foundSym,
                                    &foundDwarf, false);
                 if (ret && foundSym) {
                     state->syminfo_fn = &elf_syminfo;
                 }
             }
         }
 
         m_backtraceStates.insert(it, make_pair(originalFileName, state));
 
         return state;
     }
 
 private:
     vector<Module> m_modules;
     unordered_map<std::string, backtrace_state*> m_backtraceStates;
     bool m_modulesDirty = false;
 
     unordered_map<string, size_t> m_internedData;
     unordered_map<uintptr_t, size_t> m_encounteredIps;
 };
 }
 
 int main(int /*argc*/, char** /*argv*/)
 {
     // optimize: we only have a single thread
     ios_base::sync_with_stdio(false);
     __fsetlocking(stdout, FSETLOCKING_BYCALLER);
     __fsetlocking(stdin, FSETLOCKING_BYCALLER);
 
     AccumulatedTraceData data;
 
     LineReader reader;
 
     string exe;
 
     PointerMap ptrToIndex;
     uint64_t lastPtr = 0;
     AllocationInfoSet allocationInfos;
 
     uint64_t allocations = 0;
     uint64_t leakedAllocations = 0;
     uint64_t temporaryAllocations = 0;
 
     while (reader.getLine(cin)) {
         if (reader.mode() == 'x') {
             reader >> exe;
         } else if (reader.mode() == 'm') {
             string fileName;
             reader >> fileName;
             if (fileName == "-") {
                 data.clearModules();
             } else {
                 if (fileName == "x") {
                     fileName = exe;
                 }
                 std::string internedString;
                 const auto moduleIndex = data.intern(fileName, &internedString);
                 uintptr_t addressStart = 0;
                 if (!(reader >> addressStart)) {
                     cerr << "failed to parse line: " << reader.line() << endl;
                     return 1;
                 }
                 auto state = data.findBacktraceState(internedString, addressStart);
                 uintptr_t vAddr = 0;
                 uintptr_t memSize = 0;
                 while ((reader >> vAddr) && (reader >> memSize)) {
                     data.addModule(state, moduleIndex, addressStart + vAddr, addressStart + vAddr + memSize);
                 }
             }
         } else if (reader.mode() == 't') {
             uintptr_t instructionPointer = 0;
             size_t parentIndex = 0;
             if (!(reader >> instructionPointer) || !(reader >> parentIndex)) {
                 cerr << "failed to parse line: " << reader.line() << endl;
                 return 1;
             }
             // ensure ip is encountered
             const auto ipId = data.addIp(instructionPointer);
             // trace point, map current output index to parent index
             fprintf(stdout, "t %zx %zx\n", ipId, parentIndex);
         } else if (reader.mode() == '+') {
             ++allocations;
             ++leakedAllocations;
             uint64_t size = 0;
             TraceIndex traceId;
             uint64_t ptr = 0;
             if (!(reader >> size) || !(reader >> traceId.index) || !(reader >> ptr)) {
                 cerr << "failed to parse line: " << reader.line() << endl;
                 continue;
             }
 
             AllocationIndex index;
             if (allocationInfos.add(size, traceId, &index)) {
                 fprintf(stdout, "a %" PRIx64 " %x\n", size, traceId.index);
             }
             ptrToIndex.addPointer(ptr, index);
             lastPtr = ptr;
             fprintf(stdout, "+ %x\n", index.index);
         } else if (reader.mode() == '-') {
             uint64_t ptr = 0;
             if (!(reader >> ptr)) {
                 cerr << "failed to parse line: " << reader.line() << endl;
                 continue;
             }
             bool temporary = lastPtr == ptr;
             lastPtr = 0;
             auto allocation = ptrToIndex.takePointer(ptr);
             if (!allocation.second) {
                 continue;
             }
             fprintf(stdout, "- %x\n", allocation.first.index);
             if (temporary) {
                 ++temporaryAllocations;
             }
             --leakedAllocations;
         } else {
             fputs(reader.line().c_str(), stdout);
             fputc('\n', stdout);
         }
     }
 
     fprintf(stderr, "heaptrack stats:\n"
                     "\tallocations:          \t%" PRIu64 "\n"
                     "\tleaked allocations:   \t%" PRIu64 "\n"
                     "\ttemporary allocations:\t%" PRIu64 "\n",
             allocations, leakedAllocations, temporaryAllocations);
 
     return 0;
 }
diff --git a/tests/manual/CMakeLists.txt b/tests/manual/CMakeLists.txt
index 831c7e2..0a978f2 100644
--- a/tests/manual/CMakeLists.txt
+++ b/tests/manual/CMakeLists.txt
@@ -1,19 +1,23 @@
 set(CMAKE_BUILD_TYPE Debug)
 
 add_executable(test_c test.c)
 add_executable(test_cpp test.cpp)
 add_executable(threaded threaded.cpp)
 target_link_libraries(threaded ${CMAKE_THREAD_LIBS_INIT})
 
 add_executable(callgraph callgraph.cpp)
 
 add_library(testlib SHARED lib.cpp)
 add_executable(test_lib test_lib.cpp)
 target_link_libraries(test_lib testlib)
 
 add_executable(test_aggregation test_aggregation.cpp)
 
 add_executable(signals signals.cpp)
 target_link_libraries(signals ${CMAKE_THREAD_LIBS_INIT})
 
 add_executable(libc_leaks libc_leaks.c)
+
+set(CMAKE_BUILD_TYPE RelWithDebInfo)
+
+add_executable(inlining inlining.cpp)
diff --git a/tests/manual/inlining.cpp b/tests/manual/inlining.cpp
new file mode 100644
index 0000000..45342dd
--- /dev/null
+++ b/tests/manual/inlining.cpp
@@ -0,0 +1,19 @@
+inline void __attribute__((always_inline)) asdf()
+{
+    new char[1234];
+}
+
+inline void __attribute__((always_inline)) bar()
+{
+    asdf();
+}
+
+inline void __attribute__((always_inline)) foo()
+{
+    bar();
+}
+
+int main()
+{
+    foo();
+}