diff --git a/kdevplatform/language/duchain/topducontextdynamicdata.cpp b/kdevplatform/language/duchain/topducontextdynamicdata.cpp index c9a37d34d0..3f0cdd8c25 100644 --- a/kdevplatform/language/duchain/topducontextdynamicdata.cpp +++ b/kdevplatform/language/duchain/topducontextdynamicdata.cpp @@ -1,851 +1,855 @@ /* This is part of KDevelop Copyright 2014 Milian Wolff Copyright 2008 David Nolden This library is free software; you can redistribute it and/or modify it under the terms of the GNU Library General Public License version 2 as published by the Free Software Foundation. This library is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public License for more details. You should have received a copy of the GNU Library General Public License along with this library; see the file COPYING.LIB. If not, write to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. */ #include "topducontextdynamicdata.h" #include #include #include #include "declaration.h" #include "declarationdata.h" #include "ducontext.h" #include "topducontext.h" #include "topducontextdata.h" #include "ducontextdata.h" #include "ducontextdynamicdata.h" #include "duchainregister.h" #include "serialization/itemrepository.h" #include "problem.h" #include //#define DEBUG_DATA_INFO //This might be problematic on some systems, because really many mmaps are created #define USE_MMAP using namespace KDevelop; namespace { /** * Serialize @p item into @p data and update @p totalDataOffset. * * If @p isSharedDataItem is true, then the item's internal data pointer is not updated * to point to the serialized data. Otherwise the dynamic data is deleted and the items * data will point to the constant serialized data. * * NOTE: The above is required to support serialization of shared-data such as from ProblemPointer. * If we'd set the data to point to the constant region, we'd get crashes due to use-after-free when * we unmap the data and a shared pointer outlives that. */ void saveDUChainItem(QVector& data, DUChainBase& item, uint& totalDataOffset, bool isSharedDataItem) { if(!item.d_func()->classId) { //If this triggers, you have probably created an own DUChainBase based class, but haven't called setClassId(this) in the constructor. qCritical() << "no class-id set for data attached to a declaration of type" << typeid(item).name(); Q_ASSERT(0); } int size = DUChainItemSystem::self().dynamicSize(*item.d_func()); if(data.back().array.size() - int(data.back().position) < size) //Create a new data item data.append({QByteArray(size > 10000 ? size : 10000, 0), 0u}); uint pos = data.back().position; data.back().position += size; totalDataOffset += size; DUChainBaseData& target(*(reinterpret_cast(data.back().array.data() + pos))); if(item.d_func()->isDynamic()) { //Change from dynamic data to constant data enableDUChainReferenceCounting(data.back().array.data(), data.back().array.size()); DUChainItemSystem::self().copy(*item.d_func(), target, true); Q_ASSERT(!target.isDynamic()); if (!isSharedDataItem) { item.setData(&target); } disableDUChainReferenceCounting(data.back().array.data()); }else{ //Just copy the data into another place, expensive copy constructors are not needed #if defined(__GNUC__) && !defined(__INTEL_COMPILER) && (((__GNUC__ * 100) + __GNUC_MINOR__) >= 800) #pragma GCC diagnostic push #pragma GCC diagnostic ignored "-Wclass-memaccess" +#endif memcpy(&target, item.d_func(), size); +#if defined(__GNUC__) && !defined(__INTEL_COMPILER) && (((__GNUC__ * 100) + __GNUC_MINOR__) >= 800) #pragma GCC diagnostic pop #endif if (!isSharedDataItem) { item.setData(&target, false); } } if (!isSharedDataItem) { Q_ASSERT(item.d_func() == &target); Q_ASSERT(!item.d_func()->isDynamic()); } } uint indexForParentContext(DUContext* context) { return LocalIndexedDUContext(context->parentContext()).localIndex(); } uint indexForParentContext(Declaration* declaration) { return LocalIndexedDUContext(declaration->context()).localIndex(); } uint indexForParentContext(const ProblemPointer& /*problem*/) { // always stored in the top context return 0; } #ifndef QT_NO_DEBUG void validateItem(const DUChainBaseData* const data, const uchar* const mappedData, const size_t mappedDataSize) { Q_ASSERT(!data->isDynamic()); if (mappedData) { Q_ASSERT(((size_t)data) < ((size_t)mappedData) || ((size_t)data) > ((size_t)mappedData) + mappedDataSize); } } #endif const char* pointerInData(const QVector& data, uint totalOffset) { for(int a = 0; a < data.size(); ++a) { if(totalOffset < data[a].position) return data[a].array.constData() + totalOffset; totalOffset -= data[a].position; } Q_ASSERT_X(false, Q_FUNC_INFO, "Offset doesn't exist in the data."); return nullptr; } void verifyDataInfo(const TopDUContextDynamicData::ItemDataInfo& info, const QVector& data) { Q_UNUSED(info); Q_UNUSED(data); #ifdef DEBUG_DATA_INFO DUChainBaseData* item = (DUChainBaseData*)(pointerInData(data, info.dataOffset)); int size = DUChainItemSystem::self().dynamicSize(*item); Q_ASSERT(size); #endif } QString basePath() { return globalItemRepositoryRegistry().path() + "/topcontexts/"; } QString pathForTopContext(const uint topContextIndex) { return basePath() + QString::number(topContextIndex); } enum LoadType { PartialLoad, ///< Only load the direct member data FullLoad ///< Load everything, including appended lists }; template void loadTopDUContextData(const uint topContextIndex, LoadType loadType, F callback) { QFile file(pathForTopContext(topContextIndex)); if (!file.open(QIODevice::ReadOnly)) { return; } uint readValue; file.read((char*)&readValue, sizeof(uint)); // now readValue is filled with the top-context data size Q_ASSERT(readValue >= sizeof(TopDUContextData)); const QByteArray data = file.read(loadType == FullLoad ? readValue : sizeof(TopDUContextData)); const TopDUContextData* topData = reinterpret_cast(data.constData()); callback(topData); } template struct PtrType; template struct PtrType { using value = T*; }; template struct PtrType> { using value = T*; }; template Q_DECL_CONSTEXPR bool isSharedDataItem() { return false; } template<> Q_DECL_CONSTEXPR bool isSharedDataItem() { return true; } } //BEGIN DUChainItemStorage template TopDUContextDynamicData::DUChainItemStorage::DUChainItemStorage(TopDUContextDynamicData* data) : data(data) { } template TopDUContextDynamicData::DUChainItemStorage::~DUChainItemStorage() { clearItems(); } template void TopDUContextDynamicData::DUChainItemStorage::clearItems() { //Due to template specialization it's possible that a declaration is not reachable through the normal context structure. //For that reason we have to check here, and delete all remaining declarations. qDeleteAll(temporaryItems); temporaryItems.clear(); qDeleteAll(items); items.clear(); } namespace KDevelop { template<> void TopDUContextDynamicData::DUChainItemStorage::clearItems() { // don't delete anything - the problem is shared items.clear(); } } template void TopDUContextDynamicData::DUChainItemStorage::clearItemIndex(const Item& item, const uint index) { if(!data->m_dataLoaded) data->loadData(); if (index < (0x0fffffff/2)) { if (index == 0 || index > uint(items.size())) { return; } else { const uint realIndex = index - 1; Q_ASSERT(items[realIndex] == item); items[realIndex] = nullptr; if (realIndex < (uint)offsets.size()) { offsets[realIndex] = ItemDataInfo(); } } } else { const uint realIndex = 0x0fffffff - index; //We always keep the highest bit at zero if (realIndex == 0 || realIndex > uint(temporaryItems.size())) { return; } else { Q_ASSERT(temporaryItems[realIndex-1] == item); temporaryItems[realIndex-1] = nullptr; } } Q_UNUSED(item); } template void TopDUContextDynamicData::DUChainItemStorage::storeData(uint& currentDataOffset, const QVector& oldData) { auto const oldOffsets = offsets; offsets.clear(); for (int a = 0; a < items.size(); ++a) { auto item = items[a]; if (!item) { if (oldOffsets.size() > a && oldOffsets[a].dataOffset) { //Directly copy the old data range into the new data const DUChainBaseData* itemData = nullptr; if (data->m_mappedData) { itemData = reinterpret_cast(data->m_mappedData + oldOffsets[a].dataOffset); } else { itemData = reinterpret_cast(::pointerInData(oldData, oldOffsets[a].dataOffset)); } offsets << data->writeDataInfo(oldOffsets[a], itemData, currentDataOffset); } else { offsets << ItemDataInfo(); } } else { offsets << ItemDataInfo{currentDataOffset, indexForParentContext(item)}; saveDUChainItem(data->m_data, *item, currentDataOffset, isSharedDataItem()); } } #ifndef QT_NO_DEBUG if (!isSharedDataItem()) { for (auto item : items) { if (item) { validateItem(item->d_func(), data->m_mappedData, data->m_mappedDataSize); } } } #endif } template bool TopDUContextDynamicData::DUChainItemStorage::itemsHaveChanged() const { for (auto item : items) { if (item && item->d_func()->m_dynamic) { return true; } } return false; } template uint TopDUContextDynamicData::DUChainItemStorage::allocateItemIndex(const Item& item, const bool temporary) { if (!data->m_dataLoaded) { data->loadData(); } if (!temporary) { items.append(item); return items.size(); } else { temporaryItems.append(item); return 0x0fffffff - temporaryItems.size(); //We always keep the highest bit at zero } } template bool TopDUContextDynamicData::DUChainItemStorage::isItemForIndexLoaded(uint index) const { if (!data->m_dataLoaded) { return false; } if (index < (0x0fffffff/2)) { if (index == 0 || index > uint(items.size())) { return false; } return items[index-1]; } else { // temporary item return true; } } template Item TopDUContextDynamicData::DUChainItemStorage::getItemForIndex(uint index) const { if (index >= (0x0fffffff/2)) { index = 0x0fffffff - index; //We always keep the highest bit at zero if(index == 0 || index > uint(temporaryItems.size())) return {}; else return temporaryItems.at(index-1); } if (index == 0 || index > static_cast(items.size())) { qCWarning(LANGUAGE) << "item index out of bounds:" << index << "count:" << items.size(); return {}; } const uint realIndex = index - 1;; const auto& item = items.at(realIndex); if (item) { //Shortcut, because this is the most common case return item; } if (realIndex < (uint)offsets.size() && offsets[realIndex].dataOffset) { Q_ASSERT(!data->m_itemRetrievalForbidden); //Construct the context, and eventuall its parent first ///TODO: ugly, remove need for const_cast auto itemData = const_cast( reinterpret_cast(data->pointerInData(offsets[realIndex].dataOffset)) ); auto& item = items[realIndex]; item = dynamic_cast::value>(DUChainItemSystem::self().create(itemData)); if (!item) { //When this happens, the item has not been registered correctly. //We can stop here, because else we will get crashes later. qCritical() << "Failed to load item with identity" << itemData->classId; return {}; } if (isSharedDataItem()) { // NOTE: shared data must never point to mmapped data regions as otherwise we might end up with // use-after-free or double-deletions etc. pp. // thus, make the item always dynamic after deserialization item->makeDynamic(); } auto parent = data->getContextForIndex(offsets[realIndex].parentContext); Q_ASSERT_X(parent, Q_FUNC_INFO, "Could not find parent context for loaded item.\n" "Potentially, the context has been deleted without deleting its children."); item->rebuildDynamicData(parent, index); } else { qCWarning(LANGUAGE) << "invalid item for index" << index << offsets.size() << offsets.value(realIndex).dataOffset; } return item; } template void TopDUContextDynamicData::DUChainItemStorage::deleteOnDisk() { for (auto& item : items) { if (item) { item->makeDynamic(); } } } template void TopDUContextDynamicData::DUChainItemStorage::loadData(QFile* file) const { Q_ASSERT(offsets.isEmpty()); Q_ASSERT(items.isEmpty()); uint readValue; file->read((char*)&readValue, sizeof(uint)); offsets.resize(readValue); file->read((char*)offsets.data(), sizeof(ItemDataInfo) * offsets.size()); //Fill with zeroes for now, will be initialized on-demand items.resize(offsets.size()); } template void TopDUContextDynamicData::DUChainItemStorage::writeData(QFile* file) { uint writeValue = offsets.size(); file->write((char*)&writeValue, sizeof(uint)); file->write((char*)offsets.data(), sizeof(ItemDataInfo) * offsets.size()); } //END DUChainItemStorage const char* TopDUContextDynamicData::pointerInData(uint totalOffset) const { Q_ASSERT(!m_mappedData || m_data.isEmpty()); if(m_mappedData && m_mappedDataSize) return (char*)m_mappedData + totalOffset; return ::pointerInData(m_data, totalOffset); } TopDUContextDynamicData::TopDUContextDynamicData(TopDUContext* topContext) : m_deleting(false) , m_topContext(topContext) , m_contexts(this) , m_declarations(this) , m_problems(this) , m_onDisk(false) , m_dataLoaded(true) , m_mappedFile(nullptr) , m_mappedData(nullptr) , m_mappedDataSize(0) , m_itemRetrievalForbidden(false) { } void KDevelop::TopDUContextDynamicData::clear() { m_contexts.clearItems(); m_declarations.clearItems(); m_problems.clearItems(); } TopDUContextDynamicData::~TopDUContextDynamicData() { unmap(); } void KDevelop::TopDUContextDynamicData::unmap() { delete m_mappedFile; m_mappedFile = nullptr; m_mappedData = nullptr; m_mappedDataSize = 0; } bool TopDUContextDynamicData::fileExists(uint topContextIndex) { return QFile::exists(pathForTopContext(topContextIndex)); } QList TopDUContextDynamicData::loadImporters(uint topContextIndex) { QList ret; loadTopDUContextData(topContextIndex, FullLoad, [&ret] (const TopDUContextData* topData) { ret.reserve(topData->m_importersSize()); FOREACH_FUNCTION(const IndexedDUContext& importer, topData->m_importers) ret << importer; }); return ret; } QList TopDUContextDynamicData::loadImports(uint topContextIndex) { QList ret; loadTopDUContextData(topContextIndex, FullLoad, [&ret] (const TopDUContextData* topData) { ret.reserve(topData->m_importedContextsSize()); FOREACH_FUNCTION(const DUContext::Import& import, topData->m_importedContexts) ret << import.indexedContext(); }); return ret; } IndexedString TopDUContextDynamicData::loadUrl(uint topContextIndex) { IndexedString url; loadTopDUContextData(topContextIndex, PartialLoad, [&url] (const TopDUContextData* topData) { Q_ASSERT(topData->m_url.isEmpty() || topData->m_url.index() >> 16); url = topData->m_url; }); return url; } void TopDUContextDynamicData::loadData() const { //This function has to be protected by an additional mutex, since it can be triggered from multiple threads at the same time static QMutex mutex; QMutexLocker lock(&mutex); if(m_dataLoaded) return; Q_ASSERT(!m_dataLoaded); Q_ASSERT(m_data.isEmpty()); QFile* file = new QFile(pathForTopContext(m_topContext->ownIndex())); bool open = file->open(QIODevice::ReadOnly); Q_UNUSED(open); Q_ASSERT(open); Q_ASSERT(file->size()); //Skip the offsets, we're already read them //Skip top-context data uint readValue; file->read((char*)&readValue, sizeof(uint)); file->seek(readValue + file->pos()); m_contexts.loadData(file); m_declarations.loadData(file); m_problems.loadData(file); #ifdef USE_MMAP m_mappedData = file->map(file->pos(), file->size() - file->pos()); if(m_mappedData) { m_mappedFile = file; m_mappedDataSize = file->size() - file->pos(); file->close(); //Close the file, so there is less open file descriptors(May be problematic) }else{ qCDebug(LANGUAGE) << "Failed to map" << file->fileName(); } #endif if(!m_mappedFile) { QByteArray data = file->readAll(); m_data.append({data, (uint)data.size()}); delete file; } m_dataLoaded = true; } TopDUContext* TopDUContextDynamicData::load(uint topContextIndex) { QFile file(pathForTopContext(topContextIndex)); if(file.open(QIODevice::ReadOnly)) { if(file.size() == 0) { qCWarning(LANGUAGE) << "Top-context file is empty" << file.fileName(); return nullptr; } QVector contextDataOffsets; QVector declarationDataOffsets; uint readValue; file.read((char*)&readValue, sizeof(uint)); //now readValue is filled with the top-context data size QByteArray topContextData = file.read(readValue); DUChainBaseData* topData = reinterpret_cast(topContextData.data()); TopDUContext* ret = dynamic_cast(DUChainItemSystem::self().create(topData)); if(!ret) { qCWarning(LANGUAGE) << "Cannot load a top-context from file" << file.fileName() << "- the required language-support for handling ID" << topData->classId << "is probably not loaded"; return nullptr; } TopDUContextDynamicData& target(*ret->m_dynamicData); target.m_data.clear(); target.m_dataLoaded = false; target.m_onDisk = true; ret->rebuildDynamicData(nullptr, topContextIndex); target.m_topContextData.append({topContextData, (uint)0}); return ret; }else{ return nullptr; } } bool TopDUContextDynamicData::isOnDisk() const { return m_onDisk; } void TopDUContextDynamicData::deleteOnDisk() { if(!isOnDisk()) return; qCDebug(LANGUAGE) << "deleting" << m_topContext->ownIndex() << m_topContext->url().str(); if(!m_dataLoaded) loadData(); m_contexts.deleteOnDisk(); m_declarations.deleteOnDisk(); m_problems.deleteOnDisk(); m_topContext->makeDynamic(); m_onDisk = false; bool successfullyRemoved = QFile::remove(filePath()); Q_UNUSED(successfullyRemoved); Q_ASSERT(successfullyRemoved); qCDebug(LANGUAGE) << "deletion ready"; } QString KDevelop::TopDUContextDynamicData::filePath() const { return pathForTopContext(m_topContext->ownIndex()); } bool TopDUContextDynamicData::hasChanged() const { return !m_onDisk || m_topContext->d_func()->m_dynamic || m_contexts.itemsHaveChanged() || m_declarations.itemsHaveChanged() || m_problems.itemsHaveChanged(); } void TopDUContextDynamicData::store() { // qCDebug(LANGUAGE) << "storing" << m_topContext->url().str() << m_topContext->ownIndex() << "import-count:" << m_topContext->importedParentContexts().size(); //Check if something has changed. If nothing has changed, don't store to disk. bool contentDataChanged = hasChanged(); if (!contentDataChanged) { return; } ///@todo Save the meta-data into a repository, and only the actual content data into a file. /// This will make saving+loading more efficient, and will reduce the disk-usage. /// Then we also won't need to load the data if only the meta-data changed. if(!m_dataLoaded) loadData(); ///If the data is mapped, and we re-write the file, we must make sure that the data is copied out of the map, ///even if only metadata is changed. ///@todo If we split up data and metadata, we don't need to do this if(m_mappedData) contentDataChanged = true; m_topContext->makeDynamic(); m_topContextData.clear(); Q_ASSERT(m_topContext->d_func()->m_ownIndex == m_topContext->ownIndex()); uint topContextDataSize = DUChainItemSystem::self().dynamicSize(*m_topContext->d_func()); m_topContextData.append({QByteArray(DUChainItemSystem::self().dynamicSize(*m_topContext->d_func()), topContextDataSize), 0u}); uint actualTopContextDataSize = 0; if (contentDataChanged) { //We don't need these structures any more, since we have loaded all the declarations/contexts, and m_data //will be reset which these structures pointed into //Load all lazy declarations/contexts const auto oldData = m_data; //Keep the old data alive until everything is stored into a new data structure m_data.clear(); uint newDataSize = 0; foreach(const ArrayWithPosition &array, oldData) newDataSize += array.position; newDataSize = std::max(newDataSize, 10000u); //We always put 1 byte to the front, so we don't have zero data-offsets, since those are used for "invalid". uint currentDataOffset = 1; m_data.append({QByteArray(newDataSize, 0), currentDataOffset}); m_itemRetrievalForbidden = true; m_contexts.storeData(currentDataOffset, oldData); m_declarations.storeData(currentDataOffset, oldData); m_problems.storeData(currentDataOffset, oldData); m_itemRetrievalForbidden = false; } saveDUChainItem(m_topContextData, *m_topContext, actualTopContextDataSize, false); Q_ASSERT(actualTopContextDataSize == topContextDataSize); Q_ASSERT(m_topContextData.size() == 1); Q_ASSERT(!m_topContext->d_func()->isDynamic()); unmap(); QDir().mkpath(basePath()); QFile file(filePath()); if(file.open(QIODevice::WriteOnly)) { file.resize(0); file.write((char*)&topContextDataSize, sizeof(uint)); foreach(const ArrayWithPosition& pos, m_topContextData) file.write(pos.array.constData(), pos.position); m_contexts.writeData(&file); m_declarations.writeData(&file); m_problems.writeData(&file); foreach(const ArrayWithPosition& pos, m_data) file.write(pos.array.constData(), pos.position); m_onDisk = true; if (file.size() == 0) { qCWarning(LANGUAGE) << "Saving zero size top ducontext data"; } file.close(); } else { qCWarning(LANGUAGE) << "Cannot open top-context for writing"; } // qCDebug(LANGUAGE) << "stored" << m_topContext->url().str() << m_topContext->ownIndex() << "import-count:" << m_topContext->importedParentContexts().size(); } TopDUContextDynamicData::ItemDataInfo TopDUContextDynamicData::writeDataInfo(const ItemDataInfo& info, const DUChainBaseData* data, uint& totalDataOffset) { ItemDataInfo ret(info); Q_ASSERT(info.dataOffset); const auto size = DUChainItemSystem::self().dynamicSize(*data); Q_ASSERT(size); if(m_data.back().array.size() - m_data.back().position < size) { //Create a new m_data item m_data.append({QByteArray(std::max(size, 10000u), 0), 0u}); } ret.dataOffset = totalDataOffset; uint pos = m_data.back().position; m_data.back().position += size; totalDataOffset += size; auto target = reinterpret_cast(m_data.back().array.data() + pos); #if defined(__GNUC__) && !defined(__INTEL_COMPILER) && (((__GNUC__ * 100) + __GNUC_MINOR__) >= 800) #pragma GCC diagnostic push #pragma GCC diagnostic ignored "-Wclass-memaccess" +#endif memcpy(target, data, size); +#if defined(__GNUC__) && !defined(__INTEL_COMPILER) && (((__GNUC__ * 100) + __GNUC_MINOR__) >= 800) #pragma GCC diagnostic pop #endif verifyDataInfo(ret, m_data); return ret; } uint TopDUContextDynamicData::allocateDeclarationIndex(Declaration* decl, bool temporary) { return m_declarations.allocateItemIndex(decl, temporary); } uint TopDUContextDynamicData::allocateContextIndex(DUContext* context, bool temporary) { return m_contexts.allocateItemIndex(context, temporary); } uint TopDUContextDynamicData::allocateProblemIndex(const ProblemPointer& problem) { return m_problems.allocateItemIndex(problem, false); } bool TopDUContextDynamicData::isDeclarationForIndexLoaded(uint index) const { return m_declarations.isItemForIndexLoaded(index); } bool TopDUContextDynamicData::isContextForIndexLoaded(uint index) const { return m_contexts.isItemForIndexLoaded(index); } bool TopDUContextDynamicData::isTemporaryContextIndex(uint index) const { return !(index < (0x0fffffff/2)); } bool TopDUContextDynamicData::isTemporaryDeclarationIndex(uint index) const { return !(index < (0x0fffffff/2)); } DUContext* TopDUContextDynamicData::getContextForIndex(uint index) const { if(!m_dataLoaded) loadData(); if (index == 0) { return m_topContext; } return m_contexts.getItemForIndex(index); } Declaration* TopDUContextDynamicData::getDeclarationForIndex(uint index) const { if(!m_dataLoaded) loadData(); return m_declarations.getItemForIndex(index); } ProblemPointer TopDUContextDynamicData::getProblemForIndex(uint index) const { if(!m_dataLoaded) loadData(); return m_problems.getItemForIndex(index); } void TopDUContextDynamicData::clearDeclarationIndex(Declaration* decl) { m_declarations.clearItemIndex(decl, decl->m_indexInTopContext); } void TopDUContextDynamicData::clearContextIndex(DUContext* context) { m_contexts.clearItemIndex(context, context->m_dynamicData->m_indexInTopContext); } void TopDUContextDynamicData::clearProblems() { m_problems.clearItems(); } diff --git a/kdevplatform/serialization/itemrepository.h b/kdevplatform/serialization/itemrepository.h index a47476f77f..f8c2c58307 100644 --- a/kdevplatform/serialization/itemrepository.h +++ b/kdevplatform/serialization/itemrepository.h @@ -1,2259 +1,2261 @@ /* Copyright 2008 David Nolden This library is free software; you can redistribute it and/or modify it under the terms of the GNU Library General Public License version 2 as published by the Free Software Foundation. This library is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public License for more details. You should have received a copy of the GNU Library General Public License along with this library; see the file COPYING.LIB. If not, write to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. */ #ifndef KDEVPLATFORM_ITEMREPOSITORY_H #define KDEVPLATFORM_ITEMREPOSITORY_H #include #include #include #include #include #include "referencecounting.h" #include "abstractitemrepository.h" #include "repositorymanager.h" #include "itemrepositoryregistry.h" //#define DEBUG_MONSTERBUCKETS // #define DEBUG_ITEMREPOSITORY_LOADING // #define ifDebugInfiniteRecursion(x) x #define ifDebugInfiniteRecursion(x) // #define ifDebugLostSpace(x) x #define ifDebugLostSpace(x) // #define DEBUG_INCORRECT_DELETE //Makes sure that all items stay reachable through the basic hash // #define DEBUG_ITEM_REACHABILITY ///@todo Dynamic bucket hash size #ifdef DEBUG_ITEM_REACHABILITY #define ENSURE_REACHABLE(bucket) Q_ASSERT(allItemsReachable(bucket)); #define IF_ENSURE_REACHABLE(x) x #else #define ENSURE_REACHABLE(bucket) #define IF_ENSURE_REACHABLE(x) #endif #define ITEMREPOSITORY_USE_MMAP_LOADING //Assertion macro that prevents warnings if debugging is disabled //Only use it to verify values, it should not call any functions, since else the function will even be called in release mode #ifdef QT_NO_DEBUG #define VERIFY(X) if(!(X)) {qWarning() << "Failed to verify expression" << #X;} #else #define VERIFY(X) Q_ASSERT(X) #endif ///When this is uncommented, a 64-bit test-value is written behind the area an item is allowed to write into before ///createItem(..) is called, and an assertion triggers when it was changed during createItem(), which means createItem wrote too long. ///The problem: This temporarily overwrites valid data in the following item, so it will cause serious problems if that data is accessed ///during the call to createItem(). // #define DEBUG_WRITING_EXTENTS class TestItemRepository; namespace KDevelop { /** * This file implements a generic bucket-based indexing repository, that can be used for example to index strings. * * All you need to do is define your item type that you want to store into the repository, and create a request item * similar to ExampleItemRequest that compares and fills the defined item type. * * For example the string repository uses "unsigned short" as item-type, uses that actual value to store the length of the string, * and uses the space behind to store the actual string content. * * @see AbstractItemRepository * @see ItemRepository * * @see ExampleItem * @see ExampleItemRequest * * @see typerepository.h * @see stringrepository.h * @see indexedstring.h */ enum { ItemRepositoryBucketSize = 1<<16, ItemRepositoryBucketLimit = 1<<16 }; /** * Buckets are the memory-units that are used to store the data in an ItemRepository. * * About monster buckets: Normally a bucket has a size of 64kb, but when an item is * allocated that is larger than that, a "monster bucket" is allocated, which spans the * space of multiple buckets. */ template class Bucket { public: enum { AdditionalSpacePerItem = 2 }; enum { ObjectMapSize = ((ItemRepositoryBucketSize / ItemRequest::AverageSize) * 3) / 2 + 1, MaxFreeItemsForHide = 0, //When less than this count of free items in one buckets is reached, the bucket is removed from the global list of buckets with free items MaxFreeSizeForHide = fixedItemSize ? fixedItemSize : 0, //Only when the largest free size is smaller then this, the bucket is taken from the free list MinFreeItemsForReuse = 10,//When this count of free items in one bucket is reached, consider re-assigning them to new requests MinFreeSizeForReuse = ItemRepositoryBucketSize/20 //When the largest free item is bigger then this, the bucket is automatically added to the free list }; enum { NextBucketHashSize = ObjectMapSize, //Affects the average count of bucket-chains that need to be walked in ItemRepository::index. Must be a multiple of ObjectMapSize DataSize = sizeof(char) + sizeof(unsigned int) * 3 + ItemRepositoryBucketSize + sizeof(short unsigned int) * (ObjectMapSize + NextBucketHashSize + 1) }; enum { CheckStart = 0xff00ff1, CheckEnd = 0xfafcfb }; Bucket() : m_monsterBucketExtent(0) , m_available(0) , m_data(nullptr) , m_mappedData(nullptr) , m_objectMap(nullptr) , m_largestFreeItem(0) , m_freeItemCount(0) , m_nextBucketHash(nullptr) , m_dirty(false) , m_changed(false) , m_lastUsed(0) { } ~Bucket() { if(m_data != m_mappedData) { delete[] m_data; delete[] m_nextBucketHash; delete[] m_objectMap; } } void initialize(int monsterBucketExtent) { if(!m_data) { m_monsterBucketExtent = monsterBucketExtent; m_available = ItemRepositoryBucketSize; m_data = new char[ItemRepositoryBucketSize + monsterBucketExtent * DataSize]; #ifndef QT_NO_DEBUG memset(m_data, 0, (ItemRepositoryBucketSize + monsterBucketExtent * DataSize) * sizeof(char)); #endif //The bigger we make the map, the lower the probability of a clash(and thus bad performance). However it increases memory usage. m_objectMap = new short unsigned int[ObjectMapSize]; memset(m_objectMap, 0, ObjectMapSize * sizeof(short unsigned int)); m_nextBucketHash = new short unsigned int[NextBucketHashSize]; memset(m_nextBucketHash, 0, NextBucketHashSize * sizeof(short unsigned int)); m_changed = true; m_dirty = false; m_lastUsed = 0; } } template void readValue(char*& from, T& to) { to = *reinterpret_cast(from); from += sizeof(T); } void initializeFromMap(char* current) { if(!m_data) { char* start = current; readValue(current, m_monsterBucketExtent); Q_ASSERT(current - start == 4); readValue(current, m_available); m_objectMap = reinterpret_cast(current); current += sizeof(short unsigned int) * ObjectMapSize; m_nextBucketHash = reinterpret_cast(current); current += sizeof(short unsigned int) * NextBucketHashSize; readValue(current, m_largestFreeItem); readValue(current, m_freeItemCount); readValue(current, m_dirty); m_data = current; m_mappedData = current; m_changed = false; m_lastUsed = 0; VERIFY(current - start == (DataSize - ItemRepositoryBucketSize)); } } void store(QFile* file, size_t offset) { if(!m_data) return; if(static_cast(file->size()) < offset + (1+m_monsterBucketExtent)*DataSize) file->resize(offset + (1+m_monsterBucketExtent)*DataSize); file->seek(offset); file->write((char*)&m_monsterBucketExtent, sizeof(unsigned int)); file->write((char*)&m_available, sizeof(unsigned int)); file->write((char*)m_objectMap, sizeof(short unsigned int) * ObjectMapSize); file->write((char*)m_nextBucketHash, sizeof(short unsigned int) * NextBucketHashSize); file->write((char*)&m_largestFreeItem, sizeof(short unsigned int)); file->write((char*)&m_freeItemCount, sizeof(unsigned int)); file->write((char*)&m_dirty, sizeof(bool)); file->write(m_data, ItemRepositoryBucketSize + m_monsterBucketExtent * DataSize); if(static_cast(file->pos()) != offset + (1+m_monsterBucketExtent)*DataSize) { KMessageBox::error(nullptr, i18n("Failed writing to %1, probably the disk is full", file->fileName())); abort(); } m_changed = false; #ifdef DEBUG_ITEMREPOSITORY_LOADING { file->flush(); file->seek(offset); uint available, freeItemCount, monsterBucketExtent; short unsigned int largestFree; bool dirty; short unsigned int* m = new short unsigned int[ObjectMapSize]; short unsigned int* h = new short unsigned int[NextBucketHashSize]; file->read((char*)&monsterBucketExtent, sizeof(unsigned int)); char* d = new char[ItemRepositoryBucketSize + monsterBucketExtent * DataSize]; file->read((char*)&available, sizeof(unsigned int)); file->read((char*)m, sizeof(short unsigned int) * ObjectMapSize); file->read((char*)h, sizeof(short unsigned int) * NextBucketHashSize); file->read((char*)&largestFree, sizeof(short unsigned int)); file->read((char*)&freeItemCount, sizeof(unsigned int)); file->read((char*)&dirty, sizeof(bool)); file->read(d, ItemRepositoryBucketSize); Q_ASSERT(monsterBucketExtent == m_monsterBucketExtent); Q_ASSERT(available == m_available); Q_ASSERT(memcmp(d, m_data, ItemRepositoryBucketSize + monsterBucketExtent * DataSize) == 0); Q_ASSERT(memcmp(m, m_objectMap, sizeof(short unsigned int) * ObjectMapSize) == 0); Q_ASSERT(memcmp(h, m_nextBucketHash, sizeof(short unsigned int) * NextBucketHashSize) == 0); Q_ASSERT(m_largestFreeItem == largestFree); Q_ASSERT(m_freeItemCount == freeItemCount); Q_ASSERT(m_dirty == dirty); Q_ASSERT(static_cast(file->pos()) == offset + DataSize + m_monsterBucketExtent * DataSize); delete[] d; delete[] m; delete[] h; } #endif } inline char* data() { return m_data; } inline uint dataSize() const { return ItemRepositoryBucketSize + m_monsterBucketExtent * DataSize; } //Tries to find the index this item has in this bucket, or returns zero if the item isn't there yet. unsigned short findIndex(const ItemRequest& request) const { m_lastUsed = 0; unsigned short localHash = request.hash() % ObjectMapSize; unsigned short index = m_objectMap[localHash]; unsigned short follower = 0; //Walk the chain of items with the same local hash while(index && (follower = followerIndex(index)) && !(request.equals(itemFromIndex(index)))) index = follower; if(index && request.equals(itemFromIndex(index))) { return index; //We have found the item } return 0; } //Tries to get the index within this bucket, or returns zero. Will put the item into the bucket if there is room. //Created indices will never begin with 0xffff____, so you can use that index-range for own purposes. unsigned short index(const ItemRequest& request, unsigned int itemSize) { m_lastUsed = 0; unsigned short localHash = request.hash() % ObjectMapSize; unsigned short index = m_objectMap[localHash]; unsigned short insertedAt = 0; unsigned short follower = 0; //Walk the chain of items with the same local hash while(index && (follower = followerIndex(index)) && !(request.equals(itemFromIndex(index)))) index = follower; if(index && request.equals(itemFromIndex(index))) return index; //We have found the item ifDebugLostSpace( Q_ASSERT(!lostSpace()); ) prepareChange(); unsigned int totalSize = itemSize + AdditionalSpacePerItem; if(m_monsterBucketExtent) { ///This is a monster-bucket. Other rules are applied here. Only one item can be allocated, and that must be bigger than the bucket data Q_ASSERT(totalSize > ItemRepositoryBucketSize); Q_ASSERT(m_available); m_available = 0; insertedAt = AdditionalSpacePerItem; setFollowerIndex(insertedAt, 0); Q_ASSERT(m_objectMap[localHash] == 0); m_objectMap[localHash] = insertedAt; if(markForReferenceCounting) enableDUChainReferenceCounting(m_data, dataSize()); request.createItem(reinterpret_cast(m_data + insertedAt)); if(markForReferenceCounting) disableDUChainReferenceCounting(m_data); return insertedAt; } //The second condition is needed, else we can get problems with zero-length items and an overflow in insertedAt to zero if(totalSize > m_available || (!itemSize && totalSize == m_available)) { //Try finding the smallest freed item that can hold the data unsigned short currentIndex = m_largestFreeItem; unsigned short previousIndex = 0; unsigned short freeChunkSize = 0; ///@todo Achieve this without full iteration while(currentIndex && freeSize(currentIndex) > itemSize) { unsigned short follower = followerIndex(currentIndex); if(follower && freeSize(follower) >= itemSize) { //The item also fits into the smaller follower, so use that one previousIndex = currentIndex; currentIndex = follower; }else{ //The item fits into currentIndex, but not into the follower. So use currentIndex freeChunkSize = freeSize(currentIndex) - itemSize; //We need 2 bytes to store the free size if(freeChunkSize != 0 && freeChunkSize < AdditionalSpacePerItem+2) { //we can not manage the resulting free chunk as a separate item, so we cannot use this position. //Just pick the biggest free item, because there we can be sure that //either we can manage the split, or we cannot do anything at all in this bucket. freeChunkSize = freeSize(m_largestFreeItem) - itemSize; if(freeChunkSize == 0 || freeChunkSize >= AdditionalSpacePerItem+2) { previousIndex = 0; currentIndex = m_largestFreeItem; }else{ currentIndex = 0; } } break; } } if(!currentIndex || freeSize(currentIndex) < (totalSize-AdditionalSpacePerItem)) return 0; if(previousIndex) setFollowerIndex(previousIndex, followerIndex(currentIndex)); else m_largestFreeItem = followerIndex(currentIndex); --m_freeItemCount; //Took one free item out of the chain ifDebugLostSpace( Q_ASSERT((uint)lostSpace() == (uint)(freeSize(currentIndex) + AdditionalSpacePerItem)); ) if(freeChunkSize) { Q_ASSERT(freeChunkSize >= AdditionalSpacePerItem+2); unsigned short freeItemSize = freeChunkSize - AdditionalSpacePerItem; unsigned short freeItemPosition; //Insert the resulting free chunk into the list of free items, so we don't lose it if(isBehindFreeSpace(currentIndex)) { //Create the free item at the beginning of currentIndex, so it can be merged with the free space in front freeItemPosition = currentIndex; currentIndex += freeItemSize + AdditionalSpacePerItem; }else{ //Create the free item behind currentIndex freeItemPosition = currentIndex + itemSize + AdditionalSpacePerItem; } setFreeSize(freeItemPosition, freeItemSize); insertFreeItem(freeItemPosition); } insertedAt = currentIndex; Q_ASSERT((bool)m_freeItemCount == (bool)m_largestFreeItem); }else{ //We have to insert the item insertedAt = ItemRepositoryBucketSize - m_available; insertedAt += AdditionalSpacePerItem; //Room for the prepended follower-index m_available -= totalSize; } ifDebugLostSpace( Q_ASSERT(lostSpace() == totalSize); ) Q_ASSERT(!index || !followerIndex(index)); Q_ASSERT(!m_objectMap[localHash] || index); if(index) setFollowerIndex(index, insertedAt); setFollowerIndex(insertedAt, 0); if(m_objectMap[localHash] == 0) m_objectMap[localHash] = insertedAt; #ifdef DEBUG_CREATEITEM_EXTENTS char* borderBehind = m_data + insertedAt + (totalSize-AdditionalSpacePerItem); quint64 oldValueBehind = 0; if(m_available >= 8) { oldValueBehind = *(quint64*)borderBehind; *((quint64*)borderBehind) = 0xfafafafafafafafaLLU; } #endif //Last thing we do, because createItem may recursively do even more transformation of the repository if(markForReferenceCounting) enableDUChainReferenceCounting(m_data, dataSize()); request.createItem(reinterpret_cast(m_data + insertedAt)); if(markForReferenceCounting) disableDUChainReferenceCounting(m_data); #ifdef DEBUG_CREATEITEM_EXTENTS if(m_available >= 8) { //If this assertion triggers, then the item writes a bigger range than it advertised in Q_ASSERT(*((quint64*)borderBehind) == 0xfafafafafafafafaLLU); *((quint64*)borderBehind) = oldValueBehind; } #endif Q_ASSERT(itemFromIndex(insertedAt)->hash() == request.hash()); Q_ASSERT(itemFromIndex(insertedAt)->itemSize() == itemSize); ifDebugLostSpace( if(lostSpace()) qDebug() << "lost space:" << lostSpace(); Q_ASSERT(!lostSpace()); ) return insertedAt; } /// @param modulo Returns whether this bucket contains an item with (hash % modulo) == (item.hash % modulo) /// The default-parameter is the size of the next-bucket hash that is used by setNextBucketForHash and nextBucketForHash /// @note modulo MUST be a multiple of ObjectMapSize, because (b-a) | (x * h1) => (b-a) | h2, where a|b means a is a multiple of b. /// This this allows efficiently computing the clashes using the local object map hash. bool hasClashingItem(uint hash, uint modulo) { Q_ASSERT(modulo % ObjectMapSize == 0); m_lastUsed = 0; uint hashMod = hash % modulo; unsigned short localHash = hash % ObjectMapSize; unsigned short currentIndex = m_objectMap[localHash]; if(currentIndex == 0) return false; while(currentIndex) { uint currentHash = itemFromIndex(currentIndex)->hash(); Q_ASSERT(currentHash % ObjectMapSize == localHash); if(currentHash % modulo == hashMod) return true; //Clash currentIndex = followerIndex(currentIndex); } return false; } void countFollowerIndexLengths(uint& usedSlots, uint& lengths, uint& slotCount, uint& longestInBucketFollowerChain) { for(uint a = 0; a < ObjectMapSize; ++a) { unsigned short currentIndex = m_objectMap[a]; ++slotCount; uint length = 0; if(currentIndex) { ++usedSlots; while(currentIndex) { ++length; ++lengths; currentIndex = followerIndex(currentIndex); } if(length > longestInBucketFollowerChain) { // qDebug() << "follower-chain at" << a << ":" << length; longestInBucketFollowerChain = length; } } } } //Returns whether the given item is reachabe within this bucket, through its hash. bool itemReachable(const Item* item, uint hash) const { unsigned short localHash = hash % ObjectMapSize; unsigned short currentIndex = m_objectMap[localHash]; while(currentIndex) { if(itemFromIndex(currentIndex) == item) return true; currentIndex = followerIndex(currentIndex); } return false; } template void deleteItem(unsigned short index, unsigned int hash, Repository& repository) { ifDebugLostSpace( Q_ASSERT(!lostSpace()); ) m_lastUsed = 0; prepareChange(); unsigned int size = itemFromIndex(index)->itemSize(); //Step 1: Remove the item from the data-structures that allow finding it: m_objectMap unsigned short localHash = hash % ObjectMapSize; unsigned short currentIndex = m_objectMap[localHash]; unsigned short previousIndex = 0; //Fix the follower-link by setting the follower of the previous item to the next one, or updating m_objectMap while(currentIndex != index) { previousIndex = currentIndex; currentIndex = followerIndex(currentIndex); //If this assertion triggers, the deleted item was not registered under the given hash Q_ASSERT(currentIndex); } Q_ASSERT(currentIndex == index); if(!previousIndex) //The item was directly in the object map m_objectMap[localHash] = followerIndex(index); else setFollowerIndex(previousIndex, followerIndex(index)); Item* item = const_cast(itemFromIndex(index)); if(markForReferenceCounting) enableDUChainReferenceCounting(m_data, dataSize()); ItemRequest::destroy(item, repository); if(markForReferenceCounting) disableDUChainReferenceCounting(m_data); #ifndef QT_NO_DEBUG #if defined(__GNUC__) && !defined(__INTEL_COMPILER) && (((__GNUC__ * 100) + __GNUC_MINOR__) >= 800) #pragma GCC diagnostic push #pragma GCC diagnostic ignored "-Wclass-memaccess" +#endif memset(item, 0, size); //For debugging, so we notice the data is wrong +#if defined(__GNUC__) && !defined(__INTEL_COMPILER) && (((__GNUC__ * 100) + __GNUC_MINOR__) >= 800) #pragma GCC diagnostic pop #endif #endif if(m_monsterBucketExtent) { ///This is a monster-bucket. Make it completely empty again. Q_ASSERT(!m_available); m_available = ItemRepositoryBucketSize; //Items are always inserted into monster-buckets at a fixed position Q_ASSERT(currentIndex == AdditionalSpacePerItem); Q_ASSERT(m_objectMap[localHash] == 0); }else{ ///Put the space into the free-set setFreeSize(index, size); //Try merging the created free item to other free items around, else add it into the free list insertFreeItem(index); if(m_freeItemCount == 1 && freeSize(m_largestFreeItem) + m_available == ItemRepositoryBucketSize) { //Everything has been deleted, there is only free space left. Make the bucket empty again, //so it can later also be used as a monster-bucket. m_available = ItemRepositoryBucketSize; m_freeItemCount = 0; m_largestFreeItem = 0; } } Q_ASSERT((bool)m_freeItemCount == (bool)m_largestFreeItem); ifDebugLostSpace( Q_ASSERT(!lostSpace()); ) #ifdef DEBUG_INCORRECT_DELETE //Make sure the item cannot be found any more { unsigned short localHash = hash % ObjectMapSize; unsigned short currentIndex = m_objectMap[localHash]; while(currentIndex && currentIndex != index) { previousIndex = currentIndex; currentIndex = followerIndex(currentIndex); } Q_ASSERT(!currentIndex); //The item must not be found } #endif // Q_ASSERT(canAllocateItem(size)); } ///@warning The returned item may be in write-protected memory, so never try doing a const_cast and changing some data /// If you need to change something, use dynamicItemFromIndex ///@warning When using multi-threading, mutex() must be locked as long as you use the returned data inline const Item* itemFromIndex(unsigned short index) const { m_lastUsed = 0; return reinterpret_cast(m_data+index); } bool isEmpty() const { return m_available == ItemRepositoryBucketSize; } ///Returns true if this bucket has no nextBucketForHash links bool noNextBuckets() const { for(int a = 0; a < NextBucketHashSize; ++a) if(m_nextBucketHash[a]) return false; return true; } uint available() const { return m_available; } uint usedMemory() const { return ItemRepositoryBucketSize - m_available; } template bool visitAllItems(Visitor& visitor) const { m_lastUsed = 0; for(uint a = 0; a < ObjectMapSize; ++a) { uint currentIndex = m_objectMap[a]; while(currentIndex) { //Get the follower early, so there is no problems when the current //index is removed if(!visitor(reinterpret_cast(m_data+currentIndex))) return false; currentIndex = followerIndex(currentIndex); } } return true; } ///Returns whether something was changed template int finalCleanup(Repository& repository) { int changed = 0; while(m_dirty) { m_dirty = false; for(uint a = 0; a < ObjectMapSize; ++a) { uint currentIndex = m_objectMap[a]; while(currentIndex) { //Get the follower early, so there is no problems when the current //index is removed const Item* item = reinterpret_cast(m_data+currentIndex); if(!ItemRequest::persistent(item)) { changed += item->itemSize(); deleteItem(currentIndex, item->hash(), repository); m_dirty = true; //Set to dirty so we re-iterate break; } currentIndex = followerIndex(currentIndex); } } } return changed; } unsigned short nextBucketForHash(uint hash) const { m_lastUsed = 0; return m_nextBucketHash[hash % NextBucketHashSize]; } void setNextBucketForHash(unsigned int hash, unsigned short bucket) { m_lastUsed = 0; prepareChange(); m_nextBucketHash[hash % NextBucketHashSize] = bucket; } uint freeItemCount() const { return m_freeItemCount; } short unsigned int totalFreeItemsSize() const { short unsigned int ret = 0; short unsigned int currentIndex = m_largestFreeItem; while(currentIndex) { ret += freeSize(currentIndex); currentIndex = followerIndex(currentIndex); } return ret; } //Size of the largest item that could be inserted into this bucket short unsigned int largestFreeSize() const { short unsigned int ret = 0; if(m_largestFreeItem) ret = freeSize(m_largestFreeItem); if(m_available > (uint)(AdditionalSpacePerItem + (uint)ret)) { ret = m_available - AdditionalSpacePerItem; Q_ASSERT(ret == (m_available - AdditionalSpacePerItem)); } return ret; } bool canAllocateItem(unsigned int size) const { short unsigned int currentIndex = m_largestFreeItem; while(currentIndex) { short unsigned int currentFree = freeSize(currentIndex); if(currentFree < size) return false; //Either we need an exact match, or 2 additional bytes to manage the resulting gap if(size == currentFree || currentFree - size >= AdditionalSpacePerItem + 2) return true; currentIndex = followerIndex(currentIndex); } return false; } void tick() const { ++m_lastUsed; } //How many ticks ago the item was last used int lastUsed() const { return m_lastUsed; } //Whether this bucket was changed since it was last stored bool changed() const { return m_changed; } void prepareChange() { m_changed = true; m_dirty = true; makeDataPrivate(); } bool dirty() const { return m_dirty; } ///Returns the count of following buckets that were merged onto this buckets data array int monsterBucketExtent() const { return m_monsterBucketExtent; } //Counts together the space that is neither accessible through m_objectMap nor through the free items uint lostSpace() { if(m_monsterBucketExtent) return 0; uint need = ItemRepositoryBucketSize - m_available; uint found = 0; for(uint a = 0; a < ObjectMapSize; ++a) { uint currentIndex = m_objectMap[a]; while(currentIndex) { found += reinterpret_cast(m_data+currentIndex)->itemSize() + AdditionalSpacePerItem; currentIndex = followerIndex(currentIndex); } } uint currentIndex = m_largestFreeItem; while(currentIndex) { found += freeSize(currentIndex) + AdditionalSpacePerItem; currentIndex = followerIndex(currentIndex); } return need-found; } private: void makeDataPrivate() { if(m_mappedData == m_data) { short unsigned int* oldObjectMap = m_objectMap; short unsigned int* oldNextBucketHash = m_nextBucketHash; m_data = new char[ItemRepositoryBucketSize + m_monsterBucketExtent * DataSize]; m_objectMap = new short unsigned int[ObjectMapSize]; m_nextBucketHash = new short unsigned int[NextBucketHashSize]; memcpy(m_data, m_mappedData, ItemRepositoryBucketSize + m_monsterBucketExtent * DataSize); memcpy(m_objectMap, oldObjectMap, ObjectMapSize * sizeof(short unsigned int)); memcpy(m_nextBucketHash, oldNextBucketHash, NextBucketHashSize * sizeof(short unsigned int)); } } ///Merges the given index item, which must have a freeSize() set, to surrounding free items, and inserts the result. ///The given index itself should not be in the free items chain yet. ///Returns whether the item was inserted somewhere. void insertFreeItem(unsigned short index) { //If the item-size is fixed, we don't need to do any management. Just keep a list of free items. Items of other size will never be requested. if(!fixedItemSize) { unsigned short currentIndex = m_largestFreeItem; unsigned short previousIndex = 0; while(currentIndex) { Q_ASSERT(currentIndex != index); #ifndef QT_NO_DEBUG unsigned short currentFreeSize = freeSize(currentIndex); #endif ///@todo Achieve this without iterating through all items in the bucket(This is very slow) //Merge behind index if(currentIndex == index + freeSize(index) + AdditionalSpacePerItem) { //Remove currentIndex from the free chain, since it's merged backwards into index if(previousIndex && followerIndex(currentIndex)) Q_ASSERT(freeSize(previousIndex) >= freeSize(followerIndex(currentIndex))); if(previousIndex) setFollowerIndex(previousIndex, followerIndex(currentIndex)); else m_largestFreeItem = followerIndex(currentIndex); --m_freeItemCount; //One was removed //currentIndex is directly behind index, touching its space. Merge them. setFreeSize(index, freeSize(index) + AdditionalSpacePerItem + freeSize(currentIndex)); //Recurse to do even more merging insertFreeItem(index); return; } //Merge before index if(index == currentIndex + freeSize(currentIndex) + AdditionalSpacePerItem) { if(previousIndex && followerIndex(currentIndex)) Q_ASSERT(freeSize(previousIndex) >= freeSize(followerIndex(currentIndex))); //Remove currentIndex from the free chain, since insertFreeItem wants //it not to be in the chain, and it will be inserted in another place if(previousIndex) setFollowerIndex(previousIndex, followerIndex(currentIndex)); else m_largestFreeItem = followerIndex(currentIndex); --m_freeItemCount; //One was removed //index is directly behind currentIndex, touching its space. Merge them. setFreeSize(currentIndex, freeSize(currentIndex) + AdditionalSpacePerItem + freeSize(index)); //Recurse to do even more merging insertFreeItem(currentIndex); return; } previousIndex = currentIndex; currentIndex = followerIndex(currentIndex); #ifndef QT_NO_DEBUG if(currentIndex) Q_ASSERT(freeSize(currentIndex) <= currentFreeSize); #endif } } insertToFreeChain(index); } ///Only inserts the item in the correct position into the free chain. index must not be in the chain yet. void insertToFreeChain(unsigned short index) { if(!fixedItemSize) { ///@todo Use some kind of tree to find the correct position in the chain(This is very slow) //Insert the free item into the chain opened by m_largestFreeItem unsigned short currentIndex = m_largestFreeItem; unsigned short previousIndex = 0; unsigned short size = freeSize(index); while(currentIndex && freeSize(currentIndex) > size) { Q_ASSERT(currentIndex != index); //must not be in the chain yet previousIndex = currentIndex; currentIndex = followerIndex(currentIndex); } if(currentIndex) Q_ASSERT(freeSize(currentIndex) <= size); setFollowerIndex(index, currentIndex); if(previousIndex) { Q_ASSERT(freeSize(previousIndex) >= size); setFollowerIndex(previousIndex, index); } else //This item is larger than all already registered free items, or there are none. m_largestFreeItem = index; }else{ Q_ASSERT(freeSize(index) == fixedItemSize); //When all items have the same size, just prepent to the front. setFollowerIndex(index, m_largestFreeItem); m_largestFreeItem = index; } ++m_freeItemCount; } /// Returns true if the given index is right behind free space, and thus can be merged to the free space. bool isBehindFreeSpace(unsigned short index) const { // TODO: Without iteration! unsigned short currentIndex = m_largestFreeItem; while(currentIndex) { if(index == currentIndex + freeSize(currentIndex) + AdditionalSpacePerItem) return true; currentIndex = followerIndex(currentIndex); } return false; } /// @param index the index of an item @return The index of the next item in the chain of items with a same local hash, or zero inline unsigned short followerIndex(unsigned short index) const { Q_ASSERT(index >= 2); return *reinterpret_cast(m_data+(index-2)); } void setFollowerIndex(unsigned short index, unsigned short follower) { Q_ASSERT(index >= 2); *reinterpret_cast(m_data+(index-2)) = follower; } // Only returns the current value if the item is actually free inline unsigned short freeSize(unsigned short index) const { return *reinterpret_cast(m_data+index); } //Convenience function to set the free-size, only for freed items void setFreeSize(unsigned short index, unsigned short size) { *reinterpret_cast(m_data+index) = size; } int m_monsterBucketExtent; //If this is a monster-bucket, this contains the count of follower-buckets that belong to this one unsigned int m_available; char* m_data; //Structure of the data: (2 byte), (item.size() byte) char* m_mappedData; //Read-only memory-mapped data. If this equals m_data, m_data must not be written short unsigned int* m_objectMap; //Points to the first object in m_data with (hash % ObjectMapSize) == index. Points to the item itself, so subtract 1 to get the pointer to the next item with same local hash. short unsigned int m_largestFreeItem; //Points to the largest item that is currently marked as free, or zero. That one points to the next largest one through followerIndex unsigned int m_freeItemCount; unsigned short* m_nextBucketHash; bool m_dirty; //Whether the data was changed since the last finalCleanup bool m_changed; //Whether this bucket was changed since it was last stored to disk mutable int m_lastUsed; //How many ticks ago this bucket was last accessed }; template struct Locker { //This is a dummy that does nothing template explicit Locker(const T& /*t*/) { } }; template<> struct Locker { explicit Locker(QMutex* mutex) : m_mutex(mutex) { m_mutex->lock(); } ~Locker() { m_mutex->unlock(); } QMutex* m_mutex; }; ///This object needs to be kept alive as long as you change the contents of an item ///stored in the repository. It is needed to correctly track the reference counting ///within disk-storage. ///@warning You can not freely copy this around, when you create a copy, the copy source /// becomes invalid template class DynamicItem { public: DynamicItem(Item* i, void* start, uint size) : m_item(i), m_start(start) { if(markForReferenceCounting) enableDUChainReferenceCounting(m_start, size); // qDebug() << "enabling" << i << "to" << (void*)(((char*)i)+size); } ~DynamicItem() { if(m_start) { // qDebug() << "destructor-disabling" << m_item; if(markForReferenceCounting) disableDUChainReferenceCounting(m_start); } } DynamicItem(const DynamicItem& rhs) : m_item(rhs.m_item), m_start(rhs.m_start) { // qDebug() << "stealing" << m_item; Q_ASSERT(rhs.m_start); rhs.m_start = nullptr; } Item* operator->() { return m_item; } Item* m_item; private: mutable void* m_start; DynamicItem& operator=(const DynamicItem&); }; ///@tparam Item See ExampleItem ///@tparam ItemRequest See ExampleReqestItem ///@tparam fixedItemSize When this is true, all inserted items must have the same size. /// This greatly simplifies and speeds up the task of managing free items within the buckets. ///@tparam markForReferenceCounting Whether the data within the repository should be marked for reference-counting. /// This costs a bit of performance, but must be enabled if there may be data in the repository /// that does on-disk reference counting, like IndexedString, IndexedIdentifier, etc. ///@tparam threadSafe Whether class access should be thread-safe. Disabling this is dangerous when you do multi-threading. /// You have to make sure that mutex() is locked whenever the repository is accessed. template class ItemRepository : public AbstractItemRepository { typedef Locker ThisLocker; typedef Bucket MyBucket; enum { //Must be a multiple of Bucket::ObjectMapSize, so Bucket::hasClashingItem can be computed //Must also be a multiple of Bucket::NextBucketHashSize, for the same reason.(Currently those are same) bucketHashSize = (targetBucketHashSize / MyBucket::ObjectMapSize) * MyBucket::ObjectMapSize }; enum { BucketStartOffset = sizeof(uint) * 7 + sizeof(short unsigned int) * bucketHashSize //Position in the data where the bucket array starts }; public: ///@param registry May be zero, then the repository will not be registered at all. Else, the repository will register itself to that registry. /// If this is zero, you have to care about storing the data using store() and/or close() by yourself. It does not happen automatically. /// For the global standard registry, the storing/loading is triggered from within duchain, so you don't need to care about it. explicit ItemRepository(const QString& repositoryName, ItemRepositoryRegistry* registry = &globalItemRepositoryRegistry(), uint repositoryVersion = 1, AbstractRepositoryManager* manager = nullptr) : m_ownMutex(QMutex::Recursive) , m_mutex(&m_ownMutex) , m_repositoryName(repositoryName) , m_registry(registry) , m_file(nullptr) , m_dynamicFile(nullptr) , m_repositoryVersion(repositoryVersion) , m_manager(manager) { m_unloadingEnabled = true; m_metaDataChanged = true; m_buckets.resize(10); m_buckets.fill(nullptr); memset(m_firstBucketForHash, 0, bucketHashSize * sizeof(short unsigned int)); m_statBucketHashClashes = m_statItemCount = 0; m_currentBucket = 1; //Skip the first bucket, we won't use it so we have the zero indices for special purposes if(m_registry) m_registry->registerRepository(this, m_manager); } ~ItemRepository() { if(m_registry) m_registry->unRegisterRepository(this); close(); } ///Unloading of buckets is enabled by default. Use this to disable it. When unloading is enabled, the data ///gotten from must only itemFromIndex must not be used for a long time. void setUnloadingEnabled(bool enabled) { m_unloadingEnabled = enabled; } ///Returns the index for the given item. If the item is not in the repository yet, it is inserted. ///The index can never be zero. Zero is reserved for your own usage as invalid ///@param request Item to retrieve the index from unsigned int index(const ItemRequest& request) { ThisLocker lock(m_mutex); const uint hash = request.hash(); const uint size = request.itemSize(); // Bucket indexes tracked while walking the bucket chain for this request hash unsigned short bucketInChainWithSpace = 0; unsigned short lastBucketWalked = 0; const ushort foundIndexInBucket = walkBucketChain(hash, [&](ushort bucketIdx, const MyBucket* bucketPtr) { lastBucketWalked = bucketIdx; const ushort found = bucketPtr->findIndex(request); if (!found && !bucketInChainWithSpace && bucketPtr->canAllocateItem(size)) { bucketInChainWithSpace = bucketIdx; } return found; }); if (foundIndexInBucket) { // 'request' is already present, return the existing index return createIndex(lastBucketWalked, foundIndexInBucket); } /* * Disclaimer: Writer of comment != writer of code, believe with caution * * Requested item does not yet exist. Need to find a place to put it... * * First choice is to place it in an existing bucket in the chain for the request hash * Second choice is to find an existing bucket anywhere with enough space * Otherwise use m_currentBucket (the latest unused bucket) * * If the chosen bucket fails to allocate the item, merge buckets to create a monster (dragon?) * * Finally, if the first option failed or the selected bucket failed to allocate, add the * bucket which the item ended up in to the chain of buckets for the request's hash */ m_metaDataChanged = true; const bool pickedBucketInChain = bucketInChainWithSpace; int useBucket = bucketInChainWithSpace; int reOrderFreeSpaceBucketIndex = -1; if (!pickedBucketInChain) { //Try finding an existing bucket with deleted space to store the data into for(int a = 0; a < m_freeSpaceBuckets.size(); ++a) { MyBucket* bucketPtr = bucketForIndex(m_freeSpaceBuckets[a]); Q_ASSERT(bucketPtr->largestFreeSize()); if(bucketPtr->canAllocateItem(size)) { //The item fits into the bucket. useBucket = m_freeSpaceBuckets[a]; reOrderFreeSpaceBucketIndex = a; break; } } if (!useBucket) { useBucket = m_currentBucket; } } else { reOrderFreeSpaceBucketIndex = m_freeSpaceBuckets.indexOf(useBucket); } //The item isn't in the repository yet, find a new bucket for it while(1) { if(useBucket >= m_buckets.size()) { if(m_buckets.size() >= 0xfffe) { //We have reserved the last bucket index 0xffff for special purposes //the repository has overflown. qWarning() << "Found no room for an item in" << m_repositoryName << "size of the item:" << request.itemSize(); return 0; }else{ //Allocate new buckets m_buckets.resize(m_buckets.size() + 10); } } MyBucket* bucketPtr = m_buckets.at(useBucket); if(!bucketPtr) { initializeBucket(useBucket); bucketPtr = m_buckets.at(useBucket); } ENSURE_REACHABLE(useBucket); Q_ASSERT_X(!bucketPtr->findIndex(request), Q_FUNC_INFO, "found item in unexpected bucket, ensure your ItemRequest::equals method is correct. Note: For custom AbstractType's e.g. ensure you have a proper equals() override"); unsigned short indexInBucket = bucketPtr->index(request, size); //If we could not allocate the item in an empty bucket, then we need to create a monster-bucket that //can hold the data. if(bucketPtr->isEmpty() && !indexInBucket) { ///@todo Move this compound statement into an own function uint totalSize = size + MyBucket::AdditionalSpacePerItem; Q_ASSERT((totalSize > ItemRepositoryBucketSize)); useBucket = 0; //The item did not fit in, we need a monster-bucket(Merge consecutive buckets) ///Step one: Search whether we can merge multiple empty buckets in the free-list into one monster-bucket int rangeStart = -1; int rangeEnd = -1; for(int a = 0; a < m_freeSpaceBuckets.size(); ++a) { MyBucket* bucketPtr = bucketForIndex(m_freeSpaceBuckets[a]); if(bucketPtr->isEmpty()) { //This bucket is a candidate for monster-bucket merging int index = (int)m_freeSpaceBuckets[a]; if(rangeEnd != index) { rangeStart = index; rangeEnd = index+1; }else{ ++rangeEnd; } if(rangeStart != rangeEnd) { uint extent = rangeEnd - rangeStart - 1; uint totalAvailableSpace = bucketForIndex(rangeStart)->available() + MyBucket::DataSize * (rangeEnd - rangeStart - 1); if(totalAvailableSpace > totalSize) { Q_ASSERT(extent); ///We can merge these buckets into one monster-bucket that can hold the data Q_ASSERT((uint)m_freeSpaceBuckets[a-extent] == (uint)rangeStart); m_freeSpaceBuckets.remove(a-extent, extent+1); useBucket = rangeStart; convertMonsterBucket(rangeStart, extent); break; } } } } if(!useBucket) { //Create a new monster-bucket at the end of the data int needMonsterExtent = (totalSize - ItemRepositoryBucketSize) / MyBucket::DataSize + 1; Q_ASSERT(needMonsterExtent); if(m_currentBucket + needMonsterExtent + 1 > m_buckets.size()) { m_buckets.resize(m_buckets.size() + 10 + needMonsterExtent + 1); } useBucket = m_currentBucket; convertMonsterBucket(useBucket, needMonsterExtent); m_currentBucket += 1 + needMonsterExtent; Q_ASSERT(m_currentBucket < ItemRepositoryBucketLimit); Q_ASSERT(m_buckets[m_currentBucket - 1 - needMonsterExtent] && m_buckets[m_currentBucket - 1 - needMonsterExtent]->monsterBucketExtent() == needMonsterExtent); } Q_ASSERT(useBucket); bucketPtr = bucketForIndex(useBucket); indexInBucket = bucketPtr->index(request, size); Q_ASSERT(indexInBucket); } if(indexInBucket) { ++m_statItemCount; const int previousBucketNumber = lastBucketWalked; unsigned short* const bucketHashPosition = m_firstBucketForHash + (hash % bucketHashSize); if(!(*bucketHashPosition)) { Q_ASSERT(!previousBucketNumber); (*bucketHashPosition) = useBucket; ENSURE_REACHABLE(useBucket); } else if(!pickedBucketInChain && previousBucketNumber && previousBucketNumber != useBucket) { //Should happen rarely ++m_statBucketHashClashes; ///Debug: Detect infinite recursion ifDebugInfiniteRecursion(Q_ASSERT(walkBucketLinks(*bucketHashPosition, hash, previousBucketNumber));) //Find the position where the paths of useBucket and *bucketHashPosition intersect, and insert useBucket //there. That way, we don't create loops. QPair intersect = hashChainIntersection(*bucketHashPosition, useBucket, hash); Q_ASSERT(m_buckets[previousBucketNumber]->nextBucketForHash(hash) == 0); if(!intersect.second) { ifDebugInfiniteRecursion(Q_ASSERT(!walkBucketLinks(*bucketHashPosition, hash, useBucket));) ifDebugInfiniteRecursion(Q_ASSERT(!walkBucketLinks(useBucket, hash, previousBucketNumber));) Q_ASSERT(m_buckets[previousBucketNumber]->nextBucketForHash(hash) == 0); m_buckets[previousBucketNumber]->setNextBucketForHash(hash, useBucket); ENSURE_REACHABLE(useBucket); ENSURE_REACHABLE(previousBucketNumber); ifDebugInfiniteRecursion(Q_ASSERT(walkBucketLinks(*bucketHashPosition, hash, useBucket));) } else if(intersect.first) { ifDebugInfiniteRecursion(Q_ASSERT(bucketForIndex(intersect.first)->nextBucketForHash(hash) == intersect.second);) ifDebugInfiniteRecursion(Q_ASSERT(!walkBucketLinks(*bucketHashPosition, hash, useBucket));) ifDebugInfiniteRecursion(Q_ASSERT(walkBucketLinks(*bucketHashPosition, hash, intersect.second));) ifDebugInfiniteRecursion(Q_ASSERT(walkBucketLinks(*bucketHashPosition, hash, intersect.first));) ifDebugInfiniteRecursion(Q_ASSERT(bucketForIndex(intersect.first)->nextBucketForHash(hash) == intersect.second);) ifDebugInfiniteRecursion(Q_ASSERT(walkBucketLinks(useBucket, hash, intersect.second));) ifDebugInfiniteRecursion(Q_ASSERT(!walkBucketLinks(useBucket, hash, intersect.first));) bucketForIndex(intersect.first)->setNextBucketForHash(hash, useBucket); ENSURE_REACHABLE(useBucket); ENSURE_REACHABLE(intersect.second); ifDebugInfiniteRecursion(Q_ASSERT(walkBucketLinks(*bucketHashPosition, hash, useBucket));) ifDebugInfiniteRecursion(Q_ASSERT(walkBucketLinks(*bucketHashPosition, hash, intersect.second));) } else { //State: intersect.first == 0 && intersect.second != 0. This means that whole compleet //chain opened by *bucketHashPosition with the hash-value is also following useBucket, //so useBucket can just be inserted at the top ifDebugInfiniteRecursion(Q_ASSERT(!walkBucketLinks(*bucketHashPosition, hash, useBucket));) ifDebugInfiniteRecursion(Q_ASSERT(walkBucketLinks(useBucket, hash, *bucketHashPosition));) unsigned short oldStart = *bucketHashPosition; *bucketHashPosition = useBucket; ENSURE_REACHABLE(useBucket); ENSURE_REACHABLE(oldStart); Q_UNUSED(oldStart); } } if(reOrderFreeSpaceBucketIndex != -1) updateFreeSpaceOrder(reOrderFreeSpaceBucketIndex); return createIndex(useBucket, indexInBucket); }else{ //This should never happen when we picked a bucket for re-use Q_ASSERT(!pickedBucketInChain); Q_ASSERT(reOrderFreeSpaceBucketIndex == -1); Q_ASSERT(useBucket == m_currentBucket); if(!bucketForIndex(useBucket)->isEmpty()) putIntoFreeList(useBucket, bucketPtr); ++m_currentBucket; Q_ASSERT(m_currentBucket < ItemRepositoryBucketLimit); useBucket = m_currentBucket; } } //We haven't found a bucket that already contains the item, so append it to the current bucket qWarning() << "Found no bucket for an item in" << m_repositoryName; return 0; } ///Returns zero if the item is not in the repository yet unsigned int findIndex(const ItemRequest& request) { ThisLocker lock(m_mutex); return walkBucketChain(request.hash(), [this, &request](ushort bucketIdx, const MyBucket* bucketPtr) { const ushort indexInBucket = bucketPtr->findIndex(request); return indexInBucket ? createIndex(bucketIdx, indexInBucket) : 0u; }); } ///Deletes the item from the repository. void deleteItem(unsigned int index) { verifyIndex(index); ThisLocker lock(m_mutex); m_metaDataChanged = true; const uint hash = itemFromIndex(index)->hash(); const ushort bucket = (index >> 16); //Apart from removing the item itself, we may have to recreate the nextBucketForHash link, so we need the previous bucket MyBucket* previousBucketPtr = nullptr; MyBucket* const bucketPtr = walkBucketChain(hash, [bucket, &previousBucketPtr](ushort bucketIdx, MyBucket* bucketPtr) -> MyBucket* { if (bucket != bucketIdx) { previousBucketPtr = bucketPtr; return nullptr; } return bucketPtr; // found bucket, stop looking }); //Make sure the index was reachable through the hash chain Q_ASSERT(bucketPtr); --m_statItemCount; bucketPtr->deleteItem(index, hash, *this); /** * Now check whether the link root/previousBucketNumber -> bucket is still needed. */ if (!previousBucketPtr) { // This bucket is linked in the m_firstBucketForHash array, find the next clashing bucket in the chain // There may be items in the chain that clash only with MyBucket::NextBucketHashSize, skipped here m_firstBucketForHash[hash % bucketHashSize] = walkBucketChain(hash, [hash](ushort bucketIdx, MyBucket *bucketPtr){ if (bucketPtr->hasClashingItem(hash, bucketHashSize)) { return bucketIdx; } return static_cast(0); }); } else if(!bucketPtr->hasClashingItem(hash, MyBucket::NextBucketHashSize)) { // TODO: Skip clashing items reachable from m_firstBucketForHash // (see note in usePermissiveModuloWhenRemovingClashLinks() test) ENSURE_REACHABLE(bucket); previousBucketPtr->setNextBucketForHash(hash, bucketPtr->nextBucketForHash(hash)); Q_ASSERT(m_buckets[bucketPtr->nextBucketForHash(hash)] != previousBucketPtr); } ENSURE_REACHABLE(bucket); if(bucketPtr->monsterBucketExtent()) { //Convert the monster-bucket back to multiple normal buckets, and put them into the free list uint newBuckets = bucketPtr->monsterBucketExtent()+1; Q_ASSERT(bucketPtr->isEmpty()); if (!previousBucketPtr) { // see https://bugs.kde.org/show_bug.cgi?id=272408 // the monster bucket will be deleted and new smaller ones created // the next bucket for this hash is invalid anyways as done above // but calling the below unconditionally leads to other issues... bucketPtr->setNextBucketForHash(hash, 0); } convertMonsterBucket(bucket, 0); for(uint created = bucket; created < bucket + newBuckets; ++created) { putIntoFreeList(created, bucketForIndex(created)); #ifdef DEBUG_MONSTERBUCKETS Q_ASSERT(m_freeSpaceBuckets.indexOf(created) != -1); #endif } }else{ putIntoFreeList(bucket, bucketPtr); } } typedef DynamicItem MyDynamicItem; ///This returns an editable version of the item. @warning: Never change an entry that affects the hash, ///or the equals(..) function. That would completely destroy the consistency. ///@param index The index. It must be valid(match an existing item), and nonzero. ///@warning If you use this, make sure you lock mutex() before calling, /// and hold it until you're ready using/changing the data.. MyDynamicItem dynamicItemFromIndex(unsigned int index) { verifyIndex(index); ThisLocker lock(m_mutex); unsigned short bucket = (index >> 16); MyBucket* bucketPtr = m_buckets.at(bucket); if(!bucketPtr) { initializeBucket(bucket); bucketPtr = m_buckets.at(bucket); } bucketPtr->prepareChange(); unsigned short indexInBucket = index & 0xffff; return MyDynamicItem(const_cast(bucketPtr->itemFromIndex(indexInBucket)), bucketPtr->data(), bucketPtr->dataSize()); } ///This returns an editable version of the item. @warning: Never change an entry that affects the hash, ///or the equals(..) function. That would completely destroy the consistency. ///@param index The index. It must be valid(match an existing item), and nonzero. ///@warning If you use this, make sure you lock mutex() before calling, /// and hold it until you're ready using/changing the data.. ///@warning If you change contained complex items that depend on reference-counting, you /// must use dynamicItemFromIndex(..) instead of dynamicItemFromIndexSimple(..) Item* dynamicItemFromIndexSimple(unsigned int index) { verifyIndex(index); ThisLocker lock(m_mutex); unsigned short bucket = (index >> 16); MyBucket* bucketPtr = m_buckets.at(bucket); if(!bucketPtr) { initializeBucket(bucket); bucketPtr = m_buckets.at(bucket); } bucketPtr->prepareChange(); unsigned short indexInBucket = index & 0xffff; return const_cast(bucketPtr->itemFromIndex(indexInBucket)); } ///@param index The index. It must be valid(match an existing item), and nonzero. const Item* itemFromIndex(unsigned int index) const { verifyIndex(index); ThisLocker lock(m_mutex); unsigned short bucket = (index >> 16); const MyBucket* bucketPtr = m_buckets.at(bucket); if(!bucketPtr) { initializeBucket(bucket); bucketPtr = m_buckets.at(bucket); } unsigned short indexInBucket = index & 0xffff; return bucketPtr->itemFromIndex(indexInBucket); } struct Statistics { Statistics() : loadedBuckets(-1), currentBucket(-1), usedMemory(-1), loadedMonsterBuckets(-1), usedSpaceForBuckets(-1), freeSpaceInBuckets(-1), lostSpace(-1), freeUnreachableSpace(-1), hashClashedItems(-1), totalItems(-1), hashSize(-1), hashUse(-1), averageInBucketHashSize(-1), averageInBucketUsedSlotCount(-1), averageInBucketSlotChainLength(-1), longestInBucketChain(-1), longestNextBucketChain(-1), totalBucketFollowerSlots(-1), averageNextBucketForHashSequenceLength(-1) { } uint loadedBuckets; uint currentBucket; uint usedMemory; uint loadedMonsterBuckets; uint usedSpaceForBuckets; uint freeSpaceInBuckets; uint lostSpace; uint freeUnreachableSpace; uint hashClashedItems; uint totalItems; uint emptyBuckets; uint hashSize; //How big the hash is uint hashUse; //How many slots in the hash are used uint averageInBucketHashSize; uint averageInBucketUsedSlotCount; float averageInBucketSlotChainLength; uint longestInBucketChain; uint longestNextBucketChain; uint totalBucketFollowerSlots; //Total count of used slots in the nextBucketForHash structure float averageNextBucketForHashSequenceLength; //Average sequence length of a nextBucketForHash sequence(If not empty) QString print() const { QString ret; ret += QStringLiteral("loaded buckets: %1 current bucket: %2 used memory: %3 loaded monster buckets: %4").arg(loadedBuckets).arg(currentBucket).arg(usedMemory).arg(loadedMonsterBuckets); ret += QStringLiteral("\nbucket hash clashed items: %1 total items: %2").arg(hashClashedItems).arg(totalItems); ret += QStringLiteral("\nused space for buckets: %1 free space in buckets: %2 lost space: %3").arg(usedSpaceForBuckets).arg(freeSpaceInBuckets).arg(lostSpace); ret += QStringLiteral("\nfree unreachable space: %1 empty buckets: %2").arg(freeUnreachableSpace).arg(emptyBuckets); ret += QStringLiteral("\nhash size: %1 hash slots used: %2").arg(hashSize).arg(hashUse); ret += QStringLiteral("\naverage in-bucket hash size: %1 average in-bucket used hash slot count: %2 average in-bucket slot chain length: %3 longest in-bucket follower chain: %4").arg(averageInBucketHashSize).arg(averageInBucketUsedSlotCount).arg(averageInBucketSlotChainLength).arg(longestInBucketChain); ret += QStringLiteral("\ntotal count of used next-bucket-for-hash slots: %1 average next-bucket-for-hash sequence length: %2 longest next-bucket chain: %3").arg(totalBucketFollowerSlots).arg(averageNextBucketForHashSequenceLength).arg(longestNextBucketChain); return ret; } operator QString() const { return print(); } }; QString printStatistics() const override { return statistics().print(); } Statistics statistics() const { Statistics ret; uint loadedBuckets = 0; for(int a = 0; a < m_buckets.size(); ++a) if(m_buckets[a]) ++loadedBuckets; #ifdef DEBUG_MONSTERBUCKETS for(int a = 0; a < m_freeSpaceBuckets.size(); ++a) { if(a > 0) { uint prev = a-1; uint prevLargestFree = bucketForIndex(m_freeSpaceBuckets[prev])->largestFreeSize(); uint largestFree = bucketForIndex(m_freeSpaceBuckets[a])->largestFreeSize(); Q_ASSERT( (prevLargestFree < largestFree) || (prevLargestFree == largestFree && m_freeSpaceBuckets[prev] < m_freeSpaceBuckets[a]) ); } } #endif ret.hashSize = bucketHashSize; ret.hashUse = 0; for(uint a = 0; a < bucketHashSize; ++a) if(m_firstBucketForHash[a]) ++ret.hashUse; ret.emptyBuckets = 0; uint loadedMonsterBuckets = 0; for(int a = 0; a < m_buckets.size(); ++a) if(m_buckets[a] && m_buckets[a]->monsterBucketExtent()) loadedMonsterBuckets += m_buckets[a]->monsterBucketExtent()+1; uint usedBucketSpace = MyBucket::DataSize * m_currentBucket; uint freeBucketSpace = 0, freeUnreachableSpace = 0; uint lostSpace = 0; //Should be zero, else something is wrong uint totalInBucketHashSize = 0, totalInBucketUsedSlotCount = 0, totalInBucketChainLengths = 0; uint bucketCount = 0; ret.totalBucketFollowerSlots = 0; ret.averageNextBucketForHashSequenceLength = 0; ret.longestNextBucketChain = 0; ret.longestInBucketChain = 0; for(int a = 1; a < m_currentBucket+1; ++a) { MyBucket* bucket = bucketForIndex(a); if(bucket) { ++bucketCount; bucket->countFollowerIndexLengths(totalInBucketUsedSlotCount, totalInBucketChainLengths, totalInBucketHashSize, ret.longestInBucketChain); for(uint aa = 0; aa < MyBucket::NextBucketHashSize; ++aa) { uint length = 0; uint next = bucket->nextBucketForHash(aa); if(next) { ++ret.totalBucketFollowerSlots; while(next) { ++length; ++ret.averageNextBucketForHashSequenceLength; next = bucketForIndex(next)->nextBucketForHash(aa); } } if(length > ret.longestNextBucketChain) { // qDebug() << "next-bucket-chain in" << a << aa << ":" << length; ret.longestNextBucketChain = length; } } uint bucketFreeSpace = bucket->totalFreeItemsSize() + bucket->available(); freeBucketSpace += bucketFreeSpace; if(m_freeSpaceBuckets.indexOf(a) == -1) freeUnreachableSpace += bucketFreeSpace; if(bucket->isEmpty()) { ++ret.emptyBuckets; Q_ASSERT(!bucket->monsterBucketExtent()); #ifdef DEBUG_MONSTERBUCKETS Q_ASSERT(m_freeSpaceBuckets.contains(a)); #endif } lostSpace += bucket->lostSpace(); a += bucket->monsterBucketExtent(); } } if(ret.totalBucketFollowerSlots) ret.averageNextBucketForHashSequenceLength /= ret.totalBucketFollowerSlots; ret.loadedBuckets = loadedBuckets; ret.currentBucket = m_currentBucket; ret.usedMemory = usedMemory(); ret.loadedMonsterBuckets = loadedMonsterBuckets; ret.hashClashedItems = m_statBucketHashClashes; ret.totalItems = m_statItemCount; ret.usedSpaceForBuckets = usedBucketSpace; ret.freeSpaceInBuckets = freeBucketSpace; ret.lostSpace = lostSpace; ret.freeUnreachableSpace = freeUnreachableSpace; ret.averageInBucketHashSize = totalInBucketHashSize / bucketCount; ret.averageInBucketUsedSlotCount = totalInBucketUsedSlotCount / bucketCount; ret.averageInBucketSlotChainLength = float(totalInBucketChainLengths) / totalInBucketUsedSlotCount; //If m_statBucketHashClashes is high, the bucket-hash needs to be bigger return ret; } uint usedMemory() const { uint used = 0; for(int a = 0; a < m_buckets.size(); ++a) { if(m_buckets[a]) { used += m_buckets[a]->usedMemory(); } } return used; } ///This can be used to safely iterate through all items in the repository ///@param visitor Should be an instance of a class that has a bool operator()(const Item*) member function, /// that returns whether more items are wanted. ///@param onlyInMemory If this is true, only items are visited that are currently in memory. template void visitAllItems(Visitor& visitor, bool onlyInMemory = false) const { ThisLocker lock(m_mutex); for(int a = 1; a <= m_currentBucket; ++a) { if(!onlyInMemory || m_buckets.at(a)) { if(bucketForIndex(a) && !bucketForIndex(a)->visitAllItems(visitor)) return; } } } ///Synchronizes the state on disk to the one in memory, and does some memory-management. ///Should be called on a regular basis. Can be called centrally from the global item repository registry. void store() override { QMutexLocker lock(m_mutex); if(m_file) { if(!m_file->open( QFile::ReadWrite ) || !m_dynamicFile->open( QFile::ReadWrite )) { qFatal("cannot re-open repository file for storing"); return; } for(int a = 0; a < m_buckets.size(); ++a) { if(m_buckets[a]) { if(m_buckets[a]->changed()) { storeBucket(a); } if(m_unloadingEnabled) { const int unloadAfterTicks = 2; if(m_buckets[a]->lastUsed() > unloadAfterTicks) { delete m_buckets[a]; m_buckets[a] = nullptr; }else{ m_buckets[a]->tick(); } } } } if(m_metaDataChanged) { Q_ASSERT(m_dynamicFile); m_file->seek(0); m_file->write((char*)&m_repositoryVersion, sizeof(uint)); uint hashSize = bucketHashSize; m_file->write((char*)&hashSize, sizeof(uint)); uint itemRepositoryVersion = staticItemRepositoryVersion(); m_file->write((char*)&itemRepositoryVersion, sizeof(uint)); m_file->write((char*)&m_statBucketHashClashes, sizeof(uint)); m_file->write((char*)&m_statItemCount, sizeof(uint)); const uint bucketCount = static_cast(m_buckets.size()); m_file->write((char*)&bucketCount, sizeof(uint)); m_file->write((char*)&m_currentBucket, sizeof(uint)); m_file->write((char*)m_firstBucketForHash, sizeof(short unsigned int) * bucketHashSize); Q_ASSERT(m_file->pos() == BucketStartOffset); m_dynamicFile->seek(0); const uint freeSpaceBucketsSize = static_cast(m_freeSpaceBuckets.size()); m_dynamicFile->write((char*)&freeSpaceBucketsSize, sizeof(uint)); m_dynamicFile->write((char*)m_freeSpaceBuckets.data(), sizeof(uint) * freeSpaceBucketsSize); } //To protect us from inconsistency due to crashes. flush() is not enough. We need to close. m_file->close(); m_dynamicFile->close(); Q_ASSERT(!m_file->isOpen()); Q_ASSERT(!m_dynamicFile->isOpen()); } } ///This mutex is used for the thread-safe locking when threadSafe is true. Even if threadSafe is false, it is ///always locked before storing to or loading from disk. ///@warning If threadSafe is false, and you sometimes call store() from within another thread(As happens in duchain), /// you must always make sure that this mutex is locked before you access this repository. /// Else you will get crashes and inconsistencies. /// In KDevelop This means: Make sure you _always_ lock this mutex before accessing the repository. QMutex* mutex() const { return m_mutex; } ///With this, you can replace the internal mutex with another one. void setMutex(QMutex* mutex) { m_mutex = mutex; } QString repositoryName() const override { return m_repositoryName; } private: uint createIndex(ushort bucketIndex, ushort indexInBucket) { //Combine the index in the bucket, and the bucket number into one index const uint index = (bucketIndex << 16) + indexInBucket; verifyIndex(index); return index; } /** * Walks through all buckets clashing with @p hash * * Will return the value returned by the lambda, returning early if truthy */ template auto walkBucketChain(unsigned int hash, const Visitor& visitor) const -> decltype(visitor(0, nullptr)) { unsigned short bucketIndex = m_firstBucketForHash[hash % bucketHashSize]; while (bucketIndex) { auto* bucketPtr = m_buckets.at(bucketIndex); if (!bucketPtr) { initializeBucket(bucketIndex); bucketPtr = m_buckets.at(bucketIndex); } if (auto visitResult = visitor(bucketIndex, bucketPtr)) { return visitResult; } bucketIndex = bucketPtr->nextBucketForHash(hash); } return {}; // clazy:exclude=returning-void-expression } ///Makes sure the order within m_freeSpaceBuckets is correct, after largestFreeSize has been changed for m_freeSpaceBuckets[index]. ///If too few space is free within the given bucket, it is removed from m_freeSpaceBuckets. void updateFreeSpaceOrder(uint index) { m_metaDataChanged = true; unsigned int* freeSpaceBuckets = m_freeSpaceBuckets.data(); Q_ASSERT(index < static_cast(m_freeSpaceBuckets.size())); MyBucket* bucketPtr = bucketForIndex(freeSpaceBuckets[index]); unsigned short largestFreeSize = bucketPtr->largestFreeSize(); if(largestFreeSize == 0 || (bucketPtr->freeItemCount() <= MyBucket::MaxFreeItemsForHide && largestFreeSize <= MyBucket::MaxFreeSizeForHide)) { //Remove the item from freeSpaceBuckets m_freeSpaceBuckets.remove(index); }else{ while(1) { int prev = index-1; int next = index+1; if(prev >= 0 && (bucketForIndex(freeSpaceBuckets[prev])->largestFreeSize() > largestFreeSize || (bucketForIndex(freeSpaceBuckets[prev])->largestFreeSize() == largestFreeSize && freeSpaceBuckets[index] < freeSpaceBuckets[prev])) ) { //This item should be behind the successor, either because it has a lower largestFreeSize, or because the index is lower uint oldPrevValue = freeSpaceBuckets[prev]; freeSpaceBuckets[prev] = freeSpaceBuckets[index]; freeSpaceBuckets[index] = oldPrevValue; index = prev; }else if(next < m_freeSpaceBuckets.size() && (bucketForIndex(freeSpaceBuckets[next])->largestFreeSize() < largestFreeSize || (bucketForIndex(freeSpaceBuckets[next])->largestFreeSize() == largestFreeSize && freeSpaceBuckets[index] > freeSpaceBuckets[next]))) { //This item should be behind the successor, either because it has a higher largestFreeSize, or because the index is higher uint oldNextValue = freeSpaceBuckets[next]; freeSpaceBuckets[next] = freeSpaceBuckets[index]; freeSpaceBuckets[index] = oldNextValue; index = next; }else { break; } } } } ///Does conversion from monster-bucket to normal bucket and from normal bucket to monster-bucket ///The bucket @param bucketNumber must already be loaded and empty. the "extent" buckets behind must also be loaded, ///and also be empty. ///The created buckets are not registered anywhere. When converting from monster-bucket to normal bucket, ///oldExtent+1 normal buckets are created, that must be registered somewhere. ///@warning During conversion, all the touched buckets are deleted and re-created ///@param extent When this is zero, the bucket is converted from monster-bucket to normal bucket. /// When it is nonzero, it is converted to a monster-bucket. MyBucket* convertMonsterBucket(int bucketNumber, int extent) { Q_ASSERT(bucketNumber); MyBucket* bucketPtr = m_buckets.at(bucketNumber); if(!bucketPtr) { initializeBucket(bucketNumber); bucketPtr = m_buckets.at(bucketNumber); } if(extent) { //Convert to monster-bucket #ifdef DEBUG_MONSTERBUCKETS for(int index = bucketNumber; index < bucketNumber + 1 + extent; ++index) { Q_ASSERT(bucketPtr->isEmpty()); Q_ASSERT(!bucketPtr->monsterBucketExtent()); Q_ASSERT(m_freeSpaceBuckets.indexOf(index) == -1); } #endif for(int index = bucketNumber; index < bucketNumber + 1 + extent; ++index) deleteBucket(index); m_buckets[bucketNumber] = new MyBucket(); m_buckets[bucketNumber]->initialize(extent); #ifdef DEBUG_MONSTERBUCKETS for(uint index = bucketNumber+1; index < bucketNumber + 1 + extent; ++index) { Q_ASSERT(!m_buckets[index]); } #endif }else{ Q_ASSERT(bucketPtr->monsterBucketExtent()); Q_ASSERT(bucketPtr->isEmpty()); const int oldExtent = bucketPtr->monsterBucketExtent(); deleteBucket(bucketNumber); //Delete the monster-bucket for(int index = bucketNumber; index < bucketNumber + 1 + oldExtent; ++index) { Q_ASSERT(!m_buckets[index]); m_buckets[index] = new MyBucket(); m_buckets[index]->initialize(0); Q_ASSERT(!m_buckets[index]->monsterBucketExtent()); } } return m_buckets[bucketNumber]; } MyBucket* bucketForIndex(short unsigned int index) const { MyBucket* bucketPtr = m_buckets.at(index); if(!bucketPtr) { initializeBucket(index); bucketPtr = m_buckets.at(index); } return bucketPtr; } bool open(const QString& path) override { QMutexLocker lock(m_mutex); close(); //qDebug() << "opening repository" << m_repositoryName << "at" << path; QDir dir(path); m_file = new QFile(dir.absoluteFilePath( m_repositoryName )); m_dynamicFile = new QFile(dir.absoluteFilePath( m_repositoryName + QLatin1String("_dynamic") )); if(!m_file->open( QFile::ReadWrite ) || !m_dynamicFile->open( QFile::ReadWrite ) ) { delete m_file; m_file = nullptr; delete m_dynamicFile; m_dynamicFile = nullptr; return false; } m_metaDataChanged = true; if(m_file->size() == 0) { m_file->resize(0); m_file->write((char*)&m_repositoryVersion, sizeof(uint)); uint hashSize = bucketHashSize; m_file->write((char*)&hashSize, sizeof(uint)); uint itemRepositoryVersion = staticItemRepositoryVersion(); m_file->write((char*)&itemRepositoryVersion, sizeof(uint)); m_statBucketHashClashes = m_statItemCount = 0; m_file->write((char*)&m_statBucketHashClashes, sizeof(uint)); m_file->write((char*)&m_statItemCount, sizeof(uint)); m_buckets.resize(10); m_buckets.fill(nullptr); uint bucketCount = m_buckets.size(); m_file->write((char*)&bucketCount, sizeof(uint)); memset(m_firstBucketForHash, 0, bucketHashSize * sizeof(short unsigned int)); m_currentBucket = 1; //Skip the first bucket, we won't use it so we have the zero indices for special purposes m_file->write((char*)&m_currentBucket, sizeof(uint)); m_file->write((char*)m_firstBucketForHash, sizeof(short unsigned int) * bucketHashSize); //We have completely initialized the file now if(m_file->pos() != BucketStartOffset) { KMessageBox::error(nullptr, i18n("Failed writing to %1, probably the disk is full", m_file->fileName())); abort(); } const uint freeSpaceBucketsSize = 0; m_dynamicFile->write((char*)&freeSpaceBucketsSize, sizeof(uint)); m_freeSpaceBuckets.clear(); }else{ m_file->close(); bool res = m_file->open( QFile::ReadOnly ); //Re-open in read-only mode, so we create a read-only m_fileMap VERIFY(res); //Check that the version is correct uint storedVersion = 0, hashSize = 0, itemRepositoryVersion = 0; m_file->read((char*)&storedVersion, sizeof(uint)); m_file->read((char*)&hashSize, sizeof(uint)); m_file->read((char*)&itemRepositoryVersion, sizeof(uint)); m_file->read((char*)&m_statBucketHashClashes, sizeof(uint)); m_file->read((char*)&m_statItemCount, sizeof(uint)); if(storedVersion != m_repositoryVersion || hashSize != bucketHashSize || itemRepositoryVersion != staticItemRepositoryVersion()) { qDebug() << "repository" << m_repositoryName << "version mismatch in" << m_file->fileName() << ", stored: version " << storedVersion << "hashsize" << hashSize << "repository-version" << itemRepositoryVersion << " current: version" << m_repositoryVersion << "hashsize" << bucketHashSize << "repository-version" << staticItemRepositoryVersion(); delete m_file; m_file = nullptr; delete m_dynamicFile; m_dynamicFile = nullptr; return false; } m_metaDataChanged = false; uint bucketCount = 0; m_file->read((char*)&bucketCount, sizeof(uint)); m_buckets.resize(bucketCount); m_file->read((char*)&m_currentBucket, sizeof(uint)); m_file->read((char*)m_firstBucketForHash, sizeof(short unsigned int) * bucketHashSize); Q_ASSERT(m_file->pos() == BucketStartOffset); uint freeSpaceBucketsSize = 0; m_dynamicFile->read((char*)&freeSpaceBucketsSize, sizeof(uint)); m_freeSpaceBuckets.resize(freeSpaceBucketsSize); m_dynamicFile->read((char*)m_freeSpaceBuckets.data(), sizeof(uint) * freeSpaceBucketsSize); } m_fileMapSize = 0; m_fileMap = nullptr; #ifdef ITEMREPOSITORY_USE_MMAP_LOADING if(m_file->size() > BucketStartOffset){ m_fileMap = m_file->map(BucketStartOffset, m_file->size() - BucketStartOffset); Q_ASSERT(m_file->isOpen()); Q_ASSERT(m_file->size() >= BucketStartOffset); if(m_fileMap){ m_fileMapSize = m_file->size() - BucketStartOffset; }else{ qWarning() << "mapping" << m_file->fileName() << "FAILED!"; } } #endif //To protect us from inconsistency due to crashes. flush() is not enough. m_file->close(); m_dynamicFile->close(); return true; } ///@warning by default, this does not store the current state to disk. void close(bool doStore = false) override { if(doStore) store(); if(m_file) m_file->close(); delete m_file; m_file = nullptr; m_fileMap = nullptr; m_fileMapSize = 0; if(m_dynamicFile) m_dynamicFile->close(); delete m_dynamicFile; m_dynamicFile = nullptr; qDeleteAll(m_buckets); m_buckets.clear(); memset(m_firstBucketForHash, 0, bucketHashSize * sizeof(short unsigned int)); } struct AllItemsReachableVisitor { explicit AllItemsReachableVisitor(ItemRepository* rep) : repository(rep) { } bool operator()(const Item* item) { return repository->itemReachable(item); } ItemRepository* repository; }; //Returns whether the given item is reachable through its hash bool itemReachable(const Item* item) const { const uint hash = item->hash(); return walkBucketChain(hash, [=](ushort /*bucketIndex*/, const MyBucket* bucketPtr) { return bucketPtr->itemReachable(item, hash); }); } //Returns true if all items in the given bucket are reachable through their hashes bool allItemsReachable(unsigned short bucket) { if(!bucket) return true; MyBucket* bucketPtr = bucketForIndex(bucket); AllItemsReachableVisitor visitor(this); return bucketPtr->visitAllItems(visitor); } int finalCleanup() override { ThisLocker lock(m_mutex); int changed = 0; for(int a = 1; a <= m_currentBucket; ++a) { MyBucket* bucket = bucketForIndex(a); if(bucket && bucket->dirty()) { ///@todo Faster dirty check, without loading bucket changed += bucket->finalCleanup(*this); } a += bucket->monsterBucketExtent(); //Skip buckets that are attached as tail to monster-buckets } return changed; } inline void initializeBucket(int bucketNumber) const { Q_ASSERT(bucketNumber); #ifdef DEBUG_MONSTERBUCKETS for(uint offset = 1; offset < 5; ++offset) { int test = bucketNumber - offset; if(test >= 0 && m_buckets[test]) { Q_ASSERT(m_buckets[test]->monsterBucketExtent() < offset); } } #endif if(!m_buckets[bucketNumber]) { m_buckets[bucketNumber] = new MyBucket(); bool doMMapLoading = (bool)m_fileMap; uint offset = ((bucketNumber-1) * MyBucket::DataSize); if(m_file && offset < m_fileMapSize && doMMapLoading && *reinterpret_cast(m_fileMap + offset) == 0) { // qDebug() << "loading bucket mmap:" << bucketNumber; m_buckets[bucketNumber]->initializeFromMap(reinterpret_cast(m_fileMap + offset)); } else if(m_file) { //Either memory-mapping is disabled, or the item is not in the existing memory-map, //so we have to load it the classical way. bool res = m_file->open( QFile::ReadOnly ); if(offset + BucketStartOffset < m_file->size()) { VERIFY(res); offset += BucketStartOffset; m_file->seek(offset); uint monsterBucketExtent; m_file->read((char*)(&monsterBucketExtent), sizeof(unsigned int));; m_file->seek(offset); ///FIXME: use the data here instead of copying it again in prepareChange QByteArray data = m_file->read((1+monsterBucketExtent) * MyBucket::DataSize); m_buckets[bucketNumber]->initializeFromMap(data.data()); m_buckets[bucketNumber]->prepareChange(); }else{ m_buckets[bucketNumber]->initialize(0); } m_file->close(); }else{ m_buckets[bucketNumber]->initialize(0); } }else{ m_buckets[bucketNumber]->initialize(0); } } ///Can only be called on empty buckets void deleteBucket(int bucketNumber) { Q_ASSERT(bucketForIndex(bucketNumber)->isEmpty()); Q_ASSERT(bucketForIndex(bucketNumber)->noNextBuckets()); delete m_buckets[bucketNumber]; m_buckets[bucketNumber] = nullptr; } //m_file must be opened void storeBucket(int bucketNumber) const { if(m_file && m_buckets[bucketNumber]) { m_buckets[bucketNumber]->store(m_file, BucketStartOffset + (bucketNumber-1) * MyBucket::DataSize); } } /// If mustFindBucket is zero, the whole chain is just walked. This is good for debugging for infinite recursion. /// @return whether @p mustFindBucket was found bool walkBucketLinks(uint checkBucket, uint hash, uint mustFindBucket = 0) const { bool found = false; while(checkBucket) { if(checkBucket == mustFindBucket) found = true; checkBucket = bucketForIndex(checkBucket)->nextBucketForHash(hash); } return found || (mustFindBucket == 0); } /// Computes the bucket where the chains opened by the buckets @p mainHead and @p intersectorHead /// with hash @p hash meet each other. /// @return QPair hashChainIntersection(uint mainHead, uint intersectorHead, uint hash) const { uint previous = 0; uint current = mainHead; while(current) { ///@todo Make this more efficient if(walkBucketLinks(intersectorHead, hash, current)) return qMakePair(previous, current); previous = current; current = bucketForIndex(current)->nextBucketForHash(hash); } return qMakePair(0u, 0u); } void putIntoFreeList(unsigned short bucket, MyBucket* bucketPtr) { Q_ASSERT(!bucketPtr->monsterBucketExtent()); int indexInFree = m_freeSpaceBuckets.indexOf(bucket); if(indexInFree == -1 && (bucketPtr->freeItemCount() >= MyBucket::MinFreeItemsForReuse || bucketPtr->largestFreeSize() >= MyBucket::MinFreeSizeForReuse)) { //Add the bucket to the list of buckets from where to re-assign free space //We only do it when a specific threshold of empty items is reached, because that way items can stay "somewhat" semantically ordered. Q_ASSERT(bucketPtr->largestFreeSize()); int insertPos; for(insertPos = 0; insertPos < m_freeSpaceBuckets.size(); ++insertPos) { if(bucketForIndex(m_freeSpaceBuckets[insertPos])->largestFreeSize() > bucketPtr->largestFreeSize()) break; } m_freeSpaceBuckets.insert(insertPos, bucket); updateFreeSpaceOrder(insertPos); }else if(indexInFree != -1) { ///Re-order so the order in m_freeSpaceBuckets is correct(sorted by largest free item size) updateFreeSpaceOrder(indexInFree); } #ifdef DEBUG_MONSTERBUCKETS if(bucketPtr->isEmpty()) { Q_ASSERT(m_freeSpaceBuckets.contains(bucket)); } #endif } void verifyIndex(uint index) const { // We don't use zero indices Q_ASSERT(index); int bucket = (index >> 16); // nor zero buckets Q_ASSERT(bucket); Q_ASSERT_X(bucket < m_buckets.size(), Q_FUNC_INFO, qPrintable(QStringLiteral("index %1 gives invalid bucket number %2, current count is: %3") .arg(index) .arg(bucket) .arg(m_buckets.size()))); // don't trigger compile warnings in release mode Q_UNUSED(bucket); Q_UNUSED(index); } bool m_metaDataChanged; mutable QMutex m_ownMutex; mutable QMutex* m_mutex; QString m_repositoryName; mutable int m_currentBucket; //List of buckets that have free space available that can be assigned. Sorted by size: Smallest space first. Second order sorting: Bucket index QVector m_freeSpaceBuckets; mutable QVector m_buckets; uint m_statBucketHashClashes, m_statItemCount; //Maps hash-values modulo 1< This library is free software; you can redistribute it and/or modify it under the terms of the GNU Library General Public License version 2 as published by the Free Software Foundation. This library is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public License for more details. You should have received a copy of the GNU Library General Public License along with this library; see the file COPYING.LIB. If not, write to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. */ #ifndef EMBEDDED_FREE_TREE #define EMBEDDED_FREE_TREE #include "kdevvarlengtharray.h" #include #include #include #include //Uncomment this to search for tree-inconsistencies, however it's very expensive // #define DEBUG_FREEITEM_COUNT debugFreeItemCount(); verifyTreeConsistent(*m_centralFreeItem, 0, m_itemCount); #define DEBUG_FREEITEM_COUNT /** * This file implements algorithms that allow managing a sorted list of items, and managing "free" items * for reuse efficiently in that list. Among those free items a tree is built, and they are traversed * on insertion/removal to manage free items in the tree. * * There is specific needs on the embedded items: * - They must be markable "invalid", so after they are deleted they can stay in their place as invalid items. * - While they are invalid, they still must be able to hold 2 integers, needed for managing the tree of free items. * - One integer is needed for each list to hold a pointer to the central free item. * * Only these functions must be used to manipulate the lists, from the beginning up. First create an empty list * and initialize centralFreeItem with -1, and then you start adding items. * * Since the list is sorted, and each item can be contained only once, these lists actually represent a set. * * EmbeddedTreeAlgorithms implements an efficient "contains" function that uses binary search within the list. */ namespace KDevelop { ///Responsible for handling the items in the list ///This is an example. ItemHandler::rightChild(..) and ItemHandler::leftChild(..) must be values that must be able to hold the count of positive ///values that will be the maximum size of the list, and additionally -1. // template // class ExampleItemHandler { // public: // ExampleItemHandler(const Data& data) : m_data(data) { // } // int ItemHandler::rightChild() const { // Q_ASSERT(0); // } // int ItemHandler::leftChild() const { // Q_ASSERT(0); // } // void ItemHandler::setLeftChild(int child) { // Q_ASSERT(0); // } // void setRightChild(int child) { // Q_ASSERT(0); // } // bool operator<(const StandardItemHandler& rhs) const { // Q_ASSERT(0); // } // //Copies this item into the given one // void copyTo(Data& data) const { // data = m_data; // } // // static void createFreeItem(Data& data) { // data = Data(); // } // // bool isFree() const { // Q_ASSERT(0); // } // // const Data& data() { // } // // private: // const Data& m_data; // }; /** * Use this for several constant algorithms on sorted lists with free-trees * */ template class EmbeddedTreeAlgorithms { public: EmbeddedTreeAlgorithms(const Data* items, uint itemCount, const int& centralFreeItem) : m_items(items), m_itemCount(itemCount), m_centralFreeItem(¢ralFreeItem) { } ~EmbeddedTreeAlgorithms() { } ///Efficiently checks whether the item is contained in the set. ///If it is contained, returns the index. Else, returns -1. int indexOf(const Data& data) { return indexOf(data, 0, m_itemCount); } ///Searches the given item within the specified bounds. int indexOf(const Data& data, uint start, uint end) { while(1) { if(start >= end) return -1; int center = (start + end)/2; //Skip free items, since they cannot be used for ordering for(; center < (int)end; ) { if(!ItemHandler::isFree(m_items[center])) break; ++center; } if(center == (int)end) { end = (start + end)/2; //No non-free items found in second half, so continue search in the other }else{ if(ItemHandler::equals(data, m_items[center])) { return center; }else if(data < m_items[center]) { end = (start + end)/2; }else{ //Continue search in second half start = center+1; } } } } ///Returns the first valid index that has a data-value larger or equal to @p data. ///Returns -1 if nothing is found. int lowerBound(const Data& data, int start, int end) { int currentBound = -1; while(1) { if(start >= end) return currentBound; int center = (start + end)/2; //Skip free items, since they cannot be used for ordering for(; center < end; ) { if(!ItemHandler::isFree(m_items[center])) break; ++center; } if(center == end) { end = (start + end)/2; //No non-free items found in second half, so continue search in the other }else{ if(ItemHandler::equals(data, m_items[center])) { return center; }else if(data < m_items[center]) { currentBound = center; end = (start + end)/2; }else{ //Continue search in second half start = center+1; } } } } uint countFreeItems() const { return countFreeItemsInternal(*m_centralFreeItem); } uint countFreeItemsNaive() const { uint ret = 0; for(uint a = 0; a < m_itemCount; ++a) { if(ItemHandler::isFree(m_items[a])) ++ret; } return ret; } void verifyOrder() { Data last; for(uint a = 0; a < m_itemCount; ++a) { if(!ItemHandler::isFree(m_items[a])) { if(!ItemHandler::isFree(last)) Q_ASSERT(last < m_items[a]); last = m_items[a]; } } } void verifyTreeConsistent() { verifyTreeConsistentInternal(*m_centralFreeItem, 0, m_itemCount); Q_ASSERT(countFreeItems() == countFreeItemsNaive()); } private: void verifyTreeConsistentInternal(int position, int lowerBound, int upperBound) { if(position == -1) return; Q_ASSERT(lowerBound <= position && position < upperBound); verifyTreeConsistentInternal(ItemHandler::leftChild(m_items[position]), lowerBound, position); verifyTreeConsistentInternal(ItemHandler::rightChild(m_items[position]), position+1, upperBound); } uint countFreeItemsInternal(int item) const { if(item == -1) return 0; return 1 + countFreeItemsInternal(ItemHandler::leftChild(m_items[item])) + countFreeItemsInternal(ItemHandler::rightChild(m_items[item])); } const Data* m_items; uint m_itemCount; const int* m_centralFreeItem; }; /**Use this to add items. * The added item must not be in the set yet! * General usage: * - Construct the object * - Check if newItemCount() equals the previous item-count. If not, construct * a new list as large as newItemCount, and call object.transferData to transfer the data * into the new list. The new size must match the returned newItemCount. * - Either call object.apply(), or let it be called automatically by the destructor. * @tparam increaseFraction By what fraction the list is increased when it needs to. For example the size will be increased by 1/5 if it's 5. * @tparam rebuildIfInsertionMoreExpensive The structure is rebuilt completely when an insertion needs a moving around of more than rebuildIfInsertionMoreExpensive times the count of items needed to be moved in worst case in a fresh tree. * After rebuilding the tree, the free space is evenly distributed, and thus insertions require much less moving. * */ template class EmbeddedTreeAddItem { public: EmbeddedTreeAddItem(Data* items, uint itemCount, int& centralFreeItem, const Data& add) : m_add(add), m_items(items), m_itemCount(itemCount), m_centralFreeItem(¢ralFreeItem), m_applied(false), m_needResize(false) { m_needResize = !apply(); } ~EmbeddedTreeAddItem() { if(!m_applied) apply(true); } ///Check this to see whether a new item-count is needed. If this does not equal the given itemCount, then ///the data needs to be transferred into a new list using transferData uint newItemCount() const { if(!m_applied) { if(*m_centralFreeItem == -1) { uint realItemCount = m_itemCount - countFreeItems(*m_centralFreeItem); uint newItemCount = realItemCount + (realItemCount/increaseFraction); if(newItemCount <= m_itemCount) newItemCount = m_itemCount+1; return newItemCount; }else if(m_needResize) { uint realItemCount = m_itemCount - countFreeItems(*m_centralFreeItem); uint newItemCount = realItemCount + (realItemCount/increaseFraction); return newItemCount; } } return m_itemCount; } ///Transfers the data into a new item-list. The size of the new item-list must equal newItemCount() void transferData(Data* newItems, uint newCount, int* newCentralFree = nullptr) { DEBUG_FREEITEM_COUNT uint currentRealCount = m_itemCount - countFreeItems(*m_centralFreeItem); // Q_ASSERT(currentRealCount + (currentRealCount/increaseFraction) == newCount); //Create a new list where the items from m_items are put into newItems, with the free items evenly //distributed, and a clean balanced free-tree. uint newFreeCount = newCount - currentRealCount; volatile uint freeItemRaster; if(newFreeCount) freeItemRaster = newCount / newFreeCount; else { freeItemRaster = newCount+1; //No free items } ///@todo Do not iterate through all items, instead use the free-tree and memcpy for the ranges between free items. ///Ideally, even the free-tree would be built on-the-fly. Q_ASSERT(freeItemRaster); uint offset = 0; uint insertedValidCount = 0; for(uint a = 0; a < newCount; ++a) { //Create new free items at the end of their raster range if(a % freeItemRaster == (freeItemRaster-1)) { //We need to insert a free item ItemHandler::createFreeItem(newItems[a]); ++offset; }else{ ++insertedValidCount; while(ItemHandler::isFree(m_items[a-offset]) && a-offset < m_itemCount) --offset; Q_ASSERT(a-offset < m_itemCount); newItems[a] = m_items[a-offset]; // Q_ASSERT(!ItemHandler::isFree(newItems[a])); } } Q_ASSERT(insertedValidCount == m_itemCount - countFreeItems(*m_centralFreeItem)); // qCDebug(UTIL) << m_itemCount << newCount << offset; // Q_ASSERT(m_itemCount == newCount-offset); m_items = newItems; m_itemCount = newCount; if(newCentralFree) m_centralFreeItem = newCentralFree; *m_centralFreeItem = buildFreeTree(newFreeCount, freeItemRaster, freeItemRaster-1); // qCDebug(UTIL) << "count of new free items:" << newFreeCount; // Q_ASSERT(countFreeItems( *m_centralFreeItem ) == newFreeCount); DEBUG_FREEITEM_COUNT } ///Tries to put the item into the list. If the insertion would be too inefficient or is not possible, returns false, unless @param force is true bool apply(bool force = false) { if(m_applied) return true; if(*m_centralFreeItem == -1) { Q_ASSERT(!force); return false; } //Find the free item that is nearest to the target position in the item order int previousItem = -1; int currentItem = *m_centralFreeItem; int replaceCurrentWith = -1; //In currentLowerBound and currentUpperBound, we count the smallest contiguous range between free //items that the added items needs to be sorted into. If the range is empty, the item can just be inserted. int currentLowerBound = 0; int currentUpperBound = m_itemCount; //Now go down the chain, always into the items direction while(1) { QPair freeBounds = leftAndRightRealItems(currentItem); const Data& current(m_items[currentItem]); if(freeBounds.first != -1 && m_add < m_items[freeBounds.first]) { //Follow left child currentUpperBound = freeBounds.first+1; if(ItemHandler::leftChild(current) != -1) { //Continue traversing previousItem = currentItem; currentItem = ItemHandler::leftChild(current); }else{ replaceCurrentWith = ItemHandler::rightChild(current); break; } }else if(freeBounds.second != -1 && m_items[freeBounds.second] < m_add) { //Follow right child currentLowerBound = freeBounds.second; if(ItemHandler::rightChild(current) != -1) { //Continue traversing previousItem = currentItem; currentItem = ItemHandler::rightChild(current); }else{ replaceCurrentWith = ItemHandler::leftChild(current); break; } }else{ //We will use this item! So find a replacement for it in the tree, and update the structure force = true; currentLowerBound = currentUpperBound = currentItem; int leftReplaceCandidate = -1, rightReplaceCandidate = -1; if(ItemHandler::leftChild(current) != -1) leftReplaceCandidate = rightMostChild(ItemHandler::leftChild(current)); if(ItemHandler::rightChild(current) != -1) rightReplaceCandidate = leftMostChild(ItemHandler::rightChild(current)); ///@todo it's probably better using lowerBound and upperBound like in the "remove" version //Left and right bounds of all children of current int leftChildBound = leftMostChild(currentItem), rightChildBound = rightMostChild(currentItem); Q_ASSERT(leftChildBound != -1 && rightChildBound != -1); int childCenter = (leftChildBound + rightChildBound)/2; if(leftReplaceCandidate == -1 && rightReplaceCandidate == -1) { //We don't have a replace candidate, since there is no children Q_ASSERT(ItemHandler::leftChild(current) == -1); Q_ASSERT(ItemHandler::rightChild(current) == -1); }else if(rightReplaceCandidate == -1 || abs(leftReplaceCandidate - childCenter) < abs(rightReplaceCandidate - childCenter)) { //pick the left replacement, since it's more central Q_ASSERT(leftReplaceCandidate != -1); replaceCurrentWith = leftReplaceCandidate; Data& replaceWith(m_items[replaceCurrentWith]); if(replaceCurrentWith == ItemHandler::leftChild(current)) { //The left child of replaceWith can just stay as it is, and we just need to add the right child Q_ASSERT(ItemHandler::rightChild(replaceWith) == -1); }else{ takeRightMostChild(ItemHandler::leftChild(current)); //Since we'll be clearing the item, we have to put this childsomewhere else. // Either make it our new "left" child, or make it the new left children "rightmost" child. int addRightMostLeftChild = ItemHandler::leftChild(replaceWith); ItemHandler::setLeftChild(replaceWith, -1); Q_ASSERT(ItemHandler::leftChild(replaceWith) == -1); Q_ASSERT(ItemHandler::rightChild(replaceWith) == -1); if(ItemHandler::leftChild(current) != -1) { Q_ASSERT(rightMostChild(ItemHandler::leftChild(current)) != replaceCurrentWith); Q_ASSERT(ItemHandler::leftChild(current) == -1 || ItemHandler::leftChild(current) < replaceCurrentWith); ItemHandler::setLeftChild(replaceWith, ItemHandler::leftChild(current)); if(addRightMostLeftChild != -1) { int rightMostLeft = rightMostChild(ItemHandler::leftChild(replaceWith)); Q_ASSERT(rightMostLeft != -1); // Q_ASSERT(item(rightMostLeft).ItemHandler::rightChild() == -1); Q_ASSERT(rightMostLeft < addRightMostLeftChild); ItemHandler::setRightChild(m_items[rightMostLeft], addRightMostLeftChild); } }else{ Q_ASSERT(addRightMostLeftChild == -1 || addRightMostLeftChild < replaceCurrentWith); ItemHandler::setLeftChild(replaceWith, addRightMostLeftChild); } } Q_ASSERT(ItemHandler::rightChild(current) == -1 || replaceCurrentWith < ItemHandler::rightChild(current)); ItemHandler::setRightChild(replaceWith, ItemHandler::rightChild(current)); }else{ //pick the right replacement, since it's more central Q_ASSERT(rightReplaceCandidate != -1); replaceCurrentWith = rightReplaceCandidate; Data& replaceWith(m_items[replaceCurrentWith]); if(replaceCurrentWith == ItemHandler::rightChild(current)) { //The right child of replaceWith can just stay as it is, and we just need to add the left child Q_ASSERT(ItemHandler::leftChild(replaceWith) == -1); }else{ takeLeftMostChild(ItemHandler::rightChild(current)); //Since we'll be clearing the item, we have to put this childsomewhere else. // Either make it our new "right" child, or make it the new right children "leftmost" child. int addLeftMostRightChild = ItemHandler::rightChild(replaceWith); ItemHandler::setRightChild(replaceWith, -1); Q_ASSERT(ItemHandler::rightChild(replaceWith) == -1); Q_ASSERT(ItemHandler::leftChild(replaceWith) == -1); if(ItemHandler::rightChild(current) != -1) { Q_ASSERT(leftMostChild(ItemHandler::rightChild(current)) != replaceCurrentWith); Q_ASSERT(ItemHandler::rightChild(current) == -1 || replaceCurrentWith < ItemHandler::rightChild(current)); ItemHandler::setRightChild(replaceWith, ItemHandler::rightChild(current)); if(addLeftMostRightChild != -1) { int leftMostRight = leftMostChild(ItemHandler::rightChild(replaceWith)); Q_ASSERT(leftMostRight != -1); Q_ASSERT(ItemHandler::leftChild(m_items[leftMostRight]) == -1); Q_ASSERT(addLeftMostRightChild < leftMostRight); ItemHandler::setLeftChild(m_items[leftMostRight], addLeftMostRightChild); } }else{ Q_ASSERT(addLeftMostRightChild == -1 || replaceCurrentWith < addLeftMostRightChild); ItemHandler::setRightChild(replaceWith, addLeftMostRightChild); } } Q_ASSERT(ItemHandler::leftChild(current) == -1 || ItemHandler::leftChild(current) < replaceCurrentWith); ItemHandler::setLeftChild(replaceWith, ItemHandler::leftChild(current)); } break; } } //We can insert now //currentItem and previousItem are the two items that best enclose the target item // for(int a = currentLowerBound; a < currentUpperBound; ++a) { // Q_ASSERT(!ItemHandler::isFree(m_items[a])); // } Q_ASSERT(currentItem < currentLowerBound || currentItem >= currentUpperBound); //If the current item is on a border of the bounds, it needs to be inserted in the right position. //Else, the current position already is right, and we only need to copy it in. if(currentLowerBound < currentUpperBound && (currentItem == currentLowerBound-1 || currentItem == currentUpperBound)) { if(!insertSorted(m_add, currentItem, currentLowerBound, currentUpperBound, force)) { return false; } }else{ ItemHandler::copyTo(m_add, m_items[currentItem]); } m_applied = true; //First, take currentItem out of the chain, by replacing it with current.rightChild in the parent if(previousItem != -1) { Data& previous(m_items[previousItem]); if(ItemHandler::leftChild(previous) == currentItem) { Q_ASSERT(replaceCurrentWith == -1 || replaceCurrentWith < previousItem); ItemHandler::setLeftChild(previous, replaceCurrentWith); } else if(ItemHandler::rightChild(previous) == currentItem) { Q_ASSERT(replaceCurrentWith == -1 || previousItem < replaceCurrentWith); ItemHandler::setRightChild(previous, replaceCurrentWith); } else { Q_ASSERT(0); } } else { *m_centralFreeItem = replaceCurrentWith; } return true; DEBUG_FREEITEM_COUNT } private: void verifyTreeConsistent(int position, int lowerBound, int upperBound) { if(position == -1) return; Q_ASSERT(lowerBound <= position && position < upperBound); verifyTreeConsistent(ItemHandler::leftChild(m_items[position]), lowerBound, position); verifyTreeConsistent(ItemHandler::rightChild(m_items[position]), position+1, upperBound); } void debugFreeItemCount() { uint count = 0; for(uint a = 0; a < m_itemCount; ++a) { if(isFree(m_items[a])) ++count; } uint counted = countFreeItems(*m_centralFreeItem); Q_ASSERT(count == counted); Q_UNUSED(counted); } QPair leftAndRightRealItems(int pos) { Q_ASSERT(ItemHandler::isFree(m_items[pos])); int left = -1, right = -1; for(int a = pos-1; a >= 0; --a) { if(!ItemHandler::isFree(m_items[a])) { left = a; break; } } for(uint a = pos+1; a < m_itemCount; ++a) { if(!ItemHandler::isFree(m_items[a])) { right = a; break; } } return qMakePair(left, right); } int buildFreeTree(int count, uint raster, int start) { Q_ASSERT((start % raster) == (raster-1)); Q_ASSERT(count != 0); Q_ASSERT(count <= (int)m_itemCount); if(count == 1) { ItemHandler::createFreeItem(m_items[start]); return start; }else{ int central = start + (count / 2) * raster; int leftCount = count / 2; int midCount = 1; int rightCount = count - leftCount - midCount; Q_ASSERT(leftCount + midCount <= count); ItemHandler::createFreeItem(m_items[central]); Q_ASSERT(ItemHandler::isFree(m_items[central])); int leftFreeTree = buildFreeTree(leftCount, raster, start); Q_ASSERT(leftFreeTree == -1 || leftFreeTree < central); ItemHandler::setLeftChild(m_items[central], leftFreeTree ); if(rightCount > 0) { int rightFreeTree = buildFreeTree(rightCount, raster, central+raster); Q_ASSERT(rightFreeTree == -1 || central < rightFreeTree); ItemHandler::setRightChild(m_items[central], rightFreeTree ); } return central; } } uint countFreeItems(int item) const { if(item == -1) return 0; const Data& current(m_items[item]); return 1 + countFreeItems(ItemHandler::leftChild(current)) + countFreeItems(ItemHandler::rightChild(current)); } int leftMostChild(int pos) const { while(1) { if(ItemHandler::leftChild(m_items[pos]) != -1) pos = ItemHandler::leftChild(m_items[pos]); else return pos; } } int takeLeftMostChild(int pos) const { int parent = -1; while(1) { if(ItemHandler::leftChild(m_items[pos]) != -1) { parent = pos; pos = ItemHandler::leftChild(m_items[pos]); } else { ItemHandler::setLeftChild(m_items[parent], -1); return pos; } } } int rightMostChild(int pos) const { while(1) { if(ItemHandler::rightChild(m_items[pos]) != -1) pos = ItemHandler::rightChild(m_items[pos]); else return pos; } } int takeRightMostChild(int pos) const { int parent = -1; while(1) { if(ItemHandler::rightChild(m_items[pos]) != -1) { parent = pos; pos = ItemHandler::rightChild(m_items[pos]); } else { ItemHandler::setRightChild(m_items[parent], -1); return pos; } } } ///Maximum "moving" out of the way of items without forcing a complete rebuild of the list inline int maxMoveAround() const { return increaseFraction * rebuildIfInsertionMoreExpensive; } ///Inserts the given data item into position @p pos, and updates the sorting ///@param data can be another empty item, that together with @p pos represents the closest enclosure of the target position ///@return Whether the item could be inserted. It is not inserted if bool insertSorted(const Data& data, int pos, int start, int end, bool force) { if(pos < start) start = pos; if(pos >= end) end = pos+1; /* for(int a = start; a < end; ++a) { if(a != pos) { Q_ASSERT(!ItemHandler::isFree(m_items[a])); } }*/ EmbeddedTreeAlgorithms alg(m_items, m_itemCount, *m_centralFreeItem); int bound = alg.lowerBound(data, start, end); //Now find the position that should be taken if(bound == -1) bound = end; //Now our item should end up right before bound int target; //bound cannot be pos, because pos is invalid Q_ASSERT(bound != pos); #if defined(__GNUC__) && !defined(__INTEL_COMPILER) && (((__GNUC__ * 100) + __GNUC_MINOR__) >= 800) #pragma GCC diagnostic push #pragma GCC diagnostic ignored "-Wclass-memaccess" +#endif //Shuffle around the item at the free pos, so reference counting in constructors/destructors is not screwed up char backup[sizeof(Data)]; memcpy(backup, m_items+pos, sizeof(Data)); if(bound < pos) { if(!force && pos-bound > maxMoveAround()) { // qCDebug(UTIL) << "increasing because" << pos-bound << ">" << maxMoveAround() << "left free items:" << countFreeItems(*m_centralFreeItem) << "target free items:" << (m_itemCount-countFreeItems(*m_centralFreeItem))/increaseFraction; return false; } //Move [bound, pos) one to right, and insert at bound memmove(m_items+bound+1, m_items+bound, sizeof(Data)*(pos-bound)); target = bound; }else { Q_ASSERT(bound > pos); if(!force && bound-pos-1 > maxMoveAround()) { // qCDebug(UTIL) << "increasing because" << bound-pos-1 << ">" << maxMoveAround() << "left free items:" << countFreeItems(*m_centralFreeItem)<< "target free items:" << (m_itemCount-countFreeItems(*m_centralFreeItem))/increaseFraction; return false; } //Move (pos, bound) one to left, and insert at bound-1 memmove(m_items+pos, m_items+pos+1, sizeof(Data)*(bound-pos-1)); target = bound-1; } memcpy(m_items+target, backup, sizeof(Data)); +#if defined(__GNUC__) && !defined(__INTEL_COMPILER) && (((__GNUC__ * 100) + __GNUC_MINOR__) >= 800) #pragma GCC diagnostic pop #endif ItemHandler::copyTo(data, m_items[target]); return true; } const Data& m_add; Data* m_items; uint m_itemCount; int* m_centralFreeItem; bool m_applied, m_needResize; }; /**Use this to add items. * The removed item must be in the set! * General usage: * - Construct the object * - Check if newItemCount() equals the previous item-count. If not, construct * a new list as large as newItemCount, and call object.transferData to transfer the data * into the new list. The new size must match the returned newItemCount. * However this may also be ignored if the memory-saving is not wanted in that moment. * */ template class EmbeddedTreeRemoveItem { public: EmbeddedTreeRemoveItem(Data* items, uint itemCount, int& centralFreeItem, const Data& remove) : m_remove(remove), m_items(items), m_itemCount(itemCount), m_centralFreeItem(¢ralFreeItem), m_insertedAtDepth(0) { apply(); } ~EmbeddedTreeRemoveItem() { } ///Check this to see whether a new item-count is needed. If this does not equal the given itemCount, then ///the data needs to be transferred into a new list using transferData uint newItemCount() const { uint maxFreeItems = ((m_itemCount / increaseFraction)*3)/2 + 1; //First we approximate the count of free items using the insertion depth if((1u << m_insertedAtDepth) >= maxFreeItems) { uint freeCount = countFreeItems(*m_centralFreeItem); if(freeCount > maxFreeItems || freeCount == m_itemCount) { return m_itemCount - freeCount; } } return m_itemCount; } ///Transfers the data into a new item-list. The size of the new item-list must equal newItemCount() void transferData(Data* newItems, uint newCount, int* newCentralFree = nullptr) { DEBUG_FREEITEM_COUNT //We only transfer into a new list when all the free items are used up //Create a list where only the non-free items exist uint offset = 0; for(uint a = 0; a < m_itemCount; ++a) { if(!ItemHandler::isFree(m_items[a])) { newItems[offset] = m_items[a]; ++offset; } } Q_ASSERT(offset == newCount); if(newCentralFree) m_centralFreeItem = newCentralFree; *m_centralFreeItem = -1; m_items = newItems; m_itemCount = newCount; DEBUG_FREEITEM_COUNT } private: void verifyTreeConsistent(int position, int lowerBound, int upperBound) { if(position == -1) return; Q_ASSERT(lowerBound <= position && position < upperBound); verifyTreeConsistent(ItemHandler::leftChild(m_items[position]), lowerBound, position); verifyTreeConsistent(ItemHandler::rightChild(m_items[position]), position+1, upperBound); } uint countFreeItems(int item) const { if(item == -1) return 0; const Data& current(m_items[item]); return 1 + countFreeItems(ItemHandler::leftChild(current)) + countFreeItems(ItemHandler::rightChild(current)); } int findItem(const Data& data, uint start, uint end) { EmbeddedTreeAlgorithms alg(m_items, m_itemCount, *m_centralFreeItem); return alg.indexOf(data, start, end); } void apply() { DEBUG_FREEITEM_COUNT int removeIndex = findItem(m_remove, 0, m_itemCount); Q_ASSERT(removeIndex != -1); Q_ASSERT(!ItemHandler::isFree(m_items[removeIndex])); //Find the free item that is nearest to the target position in the item order int currentItem = *m_centralFreeItem; int lowerBound = 0; //The minimum position the searched item can have int upperBound = m_itemCount; //The lowest known position the searched item can _not_ have if(*m_centralFreeItem == -1) { *m_centralFreeItem = removeIndex; Q_ASSERT(*m_centralFreeItem != -1); ItemHandler::createFreeItem(m_items[*m_centralFreeItem]); return; } //Now go down the chain, always into the items direction ///@todo make the structure better: Don't just put left/right child, but also swap when neede /// to balance the tree while(1) { Q_ASSERT(removeIndex != currentItem); Data& current(m_items[currentItem]); ++m_insertedAtDepth; if(removeIndex < currentItem) { upperBound = currentItem; //Follow left child if(ItemHandler::leftChild(current) != -1) { //Continue traversing currentItem = ItemHandler::leftChild(current); Q_ASSERT(currentItem >= lowerBound && currentItem < upperBound); }else{ //The to-be deleted item is before current, and can be added as left child to current int item = findItem(m_remove, lowerBound, upperBound); Q_ASSERT(item == removeIndex); ItemHandler::createFreeItem(m_items[item]); Q_ASSERT(item == -1 || item < currentItem); ItemHandler::setLeftChild(current, item); Q_ASSERT(item >= lowerBound && item < upperBound); break; } }else{ lowerBound = currentItem+1; //Follow right child if(ItemHandler::rightChild(current) != -1) { //Continue traversing currentItem = ItemHandler::rightChild(current); Q_ASSERT(currentItem >= lowerBound && currentItem < upperBound); }else{ //The to-be deleted item is behind current, and can be added as right child to current int item = findItem(m_remove, lowerBound, upperBound); Q_ASSERT(item == removeIndex); ItemHandler::createFreeItem(m_items[item]); Q_ASSERT(item == -1 || currentItem < item); ItemHandler::setRightChild(current, item); Q_ASSERT(item >= lowerBound && item < upperBound); break; } } } DEBUG_FREEITEM_COUNT } void debugFreeItemCount() { uint count = 0; for(uint a = 0; a < m_itemCount; ++a) { if(ItemHandler::isFree(m_items[a])) ++count; } uint counted = countFreeItems(*m_centralFreeItem); Q_ASSERT(count == counted); Q_UNUSED(counted); } const Data& m_remove; Data* m_items; uint m_itemCount; int* m_centralFreeItem; int m_insertedAtDepth; }; } #endif