diff --git a/serialization/itemrepository.h b/serialization/itemrepository.h --- a/serialization/itemrepository.h +++ b/serialization/itemrepository.h @@ -54,9 +54,6 @@ #define IF_ENSURE_REACHABLE(x) #endif -///Do not enable this #define, the issue it catches is non-critical and happens on a regular basis -// #define DEBUG_HASH_SEQUENCES - #define ITEMREPOSITORY_USE_MMAP_LOADING //Assertion macro that prevents warnings if debugging is disabled @@ -1364,106 +1361,49 @@ m_metaDataChanged = true; - unsigned short bucket = (index >> 16); - - unsigned int hash = itemFromIndex(index)->hash(); - - short unsigned int* bucketHashPosition = m_firstBucketForHash + (hash % bucketHashSize); - short unsigned int previousBucketNumber = *bucketHashPosition; - - Q_ASSERT(previousBucketNumber); - - if(previousBucketNumber == bucket) - previousBucketNumber = 0; - - MyBucket* previousBucketPtr = 0; + const uint hash = itemFromIndex(index)->hash(); + const ushort bucket = (index >> 16); //Apart from removing the item itself, we may have to recreate the nextBucketForHash link, so we need the previous bucket - - while(previousBucketNumber) { - //We have a bucket that contains an item with the given hash % bucketHashSize, so check if the item is already there - - previousBucketPtr = m_buckets[previousBucketNumber]; - if(!previousBucketPtr) { - initializeBucket(previousBucketNumber); - previousBucketPtr = m_buckets[previousBucketNumber]; + MyBucket* previousBucketPtr = nullptr; + MyBucket* const bucketPtr = walkBucketChain(hash, + [bucket, &previousBucketPtr](ushort bucketIdx, MyBucket* bucketPtr) -> MyBucket* { + if (bucket != bucketIdx) { + previousBucketPtr = bucketPtr; + return nullptr; } + return bucketPtr; // found bucket, stop looking + }); - short unsigned int nextBucket = previousBucketPtr->nextBucketForHash(hash); - Q_ASSERT(nextBucket); - if(nextBucket == bucket) - break; //Now previousBucketNumber - else - previousBucketNumber = nextBucket; - } - - //Make sure the index was reachable through the hashes - Q_ASSERT(previousBucketNumber || *bucketHashPosition == bucket); - - MyBucket* bucketPtr = m_buckets[bucket]; - if(!bucketPtr) { - initializeBucket(bucket); - bucketPtr = m_buckets[bucket]; - } + //Make sure the index was reachable through the hash chain + Q_ASSERT(bucketPtr); --m_statItemCount; bucketPtr->deleteItem(index, hash, *this); /** - * Now check whether the link previousBucketNumber -> bucket is still needed. + * Now check whether the link root/previousBucketNumber -> bucket is still needed. */ - ///@todo Clear the nextBucketForHash links when not needed any more, by doing setNextBucketForHash(hash, 0); - //return; ///@todo Find out what this problem is about. If we don't return here, some items become undiscoverable through hashes after some time - if(previousBucketNumber == 0) { - //The item is directly in the m_firstBucketForHash hash - //Put the next item in the nextBucketForHash chain into m_firstBucketForHash that has a hash clashing in that array. - Q_ASSERT(*bucketHashPosition == bucket); - IF_ENSURE_REACHABLE(unsigned short previous = bucket;) - auto nextBucket = bucketPtr; - while(!nextBucket->hasClashingItem(hash, bucketHashSize)) - { - unsigned short next = nextBucket->nextBucketForHash(hash); - ENSURE_REACHABLE(next); - ENSURE_REACHABLE(previous); - - *bucketHashPosition = next; - - ENSURE_REACHABLE(previous); - ENSURE_REACHABLE(next); - - IF_ENSURE_REACHABLE(previous = next;) - if(next) { - nextBucket = m_buckets[next]; - - if(!nextBucket) - { - initializeBucket(next); - nextBucket = m_buckets[next]; - } - }else{ - break; + if (!previousBucketPtr) { + // This bucket is linked in the m_firstBucketForHash array, find the next clashing bucket in the chain + // There may be items in the chain that clash only with MyBucket::NextBucketHashSize, skipped here + m_firstBucketForHash[hash % bucketHashSize] = walkBucketChain(hash, [hash](ushort bucketIdx, MyBucket *bucketPtr){ + if (bucketPtr->hasClashingItem(hash, bucketHashSize)) { + return bucketIdx; } - } - }else{ - if(!bucketPtr->hasClashingItem(hash, MyBucket::NextBucketHashSize)) { - ///Debug: Check for infinite recursion - walkBucketLinks(*bucketHashPosition, hash); - - Q_ASSERT(previousBucketPtr->nextBucketForHash(hash) == bucket); - - ENSURE_REACHABLE(bucket); + return static_cast(0); + }); + } else if(!bucketPtr->hasClashingItem(hash, MyBucket::NextBucketHashSize)) { + // TODO: Skip clashing items reachable from m_firstBucketForHash + // (see note in usePermissiveModuloWhenRemovingClashLinks() test) - previousBucketPtr->setNextBucketForHash(hash, bucketPtr->nextBucketForHash(hash)); + ENSURE_REACHABLE(bucket); - ENSURE_REACHABLE(bucket); + previousBucketPtr->setNextBucketForHash(hash, bucketPtr->nextBucketForHash(hash)); - ///Debug: Check for infinite recursion - Q_ASSERT(walkBucketLinks(*bucketHashPosition, hash, bucketPtr->nextBucketForHash(hash))); - - Q_ASSERT(bucketPtr->nextBucketForHash(hash) != previousBucketNumber); - } + Q_ASSERT(m_buckets[bucketPtr->nextBucketForHash(hash)] != previousBucketPtr); } ENSURE_REACHABLE(bucket); @@ -1472,7 +1412,7 @@ //Convert the monster-bucket back to multiple normal buckets, and put them into the free list uint newBuckets = bucketPtr->monsterBucketExtent()+1; Q_ASSERT(bucketPtr->isEmpty()); - if (!previousBucketNumber) { + if (!previousBucketPtr) { // see https://bugs.kde.org/show_bug.cgi?id=272408 // the monster bucket will be deleted and new smaller ones created // the next bucket for this hash is invalid anyways as done above @@ -1489,10 +1429,6 @@ }else{ putIntoFreeList(bucket, bucketPtr); } - -#ifdef DEBUG_HASH_SEQUENCES - Q_ASSERT(*bucketHashPosition == 0 || bucketForIndex(*bucketHashPosition)->hasClashingItem(hash, bucketHashSize)); -#endif } ///This returns an editable version of the item. @warning: Never change an entry that affects the hash, @@ -1839,7 +1775,7 @@ unsigned short bucketIndex = m_firstBucketForHash[hash % bucketHashSize]; while (bucketIndex) { - const auto* bucketPtr = m_buckets[bucketIndex]; + auto* bucketPtr = m_buckets[bucketIndex]; if (!bucketPtr) { initializeBucket(bucketIndex); bucketPtr = m_buckets[bucketIndex]; diff --git a/serialization/tests/test_itemrepository.cpp b/serialization/tests/test_itemrepository.cpp --- a/serialization/tests/test_itemrepository.cpp +++ b/serialization/tests/test_itemrepository.cpp @@ -244,6 +244,113 @@ QVERIFY(!repository.findIndex(TestItemRequest(*monsterItem, true))); repository.deleteItem(smallIndex); } + void usePermissiveModuloWhenRemovingClashLinks() + { + KDevelop::ItemRepository repository("PermissiveModulo"); + + // FIXME: These are private to ItemRepository, but should be available to tests + const uint bucketHashSize = 1048460; + const uint nextBucketHashSize = 140; + auto bucketForIndex = [](const uint index) { + return index >> 16; + }; + + const uint clashValue = 2; + + // Choose sizes that ensure that the items fit in the desired buckets + const uint bigItemSize = KDevelop::ItemRepositoryBucketSize * 0.55 - 1; + const uint smallItemSize = KDevelop::ItemRepositoryBucketSize * 0.25 - 1; + + // Will get placed in bucket 1 (bucket zero is invalid), so the root bucket table at position 'clashValue' will be '1' + const QScopedPointer firstChainFirstLink(createItem(clashValue, bigItemSize)); + const uint firstChainFirstLinkIndex = repository.index(*firstChainFirstLink); + QCOMPARE(bucketForIndex(firstChainFirstLinkIndex), 1u); + + // Will also get placed in bucket 1, so root bucket table at position 'nextBucketHashSize + clashValue' will be '1' + const QScopedPointer secondChainFirstLink(createItem(nextBucketHashSize + clashValue, smallItemSize)); + const uint secondChainFirstLinkIndex = repository.index(*secondChainFirstLink); + QCOMPARE(bucketForIndex(secondChainFirstLinkIndex), 1u); + + // Will get placed in bucket 2, so bucket 1's next hash table at position 'clashValue' will be '2' + const QScopedPointer firstChainSecondLink(createItem(bucketHashSize + clashValue, bigItemSize)); + const uint firstChainSecondLinkIndex = repository.index(*firstChainSecondLink); + QCOMPARE(bucketForIndex(firstChainSecondLinkIndex), 2u); + + // Will also get placed in bucket 2, reachable since bucket 1's next hash table at position 'clashValue' is '2' + const QScopedPointer secondChainSecondLink(createItem(bucketHashSize + nextBucketHashSize + clashValue, smallItemSize)); + const uint secondChainSecondLinkIndex = repository.index(*secondChainSecondLink); + QCOMPARE(bucketForIndex(secondChainSecondLinkIndex), 2u); + + /* + * At this point we have two chains in the repository, rooted at 'clashValue' and 'nextBucketHashSize + clashValue' + * Both of the chains start in bucket 1 and end in bucket 2, but both chains share the same link to bucket 2 + * This is because two of the hashes clash the other two when % bucketHashSize, but all of them clash % nextBucketHashSize + */ + + repository.deleteItem(firstChainSecondLinkIndex); + + /* + * Now we've deleted the second item in the first chain, this means the first chain no longer requires a link to the + * second bucket where that item was... but the link must remain, since it's shared (clashed) by the second chain. + * + * When cutting a link out of the middle of the chain, we need to check if its items clash using the "permissive" + * modulus (the size of the /next/ buckets map), which is always a factor of the "stricter" modulus (the size of the + * /root/ buckets map). + * + * This behavior implies that there will sometimes be useless buckets in the bucket chain for a given hash, so when + * cutting out the root link, it's safe to skip over them to the first clash with the 'stricter' modulus. + */ + + // The second item of the second chain must still be reachable + QCOMPARE(repository.findIndex(*secondChainSecondLink), secondChainSecondLinkIndex); + + /* + * As a memo to anyone who's still reading this, this also means the following situation can exist: + * + * bucketHashSize == 8 + * nextBucketHashSize == 4 + * U is a link table + * B is a bucket + * [...] are the hashes of the contained items + * + * U + * U + * U -> B1 + * U + * U + * U + * U -> B2 + * U + * + * B0 (Invalid) + * B1 -> [2, 6] + * U + * U + * U -> B3 + * U + * B2 -> [14] + * U + * U + * U -> B1 + * U + * B3 -> [10] + * U + * U + * U + * U + * + * The chain for hash 6 is: + * Root[6] -> B2[2] -> B1[2] -> B3 + * + * If you remove the item with hash 6, 6 and 2 will clash with mod 4 (permissive) + * + * So the useless link `B2[2] -> B1` will be preserved, even though its useless + * as the item with hash 2 is reachable directly from the root. + * + * So TODO: Don't preserve links to items accessible from root buckets. This cannot + * be done correctly using only Bucket::hasClashingItem as of now. + */ + } }; #include "test_itemrepository.moc"