diff --git a/src/main/java/org/truffleruby/core/hash/HashLiteralNode.java b/src/main/java/org/truffleruby/core/hash/HashLiteralNode.java index 7fad6a79619..3e065e50749 100644 --- a/src/main/java/org/truffleruby/core/hash/HashLiteralNode.java +++ b/src/main/java/org/truffleruby/core/hash/HashLiteralNode.java @@ -37,7 +37,7 @@ protected HashLiteralNode(RubyNode[] keyValues) { public static HashLiteralNode create(RubyNode[] keyValues) { if (keyValues.length == 0) { return new EmptyHashStore.EmptyHashLiteralNode(); - } else if (keyValues.length <= PackedHashStoreLibrary.MAX_ENTRIES * 2 && false) { + } else if (keyValues.length <= PackedHashStoreLibrary.MAX_ENTRIES * 2) { return new PackedHashStoreLibrary.SmallHashLiteralNode(keyValues); } else { return bigHashTypeIsCompactHash diff --git a/src/main/java/org/truffleruby/core/hash/RubyHash.java b/src/main/java/org/truffleruby/core/hash/RubyHash.java index dbed8df3af2..76d2a898742 100644 --- a/src/main/java/org/truffleruby/core/hash/RubyHash.java +++ b/src/main/java/org/truffleruby/core/hash/RubyHash.java @@ -11,6 +11,20 @@ import java.util.Set; +import org.truffleruby.RubyContext; +import org.truffleruby.collections.PEBiFunction; +import org.truffleruby.core.hash.library.BucketsHashStore; +import org.truffleruby.core.hash.library.CompactHashStore; +import org.truffleruby.core.hash.library.HashStoreLibrary; +import org.truffleruby.core.klass.RubyClass; +import org.truffleruby.interop.ForeignToRubyNode; +import org.truffleruby.language.Nil; +import org.truffleruby.language.RubyDynamicObject; +import org.truffleruby.language.dispatch.DispatchNode; +import org.truffleruby.language.library.RubyLibrary; +import org.truffleruby.language.objects.ObjectGraph; +import org.truffleruby.language.objects.ObjectGraphNode; + import com.oracle.truffle.api.CompilerDirectives.TruffleBoundary; import com.oracle.truffle.api.dsl.Cached; import com.oracle.truffle.api.dsl.Cached.Exclusive; @@ -24,20 +38,7 @@ import com.oracle.truffle.api.library.ExportLibrary; import com.oracle.truffle.api.library.ExportMessage; import com.oracle.truffle.api.object.Shape; - import com.oracle.truffle.api.profiles.ConditionProfile; -import org.truffleruby.RubyContext; -import org.truffleruby.collections.PEBiFunction; -import org.truffleruby.core.hash.library.BucketsHashStore; -import org.truffleruby.core.hash.library.HashStoreLibrary; -import org.truffleruby.core.klass.RubyClass; -import org.truffleruby.interop.ForeignToRubyNode; -import org.truffleruby.language.Nil; -import org.truffleruby.language.RubyDynamicObject; -import org.truffleruby.language.dispatch.DispatchNode; -import org.truffleruby.language.library.RubyLibrary; -import org.truffleruby.language.objects.ObjectGraph; -import org.truffleruby.language.objects.ObjectGraphNode; @ExportLibrary(InteropLibrary.class) @ImportStatic(HashGuards.class) @@ -84,6 +85,8 @@ public String toString() { public void getAdjacentObjects(Set reachable) { if (store instanceof BucketsHashStore) { ((BucketsHashStore) store).getAdjacentObjects(reachable); + } else if (store instanceof CompactHashStore) { + ((CompactHashStore) store).getAdjacentObjects(reachable); } else { ObjectGraph.addProperty(reachable, store); } diff --git a/src/main/java/org/truffleruby/core/hash/library/CompactHashStore.java b/src/main/java/org/truffleruby/core/hash/library/CompactHashStore.java index 0d7b0576666..8d46c44f356 100644 --- a/src/main/java/org/truffleruby/core/hash/library/CompactHashStore.java +++ b/src/main/java/org/truffleruby/core/hash/library/CompactHashStore.java @@ -15,6 +15,7 @@ import static org.truffleruby.core.hash.library.HashStoreLibrary.EachEntryCallback; import java.util.Arrays; +import java.util.Set; import org.truffleruby.RubyContext; import org.truffleruby.RubyLanguage; @@ -28,6 +29,7 @@ import org.truffleruby.core.hash.RubyHash; import org.truffleruby.language.RubyBaseNode; import org.truffleruby.language.RubyNode; +import org.truffleruby.language.objects.ObjectGraph; import org.truffleruby.language.objects.shared.PropagateSharingNode; import com.oracle.truffle.api.dsl.Bind; @@ -72,7 +74,7 @@ public class CompactHashStore { // multiple not-equal hashes can map to the same position in the index array, that's a different sort of // "collision" in addition to the usual collision of equal hash values coming from different keys. Both kinds // of collisions actually result in the same outcome : contention on an index slot. So both are automatically - // handled by the same collision-resolution strategy : open-addressing-with linear proping.) + // handled by the same collision-resolution strategy : open-addressing-with linear probing.) // ----------------------------------------------- // (3) tl;dr: Compact Hashes give you both insertion-order iteration AND the usual const-time guarantees // --------------------------------------------------------------------------------------------------------------- @@ -91,34 +93,32 @@ public class CompactHashStore { // This tracks the number of occupied slots at which we should rebuild the index array // For example, at a load factor of 0.75 and an index of total size 100 slot, that number is 75 slots // (Technically, that's redundant information that is derived from the load factor and the index size, - // but deriving it requires a float multiplication or division, and we want to check for it on every insertion so + // but deriving it requires a float multiplication or division, and we want to check for it on every insertion, so // it's inefficient to keep calculating it every time its needed) private int numSlotsForIndexRebuild; // Each slot in the index array can be in one of 3 states, depending on the value of its second (offset) field : - // Offset >= 1 : Filled, the data in the hash field and the index field is valid. Subtracting one from the offset + // Offset >= 1 : Filled, the data in the hash field and the offset field is valid. Subtracting one from the offset // will yield a valid index into the KV array. - // Offset == 0 : Unused, the data in the hash field and the index field is NOT valid, and the slot was never filled + // Offset == 0 : Unused, the data in the hash field and the offset field is NOT valid, and the slot was never filled // with valid data. - // Offset == -1 : Deleted, the data in the hash field and the index field is NOT valid, but the slot used to be + // Offset == -1 : Deleted, the data in the hash field and the offset field is NOT valid, but the slot used to be // occupied with valid data before. private static final int INDEX_SLOT_UNUSED_MARKER = 0; private static final int INDEX_SLOT_DELETED_MARKER = -1; - // returned by code that does array search and doesn't find what it's looking for + // returned by methods doing array search which doesn't find what they're looking for private static final int KEY_NOT_FOUND = -2; - protected static final int HASH_NOT_FOUND = KEY_NOT_FOUND; + private static final int HASH_NOT_FOUND = KEY_NOT_FOUND; - // generic "not a valid array position" value to be used by all code doing array searches for things other than + // a generic "not a valid array position" value to be used by all code doing array searches for things other than // keys and hashes private static final int INVALID_ARRAY_POSITION = Integer.MIN_VALUE; - // Number of hash entries, not array positions (in general, capacities and sizes are always in entries) + // In hash entries, not array positions (in general, capacities and sizes are always in entries) public static final int DEFAULT_INITIAL_CAPACITY = 8; - public static final float THRESHOLD_LOAD_FACTOR_FOR_INDEX_REBUILD = 0.7f; - // temporary debugging aid so that its obvious the class is being used without being spam - private static boolean LOG_FIRST_ACCESS = false; + public static final float THRESHOLD_LOAD_FACTOR_FOR_INDEX_REBUILD = 0.7f; public CompactHashStore() { this(DEFAULT_INITIAL_CAPACITY); @@ -133,11 +133,6 @@ private CompactHashStore(int[] index, Object[] kvs, int kvStoreInsertionPos, int } public CompactHashStore(int capacity) { - // temporary and ugly - if (LOG_FIRST_ACCESS) { - System.out.println("~~~~~~~~~~~~~~~~~~~~~~~ Using Compact Hashes from Hash Tables That Don't Hate You."); - LOG_FIRST_ACCESS = false; // without this we would spam the output stream - } assertConstructorPreconditions(capacity); int kvCapacity = roundUpwardsToNearestPowerOf2(capacity); @@ -165,7 +160,7 @@ private static void assertConstructorPreconditions(int capacity) { @ExportMessage protected boolean set(RubyHash hash, Object key, Object value, boolean byIdentity, @Cached @Shared HashingNodes.ToHash hashFunction, - @Cached @Shared GetKeyPosInKVStoreNode getKey, + @Cached @Shared GetHashPosAndKvPosForKeyNode getHashPosAndKvPos, @Cached FreezeHashKeyIfNeededNode freezeKey, @Cached PropagateSharingNode propagateSharingForKey, @Cached PropagateSharingNode propagateSharingForVal, @@ -175,13 +170,15 @@ protected boolean set(RubyHash hash, Object key, Object value, boolean byIdentit @Bind("$node") Node node) { /* boilerplate argument for InlinedConditionProfile */ var frozenKey = freezeKey.executeFreezeIfNeeded(key, byIdentity); int keyHash = hashFunction.execute(key, byIdentity); - int keyKvPos = intPairSecond(getKey.execute(frozenKey, keyHash, byIdentity, index, kvStore)); + int keyKvPos = intPairSecond( + getHashPosAndKvPos.execute(frozenKey, keyHash, byIdentity, index, kvStore)); propagateSharingForKey.executePropagate(hash, frozenKey); propagateSharingForVal.executePropagate(hash, value); // key doesn't exist, so set is a relatively expensive insertion - if (keyNotFound.profile(node, keyKvPos == KEY_NOT_FOUND)) { + if (keyNotFound.profile(node, + keyKvPos == KEY_NOT_FOUND)) { resizeIndexIfNeeded(hash.size, indexResizingIsNotNeeded, node); resizeKvStoreIfNeeded(kvResizingIsNeeded, node); @@ -200,7 +197,7 @@ protected boolean set(RubyHash hash, Object key, Object value, boolean byIdentit @ExportMessage protected Object lookupOrDefault(Frame frame, RubyHash hash, Object key, PEBiFunction defaultNode, - @Cached @Shared GetKeyPosInKVStoreNode getKey, + @Cached @Shared GetHashPosAndKvPosForKeyNode getKey, @Cached @Shared HashingNodes.ToHash hashFunction, @Cached @Exclusive InlinedConditionProfile keyNotFound, @Bind("$node") Node node) { @@ -215,7 +212,7 @@ protected Object lookupOrDefault(Frame frame, RubyHash hash, Object key, PEBiFun @ExportMessage protected Object delete(RubyHash hash, Object key, - @Cached @Shared GetKeyPosInKVStoreNode getKey, + @Cached @Shared GetHashPosAndKvPosForKeyNode getKey, @Cached @Shared HashingNodes.ToHash hashFunction, @Cached @Exclusive InlinedConditionProfile keyNotFound, @Bind("$node") Node node) { @@ -348,7 +345,7 @@ private void insertIntoIndex(int keyHash, int kvPos) { boolean slotFilled = index[pos + 1] > INDEX_SLOT_UNUSED_MARKER; while (slotFilled) { - pos = (pos + 2) & (index.length - 1); + pos = incrementIndexPos(pos, index.length); slotFilled = index[pos + 1] > INDEX_SLOT_UNUSED_MARKER; } @@ -366,7 +363,8 @@ private void insertIntoKv(Object key, Object value) { private void resizeIndexIfNeeded(int size, InlinedConditionProfile indexResizingIsNotNeeded, Node node) { - if (indexResizingIsNotNeeded.profile(node, size < numSlotsForIndexRebuild)) { + if (indexResizingIsNotNeeded.profile(node, + size < numSlotsForIndexRebuild)) { return; } @@ -431,32 +429,58 @@ private static int getIndexPosFromHash(int h, int max) { return h & (max - 2); } - // Or KEY_NOT_FOUND if key doesn't exist + private static int incrementIndexPos(int pos, int max) { + return (pos + 2) & (max - 1); + } + + public void getAdjacentObjects(Set reachable) { + for (int i = 0; i < kvStoreInsertionPos; i += 2) { + if (kvStore[i] != null) { + ObjectGraph.addProperty(reachable, kvStore[i]); + ObjectGraph.addProperty(reachable, kvStore[i + 1]); + } + } + } + + /** Given : A key and its hash + *

+ * Returns : A pair of positions (array indices), the first of which is where the key's hash is stored in the index + * array, and the second is where the key itself is stored in the KV Store array + *

+ * If the key doesn't exist, returns KEY_NOT_FOUND NOTE : The node encodes a pair of integers as a long, this avoids + * allocations at a slight cost of readability */ @GenerateUncached - abstract static class GetKeyPosInKVStoreNode extends RubyBaseNode { + abstract static class GetHashPosAndKvPosForKeyNode extends RubyBaseNode { public abstract long execute(Object key, int hash, boolean compareByIdentity, int[] index, Object[] kvStore); @Specialization protected long getKeyPos(Object key, int hash, boolean compareByIdentity, int[] index, Object[] kvStore, @Cached CompareHashKeysNode compareHashKeysNode, - @Cached GetHashFirstPosInIndexNode getFirstHashPos, @Cached GetHashNextPosInIndexNode getNextHashPos, @Cached @Exclusive InlinedConditionProfile passedKeyIsEqualToFoundKey, @Bind("$node") Node node) { - int firstHashPosInIndex = getFirstHashPos.execute(hash, index); - int nextHashPos = firstHashPosInIndex; + int startPos = getIndexPosFromHash(hash, index.length); + long result = getNextHashPos.execute(startPos, hash, index, INVALID_ARRAY_POSITION, startPos); + int firstHashPosInIndex = intPairSecond(result); + int relocationPos = intPairFirst(result); + + int nextHashPos = firstHashPosInIndex; while (nextHashPos != HASH_NOT_FOUND) { int kvPos = index[nextHashPos + 1] - 1; Object otherKey = kvStore[kvPos]; int otherHash = index[nextHashPos]; + relocateHashIfPossible(nextHashPos, relocationPos, index); if (passedKeyIsEqualToFoundKey.profile(node, compareHashKeysNode.execute(compareByIdentity, key, hash, otherKey, otherHash))) { return intPair(nextHashPos, kvPos); } - nextHashPos = getNextHashPos.execute(nextHashPos, hash, index, firstHashPosInIndex); + int next = incrementIndexPos(nextHashPos, index.length); + result = getNextHashPos.execute(next, hash, index, relocationPos, startPos); + nextHashPos = intPairSecond(result); + relocationPos = intPairFirst(result); } return KEY_NOT_FOUND; @@ -475,81 +499,33 @@ private static int intPairSecond(long pair) { return (int) pair; } - // Or HASH_NOT_FOUND if hash doesn't exist - @GenerateUncached - abstract static class GetHashFirstPosInIndexNode extends RubyBaseNode { - public abstract int execute(int hash, int[] index); - - @Specialization - protected int getHashPos(int hash, int[] index, - @Cached @Exclusive InlinedConditionProfile deletedSlotIsDesiredAndFound, - @Cached @Exclusive InlinedConditionProfile hashValueFound, - @Cached @Exclusive InlinedConditionProfile hashValueNotFoundInEntireArray, - @Bind("$node") Node node) { - int pos = getIndexPosFromHash(hash, index.length); - - boolean slotUnused = index[pos + 1] == INDEX_SLOT_UNUSED_MARKER; - boolean slotDeleted = index[pos + 1] == INDEX_SLOT_DELETED_MARKER; - int firstDeletedSlot = INVALID_ARRAY_POSITION; - int firstPos = pos; - - while (!slotUnused) { - if (deletedSlotIsDesiredAndFound.profile(node, - slotDeleted /* found */ && firstDeletedSlot == INVALID_ARRAY_POSITION /* desired */)) { - firstDeletedSlot = pos; - } else if (hashValueFound.profile(node, /* slot is neither unused nor deleted, i.e. it's filled */ - index[pos] == hash)) { // is the hash there equal ? - // So we found the hash we want - // See if we encountered any deleted slots along the way - if (firstDeletedSlot != INVALID_ARRAY_POSITION) { - // and relocate the hash to the first deleted slot so that we find it faster the next time - index[firstDeletedSlot] = hash; - index[firstDeletedSlot + 1] = index[pos + 1]; - // mark the original slot as deleted - index[pos + 1] = INDEX_SLOT_DELETED_MARKER; - return firstDeletedSlot; // which now contains the element - } - return pos; - } - - // If the slot was deleted or filled with a different hash from the one we're looking for, keep going - pos = (pos + 2) & (index.length - 1); - // if we're back where we started, that means we traversed the entire array and didn't find the key - if (hashValueNotFoundInEntireArray.profile(node, - pos == firstPos)) { - return HASH_NOT_FOUND; - } - - slotUnused = index[pos + 1] == INDEX_SLOT_UNUSED_MARKER; - slotDeleted = index[pos + 1] == INDEX_SLOT_DELETED_MARKER; - } - return HASH_NOT_FOUND; - } - } - @GenerateUncached abstract static class GetHashNextPosInIndexNode extends RubyBaseNode { - public abstract int execute(int startingAfterPos, int hash, int[] index, int stop); + public abstract long execute(int startingFromPos, int hash, int[] index, int relocationPos, int stop); @Specialization - protected int getNextPosForHash(int startingAfterPos, int hash, int[] index, int stop, + protected long getNextHashPos(int startingFromPos, int hash, int[] index, int stop, @Cached @Exclusive InlinedConditionProfile slotIsDeleted, @Cached @Exclusive InlinedConditionProfile slotIsUnused, @Bind("$node") Node node) { - int nextHashPos = startingAfterPos; + int nextHashPos = startingFromPos; + int firstDeletedSlot = INVALID_ARRAY_POSITION; do { - nextHashPos = (nextHashPos + 2) & (index.length - 1); if (slotIsUnused.profile(node, index[nextHashPos + 1] == INDEX_SLOT_UNUSED_MARKER)) { return HASH_NOT_FOUND; } + if (slotIsDeleted.profile(node, index[nextHashPos + 1] == INDEX_SLOT_DELETED_MARKER)) { - continue; - } - if (index[nextHashPos] == hash) { - return nextHashPos; + if (firstDeletedSlot == INVALID_ARRAY_POSITION) { + firstDeletedSlot = nextHashPos; + } + } else if (index[nextHashPos] == hash) { + return intPair(firstDeletedSlot, nextHashPos); } + + nextHashPos = incrementIndexPos(nextHashPos, index.length); } while (nextHashPos != stop); return HASH_NOT_FOUND; @@ -562,21 +538,39 @@ abstract static class GetHashPosForKeyAtPosNode extends RubyBaseNode { @Specialization protected int getHashPos(int hash, int kvPos, int[] index, - @Cached GetHashFirstPosInIndexNode getFirstHashPos, @Cached GetHashNextPosInIndexNode getNextHashPos) { - int firstPos = getFirstHashPos.execute(hash, index); - int nextPos = firstPos; + int startPos = getIndexPosFromHash(hash, index.length); + long result = getNextHashPos.execute(startPos, hash, index, INVALID_ARRAY_POSITION, startPos); + + int firstHashPos = intPairSecond(result); + int relocationPos = intPairFirst(result); + + int nextPos = firstHashPos; while (nextPos != HASH_NOT_FOUND) { int kvPosition = index[nextPos + 1] - 1; + relocateHashIfPossible(nextPos, relocationPos, index); + if (kvPos == kvPosition) { return nextPos; } - nextPos = getNextHashPos.execute(nextPos, hash, index, firstPos); + + int next = incrementIndexPos(nextPos, index.length); + result = getNextHashPos.execute(next, hash, index, relocationPos, startPos); + nextPos = intPairSecond(result); + relocationPos = intPairFirst(result); } return HASH_NOT_FOUND; } } + private static void relocateHashIfPossible(int currPos, int relocationPos, int[] index) { + if (relocationPos != INVALID_ARRAY_POSITION) { + index[relocationPos] = index[currPos]; + index[relocationPos + 1] = index[currPos + 1]; + index[currPos + 1] = INDEX_SLOT_DELETED_MARKER; + } + } + public static class CompactHashLiteralNode extends HashLiteralNode { @Child HashStoreLibrary hashlib;