From 9eec9899ff4d58f139857b8ee108f8f4d7d7e65f Mon Sep 17 00:00:00 2001 From: clintonsteiner Date: Sun, 22 Feb 2026 00:47:28 -0600 Subject: [PATCH 1/4] gh-142889: Restructure PyDictKeysObject memory layout for simpler entry access Restructure dict keys allocation to store dk_indices before the PyDictKeysObject header and keep dk_entries after the header. Update dict index access and related allocation/free/clone paths, adjust gdb dict entry location logic, and add layout coverage tests. Local dict microbenchmarks showed about a 1.4% overall improvement, with most operations around 1-2% faster. --- Include/internal/pycore_dict.h | 18 +++- Lib/test/test_dict.py | 15 ++++ ...-02-22-00-50-00.gh-issue-142889.2u9mYk.rst | 4 + Modules/_testinternalcapi.c | 48 ++++++++++ Objects/dictobject.c | 87 ++++++++++++------- Tools/gdb/libpython.py | 11 --- 6 files changed, 138 insertions(+), 45 deletions(-) create mode 100644 Misc/NEWS.d/next/Core_and_Builtins/2026-02-22-00-50-00.gh-issue-142889.2u9mYk.rst diff --git a/Include/internal/pycore_dict.h b/Include/internal/pycore_dict.h index 59e88be6aeec12..8be5bdcc167ff7 100644 --- a/Include/internal/pycore_dict.h +++ b/Include/internal/pycore_dict.h @@ -198,7 +198,6 @@ struct _dictkeysobject { /* Number of used entries in dk_entries. */ Py_ssize_t dk_nentries; - /* Actual hash table of dk_size entries. It holds indices in dk_entries, or DKIX_EMPTY(-1) or DKIX_DUMMY(-2). @@ -243,10 +242,21 @@ struct _dictvalues { #define DK_SIZE(dk) (1<dk_log2_index_bytes; + return (char *)dk - indices_size; +} + +static inline void* _DK_ALLOC_BASE(PyDictKeysObject *dk) { + return _DK_INDICES_BASE(dk); +} + static inline void* _DK_ENTRIES(PyDictKeysObject *dk) { - int8_t *indices = (int8_t*)(dk->dk_indices); - size_t index = (size_t)1 << dk->dk_log2_index_bytes; - return (&indices[index]); + return (void *)(&dk->dk_indices[0]); } static inline PyDictKeyEntry* DK_ENTRIES(PyDictKeysObject *dk) { diff --git a/Lib/test/test_dict.py b/Lib/test/test_dict.py index 14b501360d0b8e..e8d524c8a4ccc5 100644 --- a/Lib/test/test_dict.py +++ b/Lib/test/test_dict.py @@ -1711,6 +1711,21 @@ def __hash__(self): self.assertEqual(dict_getitem_knownhash(d, k1, hash(k1)), 1) self.assertRaises(Exc, dict_getitem_knownhash, d, k2, hash(k2)) + @support.cpython_only + def test_indices_layout(self): + _testinternalcapi = import_helper.import_module('_testinternalcapi') + check_layout = _testinternalcapi.dict_check_indices_layout + + dicts = [ + {}, + {i: i for i in range(10)}, + {i: i for i in range(200)}, + {i: i for i in range(2000)}, + {i: i for i in range(70000)}, + ] + for d in dicts: + with self.subTest(size=len(d)): + self.assertTrue(check_layout(d)) from test import mapping_tests diff --git a/Misc/NEWS.d/next/Core_and_Builtins/2026-02-22-00-50-00.gh-issue-142889.2u9mYk.rst b/Misc/NEWS.d/next/Core_and_Builtins/2026-02-22-00-50-00.gh-issue-142889.2u9mYk.rst new file mode 100644 index 00000000000000..07c95802fe2a31 --- /dev/null +++ b/Misc/NEWS.d/next/Core_and_Builtins/2026-02-22-00-50-00.gh-issue-142889.2u9mYk.rst @@ -0,0 +1,4 @@ +Restructure ``PyDictKeysObject`` memory layout so the indices array is stored +before the object header, and update dict index access accordingly. In local +dict-operation microbenchmarks this was about 1.4% faster overall, with most +operations improving by roughly 1-2% (:gh:`142889`). diff --git a/Modules/_testinternalcapi.c b/Modules/_testinternalcapi.c index 22cfa3f58a9d83..50dc0413c3b17a 100644 --- a/Modules/_testinternalcapi.c +++ b/Modules/_testinternalcapi.c @@ -1903,6 +1903,53 @@ dict_getitem_knownhash(PyObject *self, PyObject *args) return Py_XNewRef(result); } +static size_t +dict_index_bytes_for_keys(PyDictKeysObject *keys) +{ + int index_shift = keys->dk_log2_index_bytes - DK_LOG_SIZE(keys); + if (index_shift == 0) { + return 1; + } + if (index_shift == 1) { + return 2; + } +#if SIZEOF_VOID_P > 4 + if (index_shift == 3) { + return 8; + } +#endif + assert(index_shift == 2); + return 4; +} + +static PyObject* +dict_check_indices_layout(PyObject *self, PyObject *arg) +{ + if (!PyAnyDict_Check(arg)) { + PyErr_SetString(PyExc_TypeError, "expected a dict"); + return NULL; + } + + PyDictObject *mp = (PyDictObject *)arg; + PyDictKeysObject *keys = mp->ma_keys; + + size_t indices_size = (size_t)1 << keys->dk_log2_index_bytes; + char *base = (char *)_DK_ALLOC_BASE(keys); + char *header = (char *)keys; + char *entries = (char *)_DK_ENTRIES(keys); + + bool ok = true; + ok &= (header == base + indices_size); + ok &= (entries == header + sizeof(PyDictKeysObject)); + + size_t index_bytes = dict_index_bytes_for_keys(keys); + char *idx_base = (char *)_DK_INDICES_BASE(keys); + /* Index 0 is stored immediately before the header. */ + char *idx0 = (char *)_DK_INDICES_END(keys) - (ptrdiff_t)index_bytes; + ok &= (idx0 == idx_base + indices_size - (ptrdiff_t)index_bytes); + + return PyBool_FromLong(ok); +} static int _init_interp_config_from_object(PyInterpreterConfig *config, PyObject *obj) @@ -2902,6 +2949,7 @@ static PyMethodDef module_functions[] = { {"get_object_dict_values", get_object_dict_values, METH_O}, {"hamt", new_hamt, METH_NOARGS}, {"dict_getitem_knownhash", dict_getitem_knownhash, METH_VARARGS}, + {"dict_check_indices_layout", dict_check_indices_layout, METH_O}, {"create_interpreter", _PyCFunction_CAST(create_interpreter), METH_VARARGS | METH_KEYWORDS}, {"destroy_interpreter", _PyCFunction_CAST(destroy_interpreter), diff --git a/Objects/dictobject.c b/Objects/dictobject.c index 6c802ca569d48c..a74e93fdeac392 100644 --- a/Objects/dictobject.c +++ b/Objects/dictobject.c @@ -16,7 +16,6 @@ As of Python 3.6, this is compact and ordered. Basic idea is described here: layout: -+---------------------+ | dk_refcnt | | dk_log2_size | | dk_log2_index_bytes | @@ -176,8 +175,8 @@ ASSERT_DICT_LOCKED(PyObject *op) #define IS_DICT_SHARED(mp) _PyObject_GC_IS_SHARED(mp) #define SET_DICT_SHARED(mp) _PyObject_GC_SET_SHARED(mp) -#define LOAD_INDEX(keys, size, idx) _Py_atomic_load_int##size##_relaxed(&((const int##size##_t*)keys->dk_indices)[idx]); -#define STORE_INDEX(keys, size, idx, value) _Py_atomic_store_int##size##_relaxed(&((int##size##_t*)keys->dk_indices)[idx], (int##size##_t)value); +#define LOAD_INDEX(keys, size, idx) _Py_atomic_load_int##size##_relaxed(&((const int##size##_t*)_DK_INDICES_END(keys))[-1 - (idx)]); +#define STORE_INDEX(keys, size, idx, value) _Py_atomic_store_int##size##_relaxed(&((int##size##_t*)_DK_INDICES_END(keys))[-1 - (idx)], (int##size##_t)value); #define ASSERT_OWNED_OR_SHARED(mp) \ assert(_Py_IsOwnedByCurrentThread((PyObject *)mp) || IS_DICT_SHARED(mp)); @@ -256,8 +255,8 @@ static inline void split_keys_entry_added(PyDictKeysObject *keys) #define UNLOCK_KEYS_IF_SPLIT(keys, kind) #define IS_DICT_SHARED(mp) (false) #define SET_DICT_SHARED(mp) -#define LOAD_INDEX(keys, size, idx) ((const int##size##_t*)(keys->dk_indices))[idx] -#define STORE_INDEX(keys, size, idx, value) ((int##size##_t*)(keys->dk_indices))[idx] = (int##size##_t)value +#define LOAD_INDEX(keys, size, idx) ((const int##size##_t*)_DK_INDICES_END(keys))[-1 - (idx)] +#define STORE_INDEX(keys, size, idx, value) ((int##size##_t*)_DK_INDICES_END(keys))[-1 - (idx)] = (int##size##_t)value static inline void split_keys_entry_added(PyDictKeysObject *keys) { @@ -513,14 +512,14 @@ dictkeys_get_index(const PyDictKeysObject *keys, Py_ssize_t i) int log2size = DK_LOG_SIZE(keys); Py_ssize_t ix; - if (log2size < 8) { + if (keys->dk_log2_index_bytes == log2size) { ix = LOAD_INDEX(keys, 8, i); } - else if (log2size < 16) { + else if (keys->dk_log2_index_bytes == log2size + 1) { ix = LOAD_INDEX(keys, 16, i); } #if SIZEOF_VOID_P > 4 - else if (log2size >= 32) { + else if (keys->dk_log2_index_bytes == log2size + 3) { ix = LOAD_INDEX(keys, 64, i); } #endif @@ -540,16 +539,16 @@ dictkeys_set_index(PyDictKeysObject *keys, Py_ssize_t i, Py_ssize_t ix) assert(ix >= DKIX_DUMMY); assert(keys->dk_version == 0); - if (log2size < 8) { + if (keys->dk_log2_index_bytes == log2size) { assert(ix <= 0x7f); STORE_INDEX(keys, 8, i, ix); } - else if (log2size < 16) { + else if (keys->dk_log2_index_bytes == log2size + 1) { assert(ix <= 0x7fff); STORE_INDEX(keys, 16, i, ix); } #if SIZEOF_VOID_P > 4 - else if (log2size >= 32) { + else if (keys->dk_log2_index_bytes == log2size + 3) { STORE_INDEX(keys, 64, i, ix); } #endif @@ -626,7 +625,15 @@ estimate_log2_keysize(Py_ssize_t n) * See https://github.com/python/cpython/pull/127568#discussion_r1868070614 * for the rationale of using dk_log2_index_bytes=3 instead of 0. */ -static PyDictKeysObject empty_keys_struct = { +typedef struct { + int8_t indices[8]; + PyDictKeysObject keys; +} _PyDict_EmptyKeysStorage; + +static _PyDict_EmptyKeysStorage empty_keys_storage = { + {DKIX_EMPTY, DKIX_EMPTY, DKIX_EMPTY, DKIX_EMPTY, + DKIX_EMPTY, DKIX_EMPTY, DKIX_EMPTY, DKIX_EMPTY}, + { _Py_DICT_IMMORTAL_INITIAL_REFCNT, /* dk_refcnt */ 0, /* dk_log2_size */ 3, /* dk_log2_index_bytes */ @@ -637,11 +644,14 @@ static PyDictKeysObject empty_keys_struct = { 1, /* dk_version */ 0, /* dk_usable (immutable) */ 0, /* dk_nentries */ - {DKIX_EMPTY, DKIX_EMPTY, DKIX_EMPTY, DKIX_EMPTY, - DKIX_EMPTY, DKIX_EMPTY, DKIX_EMPTY, DKIX_EMPTY}, /* dk_indices */ + {}, + } }; -#define Py_EMPTY_KEYS &empty_keys_struct +static_assert(offsetof(_PyDict_EmptyKeysStorage, keys) == 8, + "empty_keys_storage layout mismatch"); + +#define Py_EMPTY_KEYS (&empty_keys_storage.keys) /* Uncomment to check the dict content in _PyDict_CheckConsistency() */ // #define DEBUG_PYDICT @@ -809,18 +819,27 @@ new_keys_object(uint8_t log2_size, bool unicode) } PyDictKeysObject *dk = NULL; + size_t indices_size = (size_t)1 << log2_bytes; + void *base = NULL; + if (log2_size == PyDict_LOG_MINSIZE && unicode) { - dk = _Py_FREELIST_POP_MEM(dictkeys); + base = _Py_FREELIST_POP_MEM(dictkeys); + if (base != NULL) { + dk = (PyDictKeysObject *)((char *)base + indices_size); + } } - if (dk == NULL) { - dk = PyMem_Malloc(sizeof(PyDictKeysObject) - + ((size_t)1 << log2_bytes) - + entry_size * usable); - if (dk == NULL) { + + if (base == NULL) { + base = PyMem_Malloc(indices_size + + sizeof(PyDictKeysObject) + + entry_size * usable); + if (base == NULL) { PyErr_NoMemory(); return NULL; } + dk = (PyDictKeysObject *)((char *)base + indices_size); } + #ifdef Py_REF_DEBUG _Py_IncRefTotal(_PyThreadState_GET()); #endif @@ -834,25 +853,28 @@ new_keys_object(uint8_t log2_size, bool unicode) dk->dk_nentries = 0; dk->dk_usable = usable; dk->dk_version = 0; - memset(&dk->dk_indices[0], 0xff, ((size_t)1 << log2_bytes)); - memset(&dk->dk_indices[(size_t)1 << log2_bytes], 0, entry_size * usable); + memset(_DK_INDICES_BASE(dk), 0xff, indices_size); + memset(&dk->dk_indices[0], 0, entry_size * usable); return dk; } static void free_keys_object(PyDictKeysObject *keys, bool use_qsbr) { + void *base = _DK_ALLOC_BASE(keys); + #ifdef Py_GIL_DISABLED if (use_qsbr) { - _PyMem_FreeDelayed(keys, _PyDict_KeysSize(keys)); + _PyMem_FreeDelayed(base, _PyDict_KeysSize(keys)); return; } #endif + if (DK_LOG_SIZE(keys) == PyDict_LOG_MINSIZE && keys->dk_kind == DICT_KEYS_UNICODE) { - _Py_FREELIST_FREE(dictkeys, keys, PyMem_Free); + _Py_FREELIST_FREE(dictkeys, base, PyMem_Free); } else { - PyMem_Free(keys); + PyMem_Free(base); } } @@ -950,14 +972,19 @@ clone_combined_dict_keys(PyDictObject *orig) ASSERT_DICT_LOCKED(orig); - size_t keys_size = _PyDict_KeysSize(orig->ma_keys); - PyDictKeysObject *keys = PyMem_Malloc(keys_size); - if (keys == NULL) { + PyDictKeysObject *orig_keys = orig->ma_keys; + size_t keys_size = _PyDict_KeysSize(orig_keys); + size_t indices_size = (size_t)1 << orig_keys->dk_log2_index_bytes; + + void *base = PyMem_Malloc(keys_size); + if (base == NULL) { PyErr_NoMemory(); return NULL; } - memcpy(keys, orig->ma_keys, keys_size); + PyDictKeysObject *keys = (PyDictKeysObject *)((char *)base + indices_size); + + memcpy(base, _DK_ALLOC_BASE(orig_keys), keys_size); /* After copying key/value pairs, we need to incref all keys and values and they are about to be co-owned by a diff --git a/Tools/gdb/libpython.py b/Tools/gdb/libpython.py index a85195dcd1016a..fa2bc6cabd1f1b 100755 --- a/Tools/gdb/libpython.py +++ b/Tools/gdb/libpython.py @@ -835,19 +835,8 @@ def write_repr(self, out, visited): @staticmethod def _get_entries(keys): dk_nentries = int(keys['dk_nentries']) - dk_size = 1< Date: Sun, 22 Feb 2026 01:11:53 -0600 Subject: [PATCH 2/4] ci fixes for wasl and c-global checker --- Modules/_testinternalcapi.c | 6 ++++-- Objects/dictobject.c | 4 +--- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/Modules/_testinternalcapi.c b/Modules/_testinternalcapi.c index 50dc0413c3b17a..83800a4eb43778 100644 --- a/Modules/_testinternalcapi.c +++ b/Modules/_testinternalcapi.c @@ -1913,11 +1913,13 @@ dict_index_bytes_for_keys(PyDictKeysObject *keys) if (index_shift == 1) { return 2; } -#if SIZEOF_VOID_P > 4 if (index_shift == 3) { +#if SIZEOF_VOID_P > 4 return 8; - } #endif + /* Py_EMPTY_KEYS uses dk_log2_index_bytes=3 even on 32-bit builds. */ + return 4; + } assert(index_shift == 2); return 4; } diff --git a/Objects/dictobject.c b/Objects/dictobject.c index a74e93fdeac392..09c868c0f4d6fc 100644 --- a/Objects/dictobject.c +++ b/Objects/dictobject.c @@ -648,9 +648,6 @@ static _PyDict_EmptyKeysStorage empty_keys_storage = { } }; -static_assert(offsetof(_PyDict_EmptyKeysStorage, keys) == 8, - "empty_keys_storage layout mismatch"); - #define Py_EMPTY_KEYS (&empty_keys_storage.keys) /* Uncomment to check the dict content in _PyDict_CheckConsistency() */ @@ -665,6 +662,7 @@ static_assert(offsetof(_PyDict_EmptyKeysStorage, keys) == 8, static inline int get_index_from_order(PyDictObject *mp, Py_ssize_t i) { + Py_BUILD_ASSERT(offsetof(_PyDict_EmptyKeysStorage, keys) == 8); assert(mp->ma_used <= SHARED_KEYS_MAX_SIZE); assert(i < mp->ma_values->size); uint8_t *array = get_insertion_order_array(mp->ma_values); From 270837b2a4ae56540b54d2f0dc70444832fc96b4 Mon Sep 17 00:00:00 2001 From: clintonsteiner Date: Sun, 22 Feb 2026 01:13:45 -0600 Subject: [PATCH 3/4] ci fixes for wasl and c-global checker --- Objects/dictobject.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Objects/dictobject.c b/Objects/dictobject.c index 09c868c0f4d6fc..97ff269ae68535 100644 --- a/Objects/dictobject.c +++ b/Objects/dictobject.c @@ -630,7 +630,7 @@ typedef struct { PyDictKeysObject keys; } _PyDict_EmptyKeysStorage; -static _PyDict_EmptyKeysStorage empty_keys_storage = { +static const _PyDict_EmptyKeysStorage empty_keys_storage = { {DKIX_EMPTY, DKIX_EMPTY, DKIX_EMPTY, DKIX_EMPTY, DKIX_EMPTY, DKIX_EMPTY, DKIX_EMPTY, DKIX_EMPTY}, { @@ -648,7 +648,7 @@ static _PyDict_EmptyKeysStorage empty_keys_storage = { } }; -#define Py_EMPTY_KEYS (&empty_keys_storage.keys) +#define Py_EMPTY_KEYS ((PyDictKeysObject *)&empty_keys_storage.keys) /* Uncomment to check the dict content in _PyDict_CheckConsistency() */ // #define DEBUG_PYDICT From cc0a3561165b4692877d13ba47e80767ccfd86df Mon Sep 17 00:00:00 2001 From: clintonsteiner Date: Sun, 22 Feb 2026 07:02:09 -0600 Subject: [PATCH 4/4] address pr comments --- Include/internal/pycore_dict.h | 18 ++++++++++++------ Objects/dictobject.c | 13 ++++++++++--- 2 files changed, 22 insertions(+), 9 deletions(-) diff --git a/Include/internal/pycore_dict.h b/Include/internal/pycore_dict.h index 8be5bdcc167ff7..12ab02ed5ca9f5 100644 --- a/Include/internal/pycore_dict.h +++ b/Include/internal/pycore_dict.h @@ -198,8 +198,18 @@ struct _dictkeysobject { /* Number of used entries in dk_entries. */ Py_ssize_t dk_nentries; - /* Actual hash table of dk_size entries. It holds indices in dk_entries, - or DKIX_EMPTY(-1) or DKIX_DUMMY(-2). + /* Offset to entries within this allocation. + + PyDictKeysObject * points to dk_refcnt. The actual hash table + (dk_indices) is stored immediately before the struct in memory; + see _DK_INDICES_END() and _DK_INDICES_BASE(). + + dk_indices marks the start of the entries array and is used by + DK_ENTRIES() / DK_UNICODE_ENTRIES(). */ + char dk_indices[]; /* char is required to avoid strict aliasing. */ + + /* dk_indices is the actual hash table of dk_size entries. It holds + indices in dk_entries, or DKIX_EMPTY(-1) or DKIX_DUMMY(-2). Indices must be: 0 <= indice < USABLE_FRACTION(dk_size). @@ -211,10 +221,6 @@ struct _dictkeysobject { - 8 bytes otherwise (int64_t*) Dynamically sized, SIZEOF_VOID_P is minimum. */ - char dk_indices[]; /* char is required to avoid strict aliasing. */ - - /* "PyDictKeyEntry or PyDictUnicodeEntry dk_entries[USABLE_FRACTION(DK_SIZE(dk))];" array follows: - see the DK_ENTRIES() / DK_UNICODE_ENTRIES() functions below */ }; /* This must be no more than 250, for the prefix size to fit in one byte. */ diff --git a/Objects/dictobject.c b/Objects/dictobject.c index 97ff269ae68535..47dc54f0be83e2 100644 --- a/Objects/dictobject.c +++ b/Objects/dictobject.c @@ -16,6 +16,10 @@ As of Python 3.6, this is compact and ordered. Basic idea is described here: layout: ++---------------------+ +| dk_indices[] | +| | ++---------------------+ | dk_refcnt | | dk_log2_size | | dk_log2_index_bytes | @@ -24,13 +28,16 @@ As of Python 3.6, this is compact and ordered. Basic idea is described here: | dk_usable | | dk_nentries | +---------------------+ -| dk_indices[] | -| | -+---------------------+ | dk_entries[] | | | +---------------------+ +PyDictKeysObject * points to the start of the struct (dk_refcnt). The +dk_indices table is stored immediately before this struct in memory. + +NOTE: dk_mutex is present in free-threaded builds, in between dk_kind and +dk_version. + dk_indices is actual hashtable. It holds index in entries, or DKIX_EMPTY(-1) or DKIX_DUMMY(-2). Size of indices is dk_size. Type of each index in indices varies with dk_size: