Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
34 changes: 25 additions & 9 deletions Include/internal/pycore_dict.h
Original file line number Diff line number Diff line change
Expand Up @@ -198,9 +198,18 @@ struct _dictkeysobject {
/* Number of used entries in dk_entries. */
Py_ssize_t dk_nentries;

/* Offset to entries within this allocation.
/* Actual hash table of dk_size entries. It holds indices in dk_entries,
or DKIX_EMPTY(-1) or DKIX_DUMMY(-2).
PyDictKeysObject * points to dk_refcnt. The actual hash table
(dk_indices) is stored immediately before the struct in memory;
see _DK_INDICES_END() and _DK_INDICES_BASE().
dk_indices marks the start of the entries array and is used by
DK_ENTRIES() / DK_UNICODE_ENTRIES(). */
char dk_indices[]; /* char is required to avoid strict aliasing. */

/* dk_indices is the actual hash table of dk_size entries. It holds
indices in dk_entries, or DKIX_EMPTY(-1) or DKIX_DUMMY(-2).
Indices must be: 0 <= index < USABLE_FRACTION(dk_size).
Expand All @@ -212,10 +221,6 @@ struct _dictkeysobject {
- 8 bytes otherwise (int64_t*)
Dynamically sized, SIZEOF_VOID_P is minimum. */
char dk_indices[]; /* char is required to avoid strict aliasing. */

/* "PyDictKeyEntry or PyDictUnicodeEntry dk_entries[USABLE_FRACTION(DK_SIZE(dk))];" array follows:
see the DK_ENTRIES() / DK_UNICODE_ENTRIES() functions below */
};

/* This must be no more than 250, for the prefix size to fit in one byte. */
Expand Down Expand Up @@ -243,10 +248,21 @@ struct _dictvalues {
#define DK_SIZE(dk) (1<<DK_LOG_SIZE(dk))
#endif

/* Return the address just past the last byte of the dk_indices table.
   The table is stored immediately before the PyDictKeysObject header, so
   its end is the address of the keys object itself. */
static inline void* _DK_INDICES_END(const PyDictKeysObject *dk) {
    const void *header_start = dk;
    return (void *)header_start;
}

/* Return the address of the first byte of the dk_indices table, i.e. the
   (1 << dk_log2_index_bytes) bytes located directly in front of the
   PyDictKeysObject header. */
static inline void* _DK_INDICES_BASE(const PyDictKeysObject *dk) {
    char *header_start = (char *)dk;
    return header_start - ((size_t)1 << dk->dk_log2_index_bytes);
}

/* Return the start of the memory block holding the keys object.  The index
   table is the first thing in that block, so this is the table's base
   address rather than the PyDictKeysObject pointer. */
static inline void* _DK_ALLOC_BASE(PyDictKeysObject *dk) {
    void *table_start = _DK_INDICES_BASE(dk);
    return table_start;
}

static inline void* _DK_ENTRIES(PyDictKeysObject *dk) {
int8_t *indices = (int8_t*)(dk->dk_indices);
size_t index = (size_t)1 << dk->dk_log2_index_bytes;
return (&indices[index]);
return (void *)(&dk->dk_indices[0]);
}

static inline PyDictKeyEntry* DK_ENTRIES(PyDictKeysObject *dk) {
Expand Down
15 changes: 15 additions & 0 deletions Lib/test/test_dict.py
Original file line number Diff line number Diff line change
Expand Up @@ -1711,6 +1711,21 @@ def __hash__(self):
self.assertEqual(dict_getitem_knownhash(d, k1, hash(k1)), 1)
self.assertRaises(Exc, dict_getitem_knownhash, d, k2, hash(k2))

@support.cpython_only
def test_indices_layout(self):
    # Ask the internal C helper to verify the keys-object memory layout for
    # dicts of several sizes, starting with the empty dict.
    capi = import_helper.import_module('_testinternalcapi')
    check_layout = capi.dict_check_indices_layout

    samples = (
        {},
        {i: i for i in range(10)},
        {i: i for i in range(200)},
        {i: i for i in range(2000)},
        {i: i for i in range(70000)},
    )
    for sample in samples:
        with self.subTest(size=len(sample)):
            self.assertTrue(check_layout(sample))

from test import mapping_tests

Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
Restructure ``PyDictKeysObject`` memory layout so the indices array is stored
before the object header, and update dict index access accordingly. In local
dict-operation microbenchmarks this was about 1.4% faster overall, with most
operations improving by roughly 1-2% (:gh:`142889`).
50 changes: 50 additions & 0 deletions Modules/_testinternalcapi.c
Original file line number Diff line number Diff line change
Expand Up @@ -1903,6 +1903,55 @@ dict_getitem_knownhash(PyObject *self, PyObject *args)
return Py_XNewRef(result);
}

/* Return the width, in bytes, of a single slot in the index table of `keys`.

   The width is derived from the difference between dk_log2_index_bytes
   (log2 of the table size in bytes) and the log2 of the number of slots:
   a difference of 0, 1, 2 or 3 means 1-, 2-, 4- or 8-byte slots. */
static size_t
dict_index_bytes_for_keys(PyDictKeysObject *keys)
{
    const int index_shift = keys->dk_log2_index_bytes - DK_LOG_SIZE(keys);

    switch (index_shift) {
    case 0:
        return 1;
    case 1:
        return 2;
    case 3:
#if SIZEOF_VOID_P > 4
        return 8;
#else
        /* Py_EMPTY_KEYS uses dk_log2_index_bytes=3 even on 32-bit builds. */
        return 4;
#endif
    default:
        /* The only remaining valid difference is 2 (4-byte slots). */
        assert(index_shift == 2);
        return 4;
    }
}

/* dict_check_indices_layout(d) -> bool

   Test helper: inspect the keys object of dict `d` and report whether its
   memory layout matches what the layout helpers promise:
     - the header starts right after the index table,
     - the entries start right after the fixed-size header,
     - slot 0 of the index table is the last slot before the header.

   Raises TypeError (and returns NULL) if `arg` is not a dict. */
static PyObject*
dict_check_indices_layout(PyObject *self, PyObject *arg)
{
    if (!PyAnyDict_Check(arg)) {
        PyErr_SetString(PyExc_TypeError, "expected a dict");
        return NULL;
    }

    PyDictKeysObject *keys = ((PyDictObject *)arg)->ma_keys;
    const size_t table_bytes = (size_t)1 << keys->dk_log2_index_bytes;
    char *hdr = (char *)keys;

    /* Allocation base + index table size must land on the header. */
    char *alloc_start = (char *)_DK_ALLOC_BASE(keys);
    bool header_after_table = (hdr == alloc_start + table_bytes);

    /* Entries must follow the fixed-size part of the header. */
    char *entries_start = (char *)_DK_ENTRIES(keys);
    bool entries_after_header =
        (entries_start == hdr + sizeof(PyDictKeysObject));

    /* Index 0 is stored immediately before the header. */
    const ptrdiff_t slot_bytes = (ptrdiff_t)dict_index_bytes_for_keys(keys);
    char *table_start = (char *)_DK_INDICES_BASE(keys);
    char *slot0 = (char *)_DK_INDICES_END(keys) - slot_bytes;
    bool slot0_is_last = (slot0 == table_start + table_bytes - slot_bytes);

    return PyBool_FromLong(
        header_after_table && entries_after_header && slot0_is_last);
}

static int
_init_interp_config_from_object(PyInterpreterConfig *config, PyObject *obj)
Expand Down Expand Up @@ -2902,6 +2951,7 @@ static PyMethodDef module_functions[] = {
{"get_object_dict_values", get_object_dict_values, METH_O},
{"hamt", new_hamt, METH_NOARGS},
{"dict_getitem_knownhash", dict_getitem_knownhash, METH_VARARGS},
{"dict_check_indices_layout", dict_check_indices_layout, METH_O},
{"create_interpreter", _PyCFunction_CAST(create_interpreter),
METH_VARARGS | METH_KEYWORDS},
{"destroy_interpreter", _PyCFunction_CAST(destroy_interpreter),
Expand Down
96 changes: 64 additions & 32 deletions Objects/dictobject.c
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,9 @@ As of Python 3.6, this is compact and ordered. Basic idea is described here:

layout:
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Does this layout comment need updating (move dk_entries[] to the top and demark where a PyDictKeysObject * actually points to)?

Also, I feel like the definition for PyDictKeysObject (struct _dictkeysobject) needs updating:

struct _dictkeysobject {
Py_ssize_t dk_refcnt;
/* Size of the hash table (dk_indices). It must be a power of 2. */
uint8_t dk_log2_size;
/* Size of the hash table (dk_indices) by bytes. */
uint8_t dk_log2_index_bytes;
/* Kind of keys */
uint8_t dk_kind;
#ifdef Py_GIL_DISABLED
/* Lock used to protect shared keys */
PyMutex dk_mutex;
#endif
/* Version number -- Reset to 0 by any modification to keys */
uint32_t dk_version;
/* Number of usable entries in dk_entries. */
Py_ssize_t dk_usable;
/* Number of used entries in dk_entries. */
Py_ssize_t dk_nentries;
/* Actual hash table of dk_size entries. It holds indices in dk_entries,
or DKIX_EMPTY(-1) or DKIX_DUMMY(-2).
Indices must be: 0 <= indice < USABLE_FRACTION(dk_size).
The size in bytes of an indice depends on dk_size:
- 1 byte if dk_size <= 0xff (char*)
- 2 bytes if dk_size <= 0xffff (int16_t*)
- 4 bytes if dk_size <= 0xffffffff (int32_t*)
- 8 bytes otherwise (int64_t*)
Dynamically sized, SIZEOF_VOID_P is minimum. */
char dk_indices[]; /* char is required to avoid strict aliasing. */
/* "PyDictKeyEntry or PyDictUnicodeEntry dk_entries[USABLE_FRACTION(DK_SIZE(dk))];" array follows:
see the DK_ENTRIES() / DK_UNICODE_ENTRIES() functions below */
};

Probably just change char dk_indices[] to char dk_entries[] and add a comment saying that dk_indices will be put above the header. I'm not sure about the consequences of renaming dk_indices though, so you could also just add a comment.

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Updated, was trying to keep diff small but agreed this is no longer applicable


+---------------------+
| dk_indices[] |
| |
+---------------------+
| dk_refcnt |
| dk_log2_size |
Expand All @@ -25,13 +28,16 @@ As of Python 3.6, this is compact and ordered. Basic idea is described here:
| dk_usable |
| dk_nentries |
+---------------------+
| dk_indices[] |
| |
+---------------------+
| dk_entries[] |
| |
+---------------------+

PyDictKeysObject * points to the start of the struct (dk_refcnt). The
dk_indices table is stored immediately before this struct in memory.

NOTE: dk_mutex is present in free-threaded builds, in between dk_kind and
dk_version.

dk_indices is the actual hash table. It holds indices into dk_entries, or
DKIX_EMPTY(-1) or DKIX_DUMMY(-2).
Size of indices is dk_size. Type of each index in indices varies with dk_size:
Expand Down Expand Up @@ -176,8 +182,8 @@ ASSERT_DICT_LOCKED(PyObject *op)

#define IS_DICT_SHARED(mp) _PyObject_GC_IS_SHARED(mp)
#define SET_DICT_SHARED(mp) _PyObject_GC_SET_SHARED(mp)
#define LOAD_INDEX(keys, size, idx) _Py_atomic_load_int##size##_relaxed(&((const int##size##_t*)keys->dk_indices)[idx]);
#define STORE_INDEX(keys, size, idx, value) _Py_atomic_store_int##size##_relaxed(&((int##size##_t*)keys->dk_indices)[idx], (int##size##_t)value);
#define LOAD_INDEX(keys, size, idx) _Py_atomic_load_int##size##_relaxed(&((const int##size##_t*)_DK_INDICES_END(keys))[-1 - (idx)]);
#define STORE_INDEX(keys, size, idx, value) _Py_atomic_store_int##size##_relaxed(&((int##size##_t*)_DK_INDICES_END(keys))[-1 - (idx)], (int##size##_t)value);
#define ASSERT_OWNED_OR_SHARED(mp) \
assert(_Py_IsOwnedByCurrentThread((PyObject *)mp) || IS_DICT_SHARED(mp));

Expand Down Expand Up @@ -256,8 +262,8 @@ static inline void split_keys_entry_added(PyDictKeysObject *keys)
#define UNLOCK_KEYS_IF_SPLIT(keys, kind)
#define IS_DICT_SHARED(mp) (false)
#define SET_DICT_SHARED(mp)
#define LOAD_INDEX(keys, size, idx) ((const int##size##_t*)(keys->dk_indices))[idx]
#define STORE_INDEX(keys, size, idx, value) ((int##size##_t*)(keys->dk_indices))[idx] = (int##size##_t)value
#define LOAD_INDEX(keys, size, idx) ((const int##size##_t*)_DK_INDICES_END(keys))[-1 - (idx)]
#define STORE_INDEX(keys, size, idx, value) ((int##size##_t*)_DK_INDICES_END(keys))[-1 - (idx)] = (int##size##_t)value

static inline void split_keys_entry_added(PyDictKeysObject *keys)
{
Expand Down Expand Up @@ -513,14 +519,14 @@ dictkeys_get_index(const PyDictKeysObject *keys, Py_ssize_t i)
int log2size = DK_LOG_SIZE(keys);
Py_ssize_t ix;

if (log2size < 8) {
if (keys->dk_log2_index_bytes == log2size) {
ix = LOAD_INDEX(keys, 8, i);
}
else if (log2size < 16) {
else if (keys->dk_log2_index_bytes == log2size + 1) {
ix = LOAD_INDEX(keys, 16, i);
}
#if SIZEOF_VOID_P > 4
else if (log2size >= 32) {
else if (keys->dk_log2_index_bytes == log2size + 3) {
ix = LOAD_INDEX(keys, 64, i);
}
#endif
Expand All @@ -540,16 +546,16 @@ dictkeys_set_index(PyDictKeysObject *keys, Py_ssize_t i, Py_ssize_t ix)
assert(ix >= DKIX_DUMMY);
assert(keys->dk_version == 0);

if (log2size < 8) {
if (keys->dk_log2_index_bytes == log2size) {
assert(ix <= 0x7f);
STORE_INDEX(keys, 8, i, ix);
}
else if (log2size < 16) {
else if (keys->dk_log2_index_bytes == log2size + 1) {
assert(ix <= 0x7fff);
STORE_INDEX(keys, 16, i, ix);
}
#if SIZEOF_VOID_P > 4
else if (log2size >= 32) {
else if (keys->dk_log2_index_bytes == log2size + 3) {
STORE_INDEX(keys, 64, i, ix);
}
#endif
Expand Down Expand Up @@ -626,7 +632,15 @@ estimate_log2_keysize(Py_ssize_t n)
* See https://github.com/python/cpython/pull/127568#discussion_r1868070614
* for the rationale of using dk_log2_index_bytes=3 instead of 0.
*/
static PyDictKeysObject empty_keys_struct = {
/* Static backing storage for the shared empty keys object.  The index
   table is declared first so that it sits in memory directly in front of
   the PyDictKeysObject header; do not reorder the fields. */
typedef struct {
/* Index-table bytes placed ahead of the header. */
int8_t indices[8];
/* The keys header itself. */
PyDictKeysObject keys;
} _PyDict_EmptyKeysStorage;

static const _PyDict_EmptyKeysStorage empty_keys_storage = {
{DKIX_EMPTY, DKIX_EMPTY, DKIX_EMPTY, DKIX_EMPTY,
DKIX_EMPTY, DKIX_EMPTY, DKIX_EMPTY, DKIX_EMPTY},
{
_Py_DICT_IMMORTAL_INITIAL_REFCNT, /* dk_refcnt */
0, /* dk_log2_size */
3, /* dk_log2_index_bytes */
Expand All @@ -637,11 +651,11 @@ static PyDictKeysObject empty_keys_struct = {
1, /* dk_version */
0, /* dk_usable (immutable) */
0, /* dk_nentries */
{DKIX_EMPTY, DKIX_EMPTY, DKIX_EMPTY, DKIX_EMPTY,
DKIX_EMPTY, DKIX_EMPTY, DKIX_EMPTY, DKIX_EMPTY}, /* dk_indices */
{},
}
};

#define Py_EMPTY_KEYS &empty_keys_struct
#define Py_EMPTY_KEYS ((PyDictKeysObject *)&empty_keys_storage.keys)

/* Uncomment to check the dict content in _PyDict_CheckConsistency() */
// #define DEBUG_PYDICT
Expand All @@ -655,6 +669,7 @@ static PyDictKeysObject empty_keys_struct = {
static inline int
get_index_from_order(PyDictObject *mp, Py_ssize_t i)
{
Py_BUILD_ASSERT(offsetof(_PyDict_EmptyKeysStorage, keys) == 8);
assert(mp->ma_used <= SHARED_KEYS_MAX_SIZE);
assert(i < mp->ma_values->size);
uint8_t *array = get_insertion_order_array(mp->ma_values);
Expand Down Expand Up @@ -809,18 +824,27 @@ new_keys_object(uint8_t log2_size, bool unicode)
}

PyDictKeysObject *dk = NULL;
size_t indices_size = (size_t)1 << log2_bytes;
void *base = NULL;

if (log2_size == PyDict_LOG_MINSIZE && unicode) {
dk = _Py_FREELIST_POP_MEM(dictkeys);
base = _Py_FREELIST_POP_MEM(dictkeys);
if (base != NULL) {
dk = (PyDictKeysObject *)((char *)base + indices_size);
}
}
if (dk == NULL) {
dk = PyMem_Malloc(sizeof(PyDictKeysObject)
+ ((size_t)1 << log2_bytes)
+ entry_size * usable);
if (dk == NULL) {

if (base == NULL) {
base = PyMem_Malloc(indices_size
+ sizeof(PyDictKeysObject)
+ entry_size * usable);
if (base == NULL) {
PyErr_NoMemory();
return NULL;
}
dk = (PyDictKeysObject *)((char *)base + indices_size);
}

#ifdef Py_REF_DEBUG
_Py_IncRefTotal(_PyThreadState_GET());
#endif
Expand All @@ -834,25 +858,28 @@ new_keys_object(uint8_t log2_size, bool unicode)
dk->dk_nentries = 0;
dk->dk_usable = usable;
dk->dk_version = 0;
memset(&dk->dk_indices[0], 0xff, ((size_t)1 << log2_bytes));
memset(&dk->dk_indices[(size_t)1 << log2_bytes], 0, entry_size * usable);
memset(_DK_INDICES_BASE(dk), 0xff, indices_size);
memset(&dk->dk_indices[0], 0, entry_size * usable);
return dk;
}

static void
free_keys_object(PyDictKeysObject *keys, bool use_qsbr)
{
void *base = _DK_ALLOC_BASE(keys);

#ifdef Py_GIL_DISABLED
if (use_qsbr) {
_PyMem_FreeDelayed(keys, _PyDict_KeysSize(keys));
_PyMem_FreeDelayed(base, _PyDict_KeysSize(keys));
return;
}
#endif

if (DK_LOG_SIZE(keys) == PyDict_LOG_MINSIZE && keys->dk_kind == DICT_KEYS_UNICODE) {
_Py_FREELIST_FREE(dictkeys, keys, PyMem_Free);
_Py_FREELIST_FREE(dictkeys, base, PyMem_Free);
}
else {
PyMem_Free(keys);
PyMem_Free(base);
}
}

Expand Down Expand Up @@ -950,14 +977,19 @@ clone_combined_dict_keys(PyDictObject *orig)

ASSERT_DICT_LOCKED(orig);

size_t keys_size = _PyDict_KeysSize(orig->ma_keys);
PyDictKeysObject *keys = PyMem_Malloc(keys_size);
if (keys == NULL) {
PyDictKeysObject *orig_keys = orig->ma_keys;
size_t keys_size = _PyDict_KeysSize(orig_keys);
size_t indices_size = (size_t)1 << orig_keys->dk_log2_index_bytes;

void *base = PyMem_Malloc(keys_size);
if (base == NULL) {
PyErr_NoMemory();
return NULL;
}

memcpy(keys, orig->ma_keys, keys_size);
PyDictKeysObject *keys = (PyDictKeysObject *)((char *)base + indices_size);

memcpy(base, _DK_ALLOC_BASE(orig_keys), keys_size);

/* After copying key/value pairs, we need to incref all
keys and values and they are about to be co-owned by a
Expand Down
11 changes: 0 additions & 11 deletions Tools/gdb/libpython.py
Original file line number Diff line number Diff line change
Expand Up @@ -835,19 +835,8 @@ def write_repr(self, out, visited):
@staticmethod
def _get_entries(keys):
dk_nentries = int(keys['dk_nentries'])
dk_size = 1<<int(keys['dk_log2_size'])

if dk_size <= 0xFF:
offset = dk_size
elif dk_size <= 0xFFFF:
offset = 2 * dk_size
elif dk_size <= 0xFFFFFFFF:
offset = 4 * dk_size
else:
offset = 8 * dk_size

ent_addr = keys['dk_indices'].address
ent_addr = ent_addr.cast(_type_unsigned_char_ptr()) + offset
if int(keys['dk_kind']) == 0: # DICT_KEYS_GENERAL
ent_ptr_t = gdb.lookup_type('PyDictKeyEntry').pointer()
else:
Expand Down
Loading