Hash table replacement for skip lists in ID code (#600)

* Committing clang-format changes

* Brings over hash table code from Bitbucket

* Can be switched between skip list and hash table implementation
  with H5_USE_ID_HASH_TABLE #define
* Not yet updated to use iterate-safe delete

* Fixes a warning and changes where the problematic test is
commented out.

* Adds mark-and-sweep flags and members

* Final fixes for hash table ID code

* Removes skip list ID code

* Committing clang-format changes

* Formatted source

* Adds a comment about the mark-and-sweep scheme.

Co-authored-by: github-actions <41898282+github-actions[bot]@users.noreply.github.com>
This commit is contained in:
Dana Robinson
2021-05-04 13:51:26 -07:00
committed by GitHub
parent c7d45c205e
commit dbe7204085
8 changed files with 1327 additions and 69 deletions

View File

@@ -1114,6 +1114,7 @@
./src/hdf5.h
./src/libhdf5.settings.in
./src/H5win32defs.h
./src/uthash.h
./test/AtomicWriterReader.txt
./test/H5srcdir.h

View File

@@ -971,6 +971,8 @@ set (H5_PRIVATE_HEADERS
${HDF5_SRC_DIR}/H5Zprivate.h
${HDF5_SRC_DIR}/H5win32defs.h
${HDF5_SRC_DIR}/uthash.h
)
set (H5_GENERATED_HEADERS

View File

@@ -30,7 +30,6 @@
#include "H5Gprivate.h" /* Groups */
#include "H5Ipkg.h" /* IDs */
#include "H5RSprivate.h" /* Reference-counted strings */
#include "H5SLprivate.h" /* Skip Lists */
#include "H5Tprivate.h" /* Datatypes */
#include "H5VLprivate.h" /* Virtual Object Layer */
@@ -86,6 +85,7 @@ H5I__id_dump_cb(void *_item, void H5_ATTR_UNUSED *_key, void *_udata)
HDfprintf(stderr, " id = %" PRIdHID "\n", info->id);
HDfprintf(stderr, " count = %u\n", info->count);
HDfprintf(stderr, " obj = 0x%8p\n", info->object);
HDfprintf(stderr, " marked = %d\n", info->marked);
/* Get the group location, so we get get the name */
switch (type) {
@@ -173,6 +173,9 @@ H5I_dump_ids_for_type(H5I_type_t type)
if (type_info) {
H5I_id_info_t *item = NULL;
H5I_id_info_t *tmp = NULL;
/* Header */
HDfprintf(stderr, " init_count = %u\n", type_info->init_count);
HDfprintf(stderr, " reserved = %u\n", type_info->cls->reserved);
@@ -182,7 +185,17 @@ H5I_dump_ids_for_type(H5I_type_t type)
/* List */
if (type_info->id_count > 0) {
HDfprintf(stderr, " List:\n");
H5SL_iterate(type_info->ids, H5I__id_dump_cb, &type);
/* Normally we care about the callback's return value
* (H5I_ITER_CONT, etc.), but this is an iteration over all
* the IDs so we don't care.
*
* XXX: Update this to emit an error message on errors?
*/
HDfprintf(stderr, " (HASH TABLE)\n");
HASH_ITER(hh, type_info->hash_table, item, tmp)
{
H5I__id_dump_cb((void *)item, NULL, (void *)&type);
}
}
}
else

View File

@@ -29,7 +29,6 @@
#include "H5FLprivate.h" /* Free Lists */
#include "H5Ipkg.h" /* IDs */
#include "H5MMprivate.h" /* Memory management */
#include "H5SLprivate.h" /* Skip Lists */
#include "H5Tprivate.h" /* Datatypes */
#include "H5VLprivate.h" /* Virtual Object Layer */
@@ -75,7 +74,7 @@ typedef struct {
/********************/
static void * H5I__unwrap(void *object, H5I_type_t type);
static htri_t H5I__clear_type_cb(void *_id, void *key, void *udata);
static herr_t H5I__mark_node(void *_id, void *key, void *udata);
static void * H5I__remove_common(H5I_type_info_t *type_info, hid_t id);
static int H5I__dec_ref(hid_t id, void **request);
static int H5I__dec_app_ref(hid_t id, void **request);
@@ -96,6 +95,9 @@ int H5I_next_type_g = (int)H5I_NTYPES;
/* Declare a free list to manage the H5I_id_info_t struct */
H5FL_DEFINE_STATIC(H5I_id_info_t);
/* Whether deletes are actually marks (for mark-and-sweep) */
hbool_t H5I_marking_g = FALSE;
/*****************************/
/* Library Private Variables */
/*****************************/
@@ -131,7 +133,7 @@ H5I_term_package(void)
/* Count the number of types still in use */
for (i = 0; i < H5I_next_type_g; i++)
if ((type_info = H5I_type_info_array_g[i]) && type_info->ids)
if ((type_info = H5I_type_info_array_g[i]) && type_info->hash_table)
in_use++;
/* If no types are still being used then clean up */
@@ -139,7 +141,7 @@ H5I_term_package(void)
for (i = 0; i < H5I_next_type_g; i++) {
type_info = H5I_type_info_array_g[i];
if (type_info) {
HDassert(NULL == type_info->ids);
HDassert(NULL == type_info->hash_table);
type_info = H5MM_xfree(type_info);
H5I_type_info_array_g[i] = NULL;
in_use++;
@@ -196,8 +198,7 @@ H5I_register_type(const H5I_class_t *cls)
type_info->id_count = 0;
type_info->nextid = cls->reserved;
type_info->last_id_info = NULL;
if (NULL == (type_info->ids = H5SL_create(H5SL_TYPE_HID, NULL)))
HGOTO_ERROR(H5E_ID, H5E_CANTCREATE, FAIL, "skip list creation failed")
type_info->hash_table = NULL;
}
/* Increment the count of the times this type has been initialized */
@@ -205,13 +206,9 @@ H5I_register_type(const H5I_class_t *cls)
done:
/* Clean up on error */
if (ret_value < 0) {
if (type_info) {
if (type_info->ids)
H5SL_close(type_info->ids);
if (ret_value < 0)
if (type_info)
H5MM_free(type_info);
}
}
FUNC_LEAVE_NOAPI(ret_value)
} /* end H5I_register_type() */
@@ -311,7 +308,9 @@ H5I__unwrap(void *object, H5I_type_t type)
herr_t
H5I_clear_type(H5I_type_t type, hbool_t force, hbool_t app_ref)
{
H5I_clear_type_ud_t udata; /* udata struct for callback */
H5I_clear_type_ud_t udata; /* udata struct for callback */
H5I_id_info_t * item = NULL;
H5I_id_info_t * tmp = NULL;
herr_t ret_value = SUCCEED; /* Return value */
FUNC_ENTER_NOAPI(FAIL)
@@ -328,33 +327,57 @@ H5I_clear_type(H5I_type_t type, hbool_t force, hbool_t app_ref)
udata.force = force;
udata.app_ref = app_ref;
/* Attempt to free all ids in the type */
if (H5SL_try_free_safe(udata.type_info->ids, H5I__clear_type_cb, &udata) < 0)
HGOTO_ERROR(H5E_ID, H5E_CANTDELETE, FAIL, "can't free ids in type")
/* Clearing a type is done in two phases (mark-and-sweep). This is because
* the type's free callback can free other IDs, potentially corrupting
* the data structure during the traversal.
*/
/* Set marking flag */
H5I_marking_g = TRUE;
/* Mark nodes for deletion */
HASH_ITER(hh, udata.type_info->hash_table, item, tmp)
{
if (!item->marked)
if (H5I__mark_node((void *)item, NULL, (void *)&udata) < 0)
HGOTO_ERROR(H5E_ID, H5E_BADITER, FAIL, "iteration failed while clearing the ID type")
}
/* Unset marking flag */
H5I_marking_g = FALSE;
/* Perform sweep */
HASH_ITER(hh, udata.type_info->hash_table, item, tmp)
{
if (item->marked) {
HASH_DELETE(hh, udata.type_info->hash_table, item);
item = H5FL_FREE(H5I_id_info_t, item);
}
}
done:
FUNC_LEAVE_NOAPI(ret_value)
} /* end H5I_clear_type() */
/*-------------------------------------------------------------------------
* Function: H5I__clear_type_cb
* Function: H5I__mark_node
*
* Purpose: Attempts to free the specified ID, calling the free
* function for the object.
* Purpose: Attempts to mark the node for freeing and calls the free
* function for the object, if any
*
* Return: TRUE/FALSE/FAIL
* Return: SUCCEED/FAIL
*
* Programmer: Neil Fortner
* Friday, July 10, 2015
*
*-------------------------------------------------------------------------
*/
static htri_t
H5I__clear_type_cb(void *_info, void H5_ATTR_UNUSED *key, void *_udata)
static herr_t
H5I__mark_node(void *_info, void H5_ATTR_UNUSED *key, void *_udata)
{
H5I_id_info_t * info = (H5I_id_info_t *)_info; /* Current ID info being worked with */
H5I_clear_type_ud_t *udata = (H5I_clear_type_ud_t *)_udata; /* udata struct */
htri_t ret_value = FALSE; /* Return value */
H5I_id_info_t * info = (H5I_id_info_t *)_info; /* Current ID info being worked with */
H5I_clear_type_ud_t *udata = (H5I_clear_type_ud_t *)_udata; /* udata struct */
hbool_t mark = FALSE;
FUNC_ENTER_STATIC_NOERR
@@ -383,12 +406,12 @@ H5I__clear_type_cb(void *_info, void H5_ATTR_UNUSED *key, void *_udata)
#endif /* H5I_DEBUG */
/* Indicate node should be removed from list */
ret_value = TRUE;
mark = TRUE;
}
}
else {
/* Indicate node should be removed from list */
ret_value = TRUE;
mark = TRUE;
}
}
else {
@@ -406,28 +429,28 @@ H5I__clear_type_cb(void *_info, void H5_ATTR_UNUSED *key, void *_udata)
#endif /* H5I_DEBUG */
/* Indicate node should be removed from list */
ret_value = TRUE;
mark = TRUE;
}
}
else {
/* Indicate node should be removed from list */
ret_value = TRUE;
mark = TRUE;
}
}
H5_GCC_DIAG_ON("cast-qual")
/* Remove ID if requested */
if (ret_value) {
/* Free ID info */
info = H5FL_FREE(H5I_id_info_t, info);
if (mark) {
/* Mark ID for deletion */
info->marked = TRUE;
/* Decrement the number of IDs in the type */
udata->type_info->id_count--;
}
}
FUNC_LEAVE_NOAPI(ret_value)
} /* end H5I__clear_type_cb() */
FUNC_LEAVE_NOAPI(SUCCEED)
} /* end H5I__mark_node() */
/*-------------------------------------------------------------------------
* Function: H5I__destroy_type
@@ -471,9 +494,8 @@ H5I__destroy_type(H5I_type_t type)
if (type_info->cls->flags & H5I_CLASS_IS_APPLICATION)
type_info->cls = H5MM_xfree_const(type_info->cls);
if (H5SL_close(type_info->ids) < 0)
HGOTO_ERROR(H5E_ID, H5E_CANTCLOSEOBJ, FAIL, "can't close skip list")
type_info->ids = NULL;
HASH_CLEAR(hh, type_info->hash_table);
type_info->hash_table = NULL;
type_info = H5MM_xfree(type_info);
@@ -531,10 +553,10 @@ H5I__register(H5I_type_t type, const void *object, hbool_t app_ref, H5I_future_r
info->is_future = (NULL != realize_cb);
info->realize_cb = realize_cb;
info->discard_cb = discard_cb;
info->marked = FALSE;
/* Insert into the type */
if (H5SL_insert(type_info->ids, info, &info->id) < 0)
HGOTO_ERROR(H5E_ID, H5E_CANTINSERT, H5I_INVALID_HID, "can't insert ID node into skip list")
HASH_ADD(hh, type_info->hash_table, id, sizeof(hid_t), info);
type_info->id_count++;
type_info->nextid++;
@@ -643,10 +665,10 @@ H5I_register_using_existing_id(H5I_type_t type, void *object, hbool_t app_ref, h
info->is_future = FALSE;
info->realize_cb = NULL;
info->discard_cb = NULL;
info->marked = FALSE;
/* Insert into the type */
if (H5SL_insert(type_info->ids, info, &info->id) < 0)
HGOTO_ERROR(H5E_ID, H5E_CANTINSERT, FAIL, "can't insert ID node into skip list")
HASH_ADD(hh, type_info->hash_table, id, sizeof(hid_t), info);
type_info->id_count++;
/* Set the most recent ID to this object */
@@ -900,9 +922,17 @@ H5I__remove_common(H5I_type_info_t *type_info, hid_t id)
/* Sanity check */
HDassert(type_info);
/* Get the ID node for the ID */
if (NULL == (info = (H5I_id_info_t *)H5SL_remove(type_info->ids, &id)))
HGOTO_ERROR(H5E_ID, H5E_CANTDELETE, NULL, "can't remove ID node from skip list")
/* Delete or mark the node */
HASH_FIND(hh, type_info->hash_table, &id, sizeof(hid_t), info);
if (info) {
HDassert(!info->marked);
if (!H5I_marking_g)
HASH_DELETE(hh, type_info->hash_table, info);
else
info->marked = TRUE;
}
else
HGOTO_ERROR(H5E_ID, H5E_CANTDELETE, NULL, "can't remove ID node from hash table")
/* Check if this ID was the last one accessed */
if (type_info->last_id_info == info)
@@ -912,7 +942,8 @@ H5I__remove_common(H5I_type_info_t *type_info, hid_t id)
ret_value = (void *)info->object; /* (Casting away const OK -QAK) */
H5_GCC_DIAG_ON("cast-qual")
info = H5FL_FREE(H5I_id_info_t, info);
if (!H5I_marking_g)
info = H5FL_FREE(H5I_id_info_t, info);
/* Decrement the number of IDs in the type */
(type_info->id_count)--;
@@ -1555,8 +1586,9 @@ H5I_iterate(H5I_type_t type, H5I_search_func_t func, void *udata, hbool_t app_re
/* Only iterate through ID list if it is initialized and there are IDs in type */
if (type_info && type_info->init_count > 0 && type_info->id_count > 0) {
H5I_iterate_ud_t iter_udata; /* User data for iteration callback */
herr_t iter_status; /* Iteration status */
H5I_iterate_ud_t iter_udata; /* User data for iteration callback */
H5I_id_info_t * item = NULL;
H5I_id_info_t * tmp = NULL;
/* Set up iterator user data */
iter_udata.user_func = func;
@@ -1565,8 +1597,16 @@ H5I_iterate(H5I_type_t type, H5I_search_func_t func, void *udata, hbool_t app_re
iter_udata.obj_type = type;
/* Iterate over IDs */
if ((iter_status = H5SL_iterate(type_info->ids, H5I__iterate_cb, &iter_udata)) < 0)
HGOTO_ERROR(H5E_ID, H5E_BADITER, FAIL, "iteration failed")
HASH_ITER(hh, type_info->hash_table, item, tmp)
{
if (!item->marked) {
int ret = H5I__iterate_cb((void *)item, NULL, (void *)&iter_udata);
if (H5_ITER_ERROR == ret)
HGOTO_ERROR(H5E_ID, H5E_BADITER, FAIL, "iteration failed")
if (H5_ITER_STOP == ret)
break;
}
}
}
done:
@@ -1607,8 +1647,7 @@ H5I__find_id(hid_t id)
if (type_info->last_id_info && type_info->last_id_info->id == id)
id_info = type_info->last_id_info;
else {
/* Locate the ID node for the ID */
id_info = (H5I_id_info_t *)H5SL_search(type_info->ids, &id);
HASH_FIND(hh, type_info->hash_table, &id, sizeof(hid_t), id_info);
/* Remember this ID */
type_info->last_id_info = id_info;
@@ -1724,8 +1763,9 @@ H5I_find_id(const void *object, H5I_type_t type, hid_t *id)
/* Only iterate through ID list if it is initialized and there are IDs in type */
if (type_info->init_count > 0 && type_info->id_count > 0) {
H5I_get_id_ud_t udata; /* User data */
herr_t iter_status; /* Iteration status */
H5I_get_id_ud_t udata; /* User data */
H5I_id_info_t * item = NULL;
H5I_id_info_t * tmp = NULL;
/* Set up iterator user data */
udata.object = object;
@@ -1733,8 +1773,14 @@ H5I_find_id(const void *object, H5I_type_t type, hid_t *id)
udata.ret_id = H5I_INVALID_HID;
/* Iterate over IDs for the ID type */
if ((iter_status = H5SL_iterate(type_info->ids, H5I__find_id_cb, &udata)) < 0)
HGOTO_ERROR(H5E_ID, H5E_BADITER, FAIL, "iteration failed")
HASH_ITER(hh, type_info->hash_table, item, tmp)
{
int ret = H5I__find_id_cb((void *)item, NULL, (void *)&udata);
if (H5_ITER_ERROR == ret)
HGOTO_ERROR(H5E_ID, H5E_BADITER, FAIL, "iteration failed")
if (H5_ITER_STOP == ret)
break;
}
*id = udata.ret_id;
}

View File

@@ -29,8 +29,14 @@
/* Get package's private header */
#include "H5Iprivate.h"
/* Other private headers needed by this file */
#include "H5SLprivate.h" /* Skip Lists */
/* uthash is an external, header-only hash table implementation.
*
* We include the file directly in src/ and #define a few functions
* to use our internal memory calls.
*/
#define uthash_malloc(sz) H5MM_malloc(sz)
#define uthash_free(ptr, sz) H5MM_free(ptr) /* Ignoring sz is intentional */
#include "uthash.h"
/**************************/
/* Package Private Macros */
@@ -72,6 +78,10 @@ typedef struct H5I_id_info_t {
hbool_t is_future; /* Whether this ID represents a future object */
H5I_future_realize_func_t realize_cb; /* 'realize' callback for future object */
H5I_future_discard_func_t discard_cb; /* 'discard' callback for future object */
/* Hash table ID fields */
hbool_t marked; /* Marked for deletion */
UT_hash_handle hh; /* Hash table handle (must be LAST) */
} H5I_id_info_t;
/* Type information structure used */
@@ -81,7 +91,7 @@ typedef struct H5I_type_info_t {
uint64_t id_count; /* Current number of IDs held */
uint64_t nextid; /* ID to use for the next object */
H5I_id_info_t * last_id_info; /* Info for most recent ID looked up */
H5SL_t * ids; /* Pointer to skip list that stores IDs */
H5I_id_info_t * hash_table; /* Hash table pointer for this ID type */
} H5I_type_info_t;
/*****************************/
@@ -91,12 +101,13 @@ typedef struct H5I_type_info_t {
/* Array of pointers to ID types */
H5_DLLVAR H5I_type_info_t *H5I_type_info_array_g[H5I_MAX_NUM_TYPES];
/* Variable to keep track of the number of types allocated. Its value is the */
/* next type ID to be handed out, so it is always one greater than the number */
/* of types. */
/* Starts at 1 instead of 0 because it makes trace output look nicer. If more */
/* types (or IDs within a type) are needed, adjust TYPE_BITS in H5Ipkg.h */
/* and/or increase size of hid_t */
/* Variable to keep track of the number of types allocated. Its value is the
* next type ID to be handed out, so it is always one greater than the number
* of types.
* Starts at 1 instead of 0 because it makes trace output look nicer. If more
* types (or IDs within a type) are needed, adjust TYPE_BITS in H5Ipkg.h
* and/or increase size of hid_t
*/
H5_DLLVAR int H5I_next_type_g;
/******************************/

View File

@@ -1512,7 +1512,8 @@ H5T__unlock_cb(void *_dt, hid_t H5_ATTR_UNUSED id, void *_udata)
FUNC_ENTER_STATIC_NOERR
HDassert(dt && dt->shared);
HDassert(dt);
HDassert(dt->shared);
if (H5T_STATE_IMMUTABLE == dt->shared->state) {
dt->shared->state = H5T_STATE_RDONLY;

1184
src/uthash.h Normal file

File diff suppressed because it is too large Load Diff

View File

@@ -715,7 +715,7 @@ test_remove_clear_type(void)
for (i = 0; i < RCT_NITER; i++) {
/* The number of members in the type, according to the HDF5 library */
hsize_t nmembers = 1234567;
hsize_t nmembers = 1234567; /* (init to fake number) */
/* The number of objects found while scanning through the object list */
int found;
@@ -801,7 +801,7 @@ test_remove_clear_type(void)
if (ret == FAIL)
goto error;
VERIFY(nmembers, found, "The number of members remaining in the type did not match our count");
if (nmembers != found)
if (nmembers != (hsize_t)found)
goto error;
/*****************************************