From 9ac8638d9d9e5c7f58f2d637c07894f398b51193 Mon Sep 17 00:00:00 2001 From: Howard Chu Date: Thu, 8 Sep 2011 17:13:51 -0700 Subject: [PATCH 1/4] More docs --- libraries/libmdb/mdb.c | 85 +++++++++++++++++++++++++++++++++++++----- libraries/libmdb/mdb.h | 7 +++- 2 files changed, 82 insertions(+), 10 deletions(-) diff --git a/libraries/libmdb/mdb.c b/libraries/libmdb/mdb.c index b913d7d7d1..5a73e99317 100644 --- a/libraries/libmdb/mdb.c +++ b/libraries/libmdb/mdb.c @@ -543,17 +543,17 @@ typedef struct MDB_node { /** Size of the node header, excluding dynamic data at the end */ #define NODESIZE offsetof(MDB_node, mn_data) - /** Size of a node in a branch page. + /** Size of a node in a branch page with a given key. * This is just the node header plus the key, there is no data. */ #define INDXSIZE(k) (NODESIZE + ((k) == NULL ? 0 : (k)->mv_size)) - /** Size of a node in a leaf page. + /** Size of a node in a leaf page with a given key and data. * This is node header plus key plus data size. */ #define LEAFSIZE(k, d) (NODESIZE + (k)->mv_size + (d)->mv_size) - /** Address of node \i in page \p */ + /** Address of node \b i in page \b p */ #define NODEPTR(p, i) ((MDB_node *)((char *)(p) + (p)->mp_ptrs[i])) /** Address of the key for the node */ @@ -748,6 +748,7 @@ struct MDB_env { HANDLE me_fd; /**< The main data file */ HANDLE me_lfd; /**< The lock file */ HANDLE me_mfd; /**< just for writing the meta pages */ + /** Failed to update the meta page. Probably an I/O error. 
*/ #define MDB_FATAL_ERROR 0x80000000U uint32_t me_flags; uint32_t me_extrapad; /**< unused for now */ @@ -805,8 +806,8 @@ static int mdb_read_data(MDB_txn *txn, MDB_node *leaf, MDB_val *data); static int mdb_rebalance(MDB_cursor *mc); static int mdb_update_key(MDB_page *mp, indx_t indx, MDB_val *key); -static int mdb_move_node(MDB_cursor *csrcrc, MDB_cursor *cdstst); -static int mdb_merge(MDB_cursor *csrcrc, MDB_cursor *cdstst); +static int mdb_move_node(MDB_cursor *csrc, MDB_cursor *cdst); +static int mdb_merge(MDB_cursor *csrc, MDB_cursor *cdst); static int mdb_split(MDB_cursor *mc, MDB_val *newkey, MDB_val *newdata, pgno_t newpgno); static MDB_page *mdb_new_page(MDB_cursor *mc, uint32_t flags, int num); @@ -842,6 +843,7 @@ static SECURITY_ATTRIBUTES mdb_all_sa; static int mdb_sec_inited; #endif +/** Return the library version info. */ char * mdb_version(int *major, int *minor, int *patch) { @@ -851,7 +853,7 @@ mdb_version(int *major, int *minor, int *patch) return MDB_VERSION_STRING; } - /** Table of descriptions for MDB @ref error codes */ +/** Table of descriptions for MDB @ref errors */ static char *const mdb_errstr[] = { "MDB_KEYEXIST: Key/data pair already exists", "MDB_NOTFOUND: No matching key/data pair found", @@ -874,6 +876,11 @@ mdb_strerror(int err) } #if DEBUG +/** Display a key in hexadecimal and return the address of the result. + * @param[in] key the key to display + * @param[in] buf the buffer to write into. Should always be #DKBUF. + * @return The key in hexadecimal form. + */ static char * mdb_dkey(MDB_val *key, char *buf) { @@ -882,6 +889,9 @@ mdb_dkey(MDB_val *key, char *buf) unsigned int i; if (key->mv_size > MAXKEYSIZE) return "MAXKEYSIZE"; + /* may want to make this a dynamic check: if the key is mostly + * printable characters, print it as-is instead of converting to hex. 
+ */ #if 1 for (i=0; i<key->mv_size; i++) ptr += sprintf(ptr, "%02x", *c++); @@ -898,6 +908,15 @@ mdb_cmp(MDB_txn *txn, MDB_dbi dbi, const MDB_val *a, const MDB_val *b) return txn->mt_dbxs[dbi].md_cmp(a, b); } +/** Compare two data items according to a particular database. + * This returns a comparison as if the two items were data items of + * a sorted duplicates #MDB_DUPSORT database. + * @param[in] txn A transaction handle returned by #mdb_txn_begin() + * @param[in] dbi A database handle returned by #mdb_open() + * @param[in] a The first item to compare + * @param[in] b The second item to compare + * @return < 0 if a < b, 0 if a == b, > 0 if a > b + */ int mdb_dcmp(MDB_txn *txn, MDB_dbi dbi, const MDB_val *a, const MDB_val *b) { @@ -907,7 +926,15 @@ mdb_dcmp(MDB_txn *txn, MDB_dbi dbi, const MDB_val *a, const MDB_val *b) return EINVAL; /* too bad you can't distinguish this from a valid result */ } -/* Allocate new page(s) for writing */ +/** Allocate pages for writing. + * If there are free pages available from older transactions, they + * will be re-used first. Otherwise a new page will be allocated. + * @param[in] mc A cursor handle identifying the transaction and + * database for which we are allocating. + * @param[in] num the number of pages to allocate. + * @return Address of the allocated page(s). Requests for multiple pages + * will always be satisfied by a single contiguous chunk of memory. + */ static MDB_page * mdb_alloc_page(MDB_cursor *mc, int num) { @@ -1025,7 +1052,9 @@ mdb_alloc_page(MDB_cursor *mc, int num) return np; } -/* Touch a page: make it dirty and re-insert into tree with updated pgno. +/** Touch a page: make it dirty and re-insert into tree with updated pgno. + * @param[in] mc cursor pointing to the page to be touched + * @return 0 on success, non-zero on failure. 
*/ static int mdb_touch(MDB_cursor *mc) @@ -1047,7 +1076,9 @@ mdb_touch(MDB_cursor *mc) mp->mp_flags |= P_DIRTY; mc->mc_pg[mc->mc_top] = mp; - /* Update the page number to new touched page. */ + /** If this page has a parent, update the parent to point to + * this new page. + */ if (mc->mc_top) SETPGNO(NODEPTR(mc->mc_pg[mc->mc_top-1], mc->mc_ki[mc->mc_top-1]), mp->mp_pgno); } @@ -1068,6 +1099,12 @@ mdb_env_sync(MDB_env *env, int force) static inline void mdb_txn_reset0(MDB_txn *txn); +/** Common code for #mdb_txn_begin() and #mdb_txn_renew(). + * @param[in] txn the transaction handle to initialize + * @return 0 on success, non-zero on failure. This can only + * fail for read-only transactions, and then only if the + * reader table is full. + */ static inline int mdb_txn_renew0(MDB_txn *txn) { @@ -1181,6 +1218,9 @@ mdb_txn_begin(MDB_env *env, unsigned int flags, MDB_txn **ret) return rc; } +/** Common code for #mdb_txn_reset() and #mdb_txn_abort(). + * @param[in] txn the transaction handle to reset + */ static inline void mdb_txn_reset0(MDB_txn *txn) { @@ -1213,6 +1253,7 @@ mdb_txn_reset0(MDB_txn *txn) env->me_txn = NULL; for (i=2; ime_numdbs; i++) env->me_dbxs[i].md_dirty = 0; + /* The writer mutex was locked in mdb_txn_begin. */ UNLOCK_MUTEX_W(env); } } @@ -1510,6 +1551,12 @@ done: return MDB_SUCCESS; } +/** Read the environment parameters of a DB environment before + * mapping it into memory. + * @param[in] env the environment handle + * @param[out] meta address of where to store the meta information + * @return 0 on success, non-zero on failure. + */ static int mdb_env_read_header(MDB_env *env, MDB_meta *meta) { @@ -1560,6 +1607,11 @@ mdb_env_read_header(MDB_env *env, MDB_meta *meta) return 0; } +/** Write the environment parameters of a freshly created DB environment. + * @param[in] env the environment handle + * @param[out] meta address of where to store the meta information + * @return 0 on success, non-zero on failure. 
+ */ static int mdb_env_init_meta(MDB_env *env, MDB_meta *meta) { @@ -1610,6 +1662,10 @@ mdb_env_init_meta(MDB_env *env, MDB_meta *meta) return rc; } +/** Update the environment info to commit a transaction. + * @param[in] txn the transaction that's being committed + * @return 0 on success, non-zero on failure. + */ static int mdb_env_write_meta(MDB_txn *txn) { @@ -1690,6 +1746,11 @@ mdb_env_write_meta(MDB_txn *txn) return MDB_SUCCESS; } +/** Check both meta pages to see which one is newer. + * @param[in] env the environment handle + * @param[out] which address of where to store the meta toggle ID + * @return 0 on success, non-zero on failure. + */ static int mdb_env_read_meta(MDB_env *env, int *which) { @@ -1759,6 +1820,8 @@ mdb_env_get_maxreaders(MDB_env *env, int *readers) return MDB_SUCCESS; } +/** Further setup required for opening an MDB environment + */ static int mdb_env_open2(MDB_env *env, unsigned int flags) { @@ -4493,6 +4556,10 @@ mdb_put(MDB_txn *txn, MDB_dbi dbi, int mdb_env_set_flags(MDB_env *env, unsigned int flag, int onoff) { + /** Only a subset of the @ref mdb_env flags can be changed + * at runtime. Changing other flags requires closing the environment + * and re-opening it with the new flags. + */ #define CHANGEABLE (MDB_NOSYNC) if ((flag & CHANGEABLE) != flag) return EINVAL; diff --git a/libraries/libmdb/mdb.h b/libraries/libmdb/mdb.h index 4dac96d181..7662c08d29 100644 --- a/libraries/libmdb/mdb.h +++ b/libraries/libmdb/mdb.h @@ -151,7 +151,12 @@ typedef void (MDB_rel_func)(void *newptr, void *oldptr, size_t size); #define MDB_REVERSEKEY 0x02 /** use sorted duplicates */ #define MDB_DUPSORT 0x04 - /** numeric keys in native byte order */ + /** numeric keys in native byte order. + * @note The keys size must actually be equal to + * sizeof(int) or sizeof(long) otherwise there will be + * alignment issues. On some processors, accessing misaligned + * data will cause a SIGBUS. 
+ */ #define MDB_INTEGERKEY 0x08 /** with #MDB_DUPSORT, sorted dup items have fixed size */ #define MDB_DUPFIXED 0x10 From 0c487538cf05df05b2c60e1814cf8153fae768fc Mon Sep 17 00:00:00 2001 From: Howard Chu Date: Thu, 8 Sep 2011 20:18:29 -0700 Subject: [PATCH 2/4] Partial revert, fix MDB_page definition --- libraries/libmdb/mdb.c | 29 ++++++++++++++++++----------- 1 file changed, 18 insertions(+), 11 deletions(-) diff --git a/libraries/libmdb/mdb.c b/libraries/libmdb/mdb.c index 5a73e99317..21c71b2cdc 100644 --- a/libraries/libmdb/mdb.c +++ b/libraries/libmdb/mdb.c @@ -469,10 +469,12 @@ typedef struct MDB_txninfo { * headers on any page after the first. */ typedef struct MDB_page { - union { - pgno_t mp_pgno; /**< page number */ - void * mp_next; /**< for in-memory list of freed structs */ - }; +#define mp_pgno mp_p.p_pgno +#define mp_next mp_p.p_next + union padded { + pgno_t p_pgno; /**< page number */ + void * p_next; /**< for in-memory list of freed structs */ + } mp_p; #define P_BRANCH 0x01 /**< branch page */ #define P_LEAF 0x02 /**< leaf page */ #define P_OVERFLOW 0x04 /**< overflow page */ @@ -480,13 +482,16 @@ typedef struct MDB_page { #define P_DIRTY 0x10 /**< dirty page */ #define P_LEAF2 0x20 /**< for #MDB_DUPFIXED records */ uint32_t mp_flags; - union { +#define mp_lower mp_pb.pb.pb_lower +#define mp_upper mp_pb.pb.pb_upper +#define mp_pages mp_pb.pb_pages + union page_bounds { struct { - indx_t mp_lower; /**< lower bound of free space */ - indx_t mp_upper; /**< upper bound of free space */ - }; - uint32_t mp_pages; /**< number of overflow pages */ - }; + indx_t pb_lower; /**< lower bound of free space */ + indx_t pb_upper; /**< upper bound of free space */ + } pb; + uint32_t pb_pages; /**< number of overflow pages */ + } mp_pb; indx_t mp_ptrs[1]; /**< dynamic size */ } MDB_page; @@ -2897,7 +2902,6 @@ mdb_cursor_set(MDB_cursor *mc, MDB_val *key, MDB_val *data, set1: if (exactp) *exactp = 1; - rc = 0; leaf = NODEPTR(mc->mc_pg[mc->mc_top], 
mc->mc_ki[mc->mc_top]); goto set3; } @@ -3336,6 +3340,9 @@ top: rdata = &xdata; xdata.mv_size = sizeof(MDB_db); xdata.mv_data = &dummy; + /* new sub-DB, must fully init xcursor */ + if (flags == MDB_CURRENT) + flags = 0; goto new_sub; } goto put_sub; From a7edb95e6496bb65eed291fffc1eb11fbf60f7cd Mon Sep 17 00:00:00 2001 From: Howard Chu Date: Fri, 9 Sep 2011 03:52:12 -0700 Subject: [PATCH 3/4] Minor typos, additional clarification --- libraries/libmdb/mdb.c | 6 +++--- libraries/libmdb/mdb.h | 26 ++++++++++++++------------ 2 files changed, 17 insertions(+), 15 deletions(-) diff --git a/libraries/libmdb/mdb.c b/libraries/libmdb/mdb.c index 21c71b2cdc..1363ca4f2a 100644 --- a/libraries/libmdb/mdb.c +++ b/libraries/libmdb/mdb.c @@ -316,8 +316,8 @@ typedef uint16_t indx_t; * Since the database uses multi-version concurrency control, readers don't * actually need any locking. This table is used to keep track of which * readers are using data from which old transactions, so that we'll know - * when a particular old transaction is no longer in use, Old transactions - * that have freed any data pages can then have their freed pages reclaimed + * when a particular old transaction is no longer in use. Old transactions + * that have discarded any data pages can then have those pages reclaimed * for use by a later write transaction. * * The lock table is constructed such that reader slots are aligned with the @@ -886,7 +886,7 @@ mdb_strerror(int err) * @param[in] buf the buffer to write into. Should always be #DKBUF. * @return The key in hexadecimal form. */ -static char * +char * mdb_dkey(MDB_val *key, char *buf) { char *ptr = buf; diff --git a/libraries/libmdb/mdb.h b/libraries/libmdb/mdb.h index 7662c08d29..3b5d74f00e 100644 --- a/libraries/libmdb/mdb.h +++ b/libraries/libmdb/mdb.h @@ -152,11 +152,7 @@ typedef void (MDB_rel_func)(void *newptr, void *oldptr, size_t size); /** use sorted duplicates */ #define MDB_DUPSORT 0x04 /** numeric keys in native byte order. 
- * @note The keys size must actually be equal to - * sizeof(int) or sizeof(long) otherwise there will be - * alignment issues. On some processors, accessing misaligned - * data will cause a SIGBUS. - */ + * The keys must all be of the same size. */ #define MDB_INTEGERKEY 0x08 /** with #MDB_DUPSORT, sorted dup items have fixed size */ #define MDB_DUPFIXED 0x10 @@ -205,7 +201,7 @@ typedef enum MDB_cursor_op { MDB_PREV_DUP, /**< Position at previous data item of current key. Only for #MDB_DUPSORT */ MDB_PREV_NODUP, /**< Position at last data item of previous key. - only for #MDB_DUPSORT */ + Only for #MDB_DUPSORT */ MDB_SET, /**< Position at specified key */ MDB_SET_RANGE /**< Position at first key greater than or equal to specified key. */ } MDB_cursor_op; @@ -498,7 +494,7 @@ void mdb_txn_abort(MDB_txn *txn); /** Reset a read-only transaction. * This releases the current reader lock but doesn't free the * transaction handle, allowing it to be used again later by #mdb_txn_renew(). - * It otherwise has the same affect as #mdb_txn_abort() but saves some memory + * It otherwise has the same effect as #mdb_txn_abort() but saves some memory * allocation/deallocation overhead if a thread is going to start a new * read-only transaction again soon. * All cursors opened within the transaction must be closed before the transaction @@ -544,11 +540,9 @@ int mdb_txn_renew(MDB_txn *txn); * keys may have multiple data items, stored in sorted order.) By default * keys must be unique and may have only a single data item. *
<li>#MDB_INTEGERKEY - * Keys are binary integers in native byte order. On Big-Endian systems - * this flag has no effect. On Little-Endian systems this flag behaves - * the same as #MDB_REVERSEKEY. This flag is simply provided as a - * convenience so that applications don't need to detect Endianness themselves - * when using integers as keys. + * Keys are binary integers in native byte order. Setting this option + * requires all keys to be the same size, typically sizeof(int) + * or sizeof(long). *
<li>#MDB_DUPFIXED * This flag may only be used in combination with #MDB_DUPSORT. This option * tells the library that the data items for this database are all the same @@ -558,6 +552,9 @@ int mdb_txn_renew(MDB_txn *txn); *
<li>#MDB_INTEGERDUP * This option specifies that duplicate data items are also integers, and * should be sorted as such. + *
<li>#MDB_REVERSEDUP + * This option specifies that duplicate data items should be compared as + * strings in reverse order. *
<li>#MDB_CREATE * Create the named database if it doesn't exist. This option is not * allowed in a read-only transaction or a read-only environment. @@ -660,6 +657,11 @@ int mdb_set_relfunc(MDB_txn *txn, MDB_dbi dbi, MDB_rel_func *rel); * If the database supports duplicate keys (#MDB_DUPSORT) then the * first data item for the key will be returned. Retrieval of other * items requires the use of #mdb_cursor_get(). + * + * @note The memory pointed to by the returned values is owned by the + * database. The caller need not dispose of the memory, and may not + * modify it in any way. For values returned in a read-only transaction + * any modification attempts will cause a SIGSEGV. * @param[in] txn A transaction handle returned by #mdb_txn_begin() * @param[in] dbi A database handle returned by #mdb_open() * @param[in] key The key to search for in the database From acdc248f57306edc00046c43e5af0126e06de50b Mon Sep 17 00:00:00 2001 From: Howard Chu Date: Fri, 9 Sep 2011 21:43:22 -0700 Subject: [PATCH 4/4] If put(NOOVERWRITE) exists, return existing data --- libraries/libmdb/mdb.c | 4 +++- libraries/libmdb/mdb.h | 5 +++-- 2 files changed, 6 insertions(+), 3 deletions(-) diff --git a/libraries/libmdb/mdb.c b/libraries/libmdb/mdb.c index 1363ca4f2a..b392826d07 100644 --- a/libraries/libmdb/mdb.c +++ b/libraries/libmdb/mdb.c @@ -3283,9 +3283,11 @@ mdb_cursor_put(MDB_cursor *mc, MDB_val *key, MDB_val *data, goto top; } else { int exact = 0; - rc = mdb_cursor_set(mc, key, NULL, MDB_SET, &exact); + MDB_val d2; + rc = mdb_cursor_set(mc, key, &d2, MDB_SET, &exact); if (flags == MDB_NOOVERWRITE && rc == 0) { DPRINTF("duplicate key [%s]", DKEY(key)); + *data = d2; return MDB_KEYEXIST; } if (rc && rc != MDB_NOTFOUND) diff --git a/libraries/libmdb/mdb.h b/libraries/libmdb/mdb.h index 3b5d74f00e..c0d4edcf4a 100644 --- a/libraries/libmdb/mdb.h +++ b/libraries/libmdb/mdb.h @@ -683,7 +683,7 @@ int mdb_get(MDB_txn *txn, MDB_dbi dbi, MDB_val *key, MDB_val *data); * @param[in] txn A 
transaction handle returned by #mdb_txn_begin() * @param[in] dbi A database handle returned by #mdb_open() * @param[in] key The key to store in the database - * @param[in] data The data to store + * @param[in,out] data The data to store * @param[in] flags Special options for this operation. This parameter * must be set to 0 or by bitwise OR'ing together one or more of the * values described here. @@ -696,7 +696,8 @@ int mdb_get(MDB_txn *txn, MDB_dbi dbi, MDB_val *key, MDB_val *data); *
<li>#MDB_NOOVERWRITE - enter the new key/data pair only if the key * does not already appear in the database. The function will return * #MDB_KEYEXIST if the key already appears in the database, even if - * the database supports duplicates (#MDB_DUPSORT). + * the database supports duplicates (#MDB_DUPSORT). The \b data + * parameter will be set to point to the existing item. * * @return A non-zero error value on failure and 0 on success. Some possible * errors are: