From ec2d82f79ae0884326daf2276e42052fae2ca750 Mon Sep 17 00:00:00 2001 From: Hallvard Furuseth Date: Sat, 10 Sep 2011 10:11:55 +0200 Subject: [PATCH 01/16] libmdb/.gitignore += test[45], Doxygen output --- libraries/libmdb/.gitignore | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/libraries/libmdb/.gitignore b/libraries/libmdb/.gitignore index 7455ec9c11..721ccf720b 100644 --- a/libraries/libmdb/.gitignore +++ b/libraries/libmdb/.gitignore @@ -1,5 +1,5 @@ mtest -mtest[23] +mtest[2345] testdb mdb_stat *.[ao] @@ -11,3 +11,5 @@ mdb_stat core core.* valgrind.* +man/ +html/ From 3dc8fa491add8aaa7688375439d967bb5ffe524f Mon Sep 17 00:00:00 2001 From: Hallvard Furuseth Date: Sat, 10 Sep 2011 10:11:55 +0200 Subject: [PATCH 02/16] Cleanup mdb macros. Parenthesize. Rename GetPageSize -> GET_PAGESIZE since it does not behave like a function (it has a non-pointer output argument). --- libraries/libmdb/mdb.c | 35 ++++++++++++++++++----------------- libraries/libmdb/midl.c | 2 +- 2 files changed, 19 insertions(+), 18 deletions(-) diff --git a/libraries/libmdb/mdb.c b/libraries/libmdb/mdb.c index b392826d07..7150d5062c 100644 --- a/libraries/libmdb/mdb.c +++ b/libraries/libmdb/mdb.c @@ -78,38 +78,38 @@ #define pthread_mutex_t HANDLE #define pthread_key_t DWORD #define pthread_self() GetCurrentThreadId() -#define pthread_key_create(x,y) *(x) = TlsAlloc() +#define pthread_key_create(x,y) (*(x) = TlsAlloc()) #define pthread_key_delete(x) TlsFree(x) #define pthread_getspecific(x) TlsGetValue(x) #define pthread_setspecific(x,y) TlsSetValue(x,y) #define pthread_mutex_unlock(x) ReleaseMutex(x) #define pthread_mutex_lock(x) WaitForSingleObject(x, INFINITE) -#define LOCK_MUTEX_R(env) pthread_mutex_lock(env->me_rmutex) -#define UNLOCK_MUTEX_R(env) pthread_mutex_unlock(env->me_rmutex) -#define LOCK_MUTEX_W(env) pthread_mutex_lock(env->me_wmutex) -#define UNLOCK_MUTEX_W(env) pthread_mutex_unlock(env->me_wmutex) +#define LOCK_MUTEX_R(env) 
pthread_mutex_lock((env)->me_rmutex) +#define UNLOCK_MUTEX_R(env) pthread_mutex_unlock((env)->me_rmutex) +#define LOCK_MUTEX_W(env) pthread_mutex_lock((env)->me_wmutex) +#define UNLOCK_MUTEX_W(env) pthread_mutex_unlock((env)->me_wmutex) #define getpid() GetCurrentProcessId() -#define fdatasync(fd) !FlushFileBuffers(fd) +#define fdatasync(fd) (!FlushFileBuffers(fd)) #define ErrCode() GetLastError() -#define GetPageSize(x) {SYSTEM_INFO si; GetSystemInfo(&si); (x) = si.dwPageSize;} +#define GET_PAGESIZE(x) {SYSTEM_INFO si; GetSystemInfo(&si); (x) = si.dwPageSize;} #define close(fd) CloseHandle(fd) #define munmap(ptr,len) UnmapViewOfFile(ptr) #else /** Lock the reader mutex. */ -#define LOCK_MUTEX_R(env) pthread_mutex_lock(&env->me_txns->mti_mutex) +#define LOCK_MUTEX_R(env) pthread_mutex_lock(&(env)->me_txns->mti_mutex) /** Unlock the reader mutex. */ -#define UNLOCK_MUTEX_R(env) pthread_mutex_unlock(&env->me_txns->mti_mutex) +#define UNLOCK_MUTEX_R(env) pthread_mutex_unlock(&(env)->me_txns->mti_mutex) /** Lock the writer mutex. * Only a single write transaction is allowed at a time. Other writers * will block waiting for this mutex. */ -#define LOCK_MUTEX_W(env) pthread_mutex_lock(&env->me_txns->mti_wmutex) +#define LOCK_MUTEX_W(env) pthread_mutex_lock(&(env)->me_txns->mti_wmutex) /** Unlock the writer mutex. */ -#define UNLOCK_MUTEX_W(env) pthread_mutex_unlock(&env->me_txns->mti_wmutex) +#define UNLOCK_MUTEX_W(env) pthread_mutex_unlock(&(env)->me_txns->mti_wmutex) /** Get the error code for the last failed system function. */ @@ -125,13 +125,13 @@ * Mainly used to initialize file variables and signify that they are * unused. */ -#define INVALID_HANDLE_VALUE -1 +#define INVALID_HANDLE_VALUE (-1) /** Get the size of a memory page for the system. * This is the basic size that the platform's memory manager uses, and is * fundamental to the use of memory-mapped files. 
*/ -#define GetPageSize(x) (x) = sysconf(_SC_PAGE_SIZE) +#define GET_PAGESIZE(x) ((x) = sysconf(_SC_PAGE_SIZE)) #endif /** @} */ @@ -190,7 +190,7 @@ typedef ULONG pgno_t; /** A default memory page size. * The actual size is platform-dependent, but we use this for * boot-strapping. We probably should not be using this any more. - * The #GetPageSize() macro is used to get the actual size. + * The #GET_PAGESIZE() macro is used to get the actual size. * * Note that we don't currently support Huge pages. On Linux, * regular data files cannot use Huge pages, and in general @@ -596,7 +596,8 @@ typedef struct MDB_node { #define LEAF2KEY(p, i, ks) ((char *)(p) + PAGEHDRSZ + ((i)*(ks))) /** Set the \b node's key into \b key, if requested. */ -#define MDB_SET_KEY(node, key) if (key!=NULL) {(key)->mv_size = NODEKSZ(node); (key)->mv_data = NODEKEY(node);} +#define MDB_SET_KEY(node, key) { if ((key) != NULL) { \ + (key)->mv_size = NODEKSZ(node); (key)->mv_data = NODEKEY(node); } } /** Information about a single database in the environment. 
*/ typedef struct MDB_db { @@ -768,7 +769,7 @@ struct MDB_env { size_t me_mapsize; /**< size of the data memory map */ off_t me_size; /**< current file size */ pgno_t me_maxpg; /**< me_mapsize / me_psize */ - unsigned int me_psize; /**< size of a page, from #GetPageSize */ + unsigned int me_psize; /**< size of a page, from #GET_PAGESIZE */ unsigned int me_db_toggle; /**< which DB table is current */ MDB_dbx *me_dbxs; /**< array of static DB info */ MDB_db *me_dbs[2]; /**< two arrays of MDB_db info */ @@ -1627,7 +1628,7 @@ mdb_env_init_meta(MDB_env *env, MDB_meta *meta) DPUTS("writing new meta page"); - GetPageSize(psize); + GET_PAGESIZE(psize); meta->mm_magic = MDB_MAGIC; meta->mm_version = MDB_VERSION; diff --git a/libraries/libmdb/midl.c b/libraries/libmdb/midl.c index 7932d3caee..2aee4311ea 100644 --- a/libraries/libmdb/midl.c +++ b/libraries/libmdb/midl.c @@ -129,7 +129,7 @@ int mdb_midl_append( IDL ids, ID id ) /* Quicksort + Insertion sort for small arrays */ #define SMALL 8 -#define SWAP(a,b) itmp=(a);(a)=(b);(b)=itmp +#define SWAP(a,b) { itmp=(a); (a)=(b); (b)=itmp; } void mdb_midl_sort( ID *ids ) From c5dad7a6d0eb1b3597beda18b38fe92820df0d30 Mon Sep 17 00:00:00 2001 From: Hallvard Furuseth Date: Sat, 10 Sep 2011 10:11:55 +0200 Subject: [PATCH 03/16] C90 compatibility cleanup in mdb. Fix void* pointer arithmetic in cintcmp(). Fix invalid ';'s in declarations after possibly-empty macros. 
--- libraries/libmdb/mdb.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/libraries/libmdb/mdb.c b/libraries/libmdb/mdb.c index 7150d5062c..db5a11b2b2 100644 --- a/libraries/libmdb/mdb.c +++ b/libraries/libmdb/mdb.c @@ -243,7 +243,7 @@ typedef ULONG pgno_t; */ #define DKEY(x) mdb_dkey(x, kbuf) #else -#define DKBUF +#define DKBUF typedef int dummy_kbuf /* so we can put ';' after */ #define DKEY(x) #endif @@ -268,7 +268,7 @@ typedef ULONG pgno_t; #define LAZY_RWLOCK_WRLOCK(x) /** Grab the DB table read lock */ #define LAZY_RWLOCK_RDLOCK(x) - /** Declare the DB table rwlock */ + /** Declare the DB table rwlock. Should not be followed by ';'. */ #define LAZY_RWLOCK_DEF(x) /** Initialize the DB table rwlock */ #define LAZY_RWLOCK_INIT(x,y) @@ -781,7 +781,7 @@ struct MDB_env { /** ID2L of pages that were written during a write txn */ ID2 me_dirty_list[MDB_IDL_UM_SIZE]; /** rwlock for the DB tables, if #LAZY_LOCKS is false */ - LAZY_RWLOCK_DEF(me_dblock); + LAZY_RWLOCK_DEF(me_dblock) #ifdef _WIN32 HANDLE me_rmutex; /* Windows mutexes don't reside in shared mem */ HANDLE me_wmutex; @@ -2316,8 +2316,8 @@ cintcmp(const MDB_val *a, const MDB_val *b) unsigned short *u, *c; int x; - u = a->mv_data + a->mv_size; - c = b->mv_data + a->mv_size; + u = (unsigned short *) ((char *) a->mv_data + a->mv_size); + c = (unsigned short *) ((char *) b->mv_data + a->mv_size); do { x = *--u - *--c; } while(!x && u > (unsigned short *)a->mv_data); From 88da18cccfb7e76b35e075b0c9419c6b164c6a43 Mon Sep 17 00:00:00 2001 From: Hallvard Furuseth Date: Sat, 10 Sep 2011 10:11:55 +0200 Subject: [PATCH 04/16] Put MDB_node. in host byte order. 
--- libraries/libmdb/mdb.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/libraries/libmdb/mdb.c b/libraries/libmdb/mdb.c index db5a11b2b2..fb0df164ff 100644 --- a/libraries/libmdb/mdb.c +++ b/libraries/libmdb/mdb.c @@ -533,10 +533,13 @@ typedef struct MDB_page { typedef struct MDB_node { /** lo and hi are used for data size on leaf nodes and for * child pgno on branch nodes. On 64 bit platforms, flags - * is also used for pgno. (branch nodes ignore flags) + * is also used for pgno. (Branch nodes have no flags). + * They are in host byte order in case that lets some + * accesses be optimized into a 32-bit word access. */ - unsigned short mn_lo; - unsigned short mn_hi; /**< part of dsize or pgno */ +#define mn_lo mn_offset[__BYTE_ORDER!=__LITTLE_ENDIAN] +#define mn_hi mn_offset[__BYTE_ORDER==__LITTLE_ENDIAN] /**< part of dsize or pgno */ + unsigned short mn_offset[2]; unsigned short mn_flags; /**< flags for special node types */ #define F_BIGDATA 0x01 /**< data put on overflow page */ #define F_SUBDATA 0x02 /**< data is a sub-database */ From 69e53cf700bcceb5c531384ab97e10c2ecff922a Mon Sep 17 00:00:00 2001 From: Hallvard Furuseth Date: Sat, 10 Sep 2011 10:11:55 +0200 Subject: [PATCH 05/16] Check some mdb integer type assumptions --- libraries/libmdb/mdb.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/libraries/libmdb/mdb.c b/libraries/libmdb/mdb.c index fb0df164ff..9d925ca684 100644 --- a/libraries/libmdb/mdb.c +++ b/libraries/libmdb/mdb.c @@ -48,6 +48,7 @@ #include #include +#include <limits.h> #include #include #include @@ -63,6 +64,12 @@ #include "mdb.h" #include "midl.h" +#if (__BYTE_ORDER == __LITTLE_ENDIAN) == (__BYTE_ORDER == __BIG_ENDIAN) +# error "Unknown or unsupported endianness (__BYTE_ORDER)" +#elif (-6 & 5) || CHAR_BIT != 8 || UINT_MAX < 0xffffffff || ULONG_MAX % 0xFFFF +# error "Two's complement, reasonably sized integer types, please" +#endif + /** @defgroup internal MDB Internals * @{ */ From 
2d1f3b7ed39007201cb009a4e7ed8aa58a4be6fc Mon Sep 17 00:00:00 2001 From: Hallvard Furuseth Date: Sat, 10 Sep 2011 10:11:55 +0200 Subject: [PATCH 06/16] Fix memn(r)cmp of key sizes > INT_MAX. Simplify. --- libraries/libmdb/mdb.c | 41 ++++++++++++++++++++--------------------- 1 file changed, 20 insertions(+), 21 deletions(-) diff --git a/libraries/libmdb/mdb.c b/libraries/libmdb/mdb.c index 9d925ca684..b73541bd39 100644 --- a/libraries/libmdb/mdb.c +++ b/libraries/libmdb/mdb.c @@ -2340,45 +2340,44 @@ cintcmp(const MDB_val *a, const MDB_val *b) static int memncmp(const MDB_val *a, const MDB_val *b) { - int diff, len_diff; + int diff; + ssize_t len_diff; unsigned int len; len = a->mv_size; - len_diff = a->mv_size - b->mv_size; - if (len_diff > 0) + len_diff = (ssize_t) a->mv_size - (ssize_t) b->mv_size; + if (len_diff > 0) { len = b->mv_size; + len_diff = 1; + } + diff = memcmp(a->mv_data, b->mv_data, len); - return diff ? diff : len_diff; + return diff ? diff : len_diff<0 ? -1 : len_diff; } static int memnrcmp(const MDB_val *a, const MDB_val *b) { const unsigned char *p1, *p2, *p1_lim; - int diff, len_diff; + ssize_t len_diff; + int diff; - if (b->mv_size == 0) - return a->mv_size != 0; - if (a->mv_size == 0) - return -1; + p1_lim = (const unsigned char *)a->mv_data; + p1 = (const unsigned char *)a->mv_data + a->mv_size; + p2 = (const unsigned char *)b->mv_data + b->mv_size; - p1 = (const unsigned char *)a->mv_data + a->mv_size - 1; - p2 = (const unsigned char *)b->mv_data + b->mv_size - 1; - - len_diff = a->mv_size - b->mv_size; - if (len_diff < 0) - p1_lim = p1 - a->mv_size; - else - p1_lim = p1 - b->mv_size; + len_diff = (ssize_t) a->mv_size - (ssize_t) b->mv_size; + if (len_diff > 0) { + p1_lim += len_diff; + len_diff = 1; + } while (p1 > p1_lim) { - diff = *p1 - *p2; + diff = *--p1 - *--p2; if (diff) return diff; - p1--; - p2--; } - return len_diff; + return len_diff<0 ? -1 : len_diff; } /* Search for key within a leaf page, using binary search. 
From 5f682934751d54263df19f3b181f1555158daf25 Mon Sep 17 00:00:00 2001 From: Hallvard Furuseth Date: Sat, 10 Sep 2011 10:11:55 +0200 Subject: [PATCH 07/16] explain mdb_midl_sort() istack size --- libraries/libmdb/midl.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/libraries/libmdb/midl.c b/libraries/libmdb/midl.c index 2aee4311ea..7a7b59c7c0 100644 --- a/libraries/libmdb/midl.c +++ b/libraries/libmdb/midl.c @@ -15,6 +15,7 @@ * . */ +#include <limits.h> #include #include #include @@ -134,7 +135,8 @@ int mdb_midl_append( IDL ids, ID id ) void mdb_midl_sort( ID *ids ) { - int istack[16*sizeof(int)]; + /* Max possible depth of int-indexed tree * 2 items/level */ + int istack[sizeof(int)*CHAR_BIT * 2]; int i,j,k,l,ir,jstack; ID a, itmp; From bf9d722bec7d0e6094242b405d6a66720b934fc6 Mon Sep 17 00:00:00 2001 From: Hallvard Furuseth Date: Sat, 10 Sep 2011 10:11:56 +0200 Subject: [PATCH 08/16] mdb type cleanup: Consistent reader count types. Use unsigned int for reader counts/max limits, that's the smallest change. Don't need uint32_t when mdb_env_set_maxreaders() takes less. Change prototypes of mdb_env_set_maxreaders,mdb_env_get_maxreaders(). Check the mdb_env_set_maxreaders() argument. --- libraries/libmdb/mdb.c | 8 ++++---- libraries/libmdb/mdb.h | 4 ++-- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/libraries/libmdb/mdb.c b/libraries/libmdb/mdb.c index b73541bd39..1a539ba1cb 100644 --- a/libraries/libmdb/mdb.c +++ b/libraries/libmdb/mdb.c @@ -436,7 +436,7 @@ typedef struct MDB_txbody { * This always records the maximum count, it is not decremented * when readers release their slots. */ - uint32_t mtb_numreaders; + unsigned mtb_numreaders; /** The ID of the most recent meta page in the database. * This is recorded here only for convenience; the value can always * be determined by reading the main database meta pages. 
@@ -1819,16 +1819,16 @@ mdb_env_set_maxdbs(MDB_env *env, int dbs) } int -mdb_env_set_maxreaders(MDB_env *env, int readers) +mdb_env_set_maxreaders(MDB_env *env, unsigned int readers) { - if (env->me_map) + if (env->me_map || readers < 1) return EINVAL; env->me_maxreaders = readers; return MDB_SUCCESS; } int -mdb_env_get_maxreaders(MDB_env *env, int *readers) +mdb_env_get_maxreaders(MDB_env *env, unsigned int *readers) { if (!env || !readers) return EINVAL; diff --git a/libraries/libmdb/mdb.h b/libraries/libmdb/mdb.h index c0d4edcf4a..f718c35ac3 100644 --- a/libraries/libmdb/mdb.h +++ b/libraries/libmdb/mdb.h @@ -414,7 +414,7 @@ int mdb_env_set_mapsize(MDB_env *env, size_t size); *
  • EINVAL - an invalid parameter was specified, or the environment is already open. * */ -int mdb_env_set_maxreaders(MDB_env *env, int readers); +int mdb_env_set_maxreaders(MDB_env *env, unsigned int readers); /** Get the maximum number of threads for the environment. * @param[in] env An environment handle returned by #mdb_env_create() @@ -425,7 +425,7 @@ int mdb_env_set_maxreaders(MDB_env *env, int readers); *
  • EINVAL - an invalid parameter was specified. * */ -int mdb_env_get_maxreaders(MDB_env *env, int *readers); +int mdb_env_get_maxreaders(MDB_env *env, unsigned int *readers); /** Set the maximum number of databases for the environment. * This function is only needed if multiple databases will be used in the From b1cfff4aeb9aceed8b18714f4029bd53d59f05ca Mon Sep 17 00:00:00 2001 From: Hallvard Furuseth Date: Sat, 10 Sep 2011 10:11:56 +0200 Subject: [PATCH 09/16] Add mdb.c:txnid_t for readability. Maybe later we'll test txnid wraparound with it defined to ushort. --- libraries/libmdb/mdb.c | 25 +++++++++++++++---------- 1 file changed, 15 insertions(+), 10 deletions(-) diff --git a/libraries/libmdb/mdb.c b/libraries/libmdb/mdb.c index 1a539ba1cb..0c1329e856 100644 --- a/libraries/libmdb/mdb.c +++ b/libraries/libmdb/mdb.c @@ -168,6 +168,11 @@ */ typedef ULONG pgno_t; + /** A transaction ID. + * See struct MDB_txn.mt_txnid for details. + */ +typedef ULONG txnid_t; + /** @defgroup debug Debug Macros * @{ */ @@ -382,7 +387,7 @@ typedef struct MDB_rxbody { * started from so we can avoid overwriting any data used in that * particular version. */ - ULONG mrb_txnid; + txnid_t mrb_txnid; /** The process ID of the process owning this reader txn. */ pid_t mrb_pid; /** The thread ID of the thread owning this txn. */ @@ -431,7 +436,7 @@ typedef struct MDB_txbody { * This is recorded here only for convenience; the value can always * be determined by reading the main database meta pages. */ - ULONG mtb_txnid; + txnid_t mtb_txnid; /** The number of slots that have been used in the reader table. * This always records the maximum count, it is not decremented * when readers release their slots. @@ -641,7 +646,7 @@ typedef struct MDB_meta { /** Any persistent environment flags. 
@ref mdb_env */ #define mm_flags mm_dbs[0].md_flags pgno_t mm_last_pg; /**< last used page in file */ - ULONG mm_txnid; /**< txnid that committed this page */ + txnid_t mm_txnid; /**< txnid that committed this page */ } MDB_meta; /** Auxiliary DB info. @@ -669,7 +674,7 @@ struct MDB_txn { * Only committed write transactions increment the ID. If a transaction * aborts, the ID may be re-used by the next writer. */ - ULONG mt_txnid; + txnid_t mt_txnid; MDB_env *mt_env; /**< the DB environment */ /** The list of pages that became unused during this transaction. * This is an #IDL. @@ -754,7 +759,7 @@ typedef struct MDB_oldpages { */ struct MDB_oldpages *mo_next; /** The ID of the transaction in which these pages were freed. */ - ULONG mo_txnid; + txnid_t mo_txnid; /** An #IDL of the pages */ pgno_t mo_pages[1]; /* dynamic */ } MDB_oldpages; @@ -966,7 +971,7 @@ mdb_alloc_page(MDB_cursor *mc, int num) /* See if there's anything in the free DB */ MDB_cursor m2; MDB_node *leaf; - ULONG *kptr, oldest; + txnid_t *kptr, oldest; m2.mc_txn = txn; m2.mc_dbi = FREE_DBI; @@ -974,13 +979,13 @@ mdb_alloc_page(MDB_cursor *mc, int num) m2.mc_flags = 0; mdb_search_page(&m2, NULL, 0); leaf = NODEPTR(m2.mc_pg[m2.mc_top], 0); - kptr = (ULONG *)NODEKEY(leaf); + kptr = (txnid_t *)NODEKEY(leaf); { unsigned int i; oldest = txn->mt_txnid - 1; for (i=0; imt_env->me_txns->mti_numreaders; i++) { - ULONG mr = txn->mt_env->me_txns->mti_readers[i].mr_txnid; + txnid_t mr = txn->mt_env->me_txns->mti_readers[i].mr_txnid; if (mr && mr < oldest) oldest = mr; } @@ -1374,7 +1379,7 @@ mdb_txn_commit(MDB_txn *txn) #endif /* write to last page of freeDB */ key.mv_size = sizeof(pgno_t); - key.mv_data = (char *)&txn->mt_txnid; + key.mv_data = &txn->mt_txnid; data.mv_data = txn->mt_free_pgs; /* The free list can still grow during this call, * despite the pre-emptive touches above. 
So check @@ -1397,7 +1402,7 @@ mdb_txn_commit(MDB_txn *txn) mop = env->me_pghead; key.mv_size = sizeof(pgno_t); - key.mv_data = (char *)&mop->mo_txnid; + key.mv_data = &mop->mo_txnid; data.mv_size = MDB_IDL_SIZEOF(mop->mo_pages); data.mv_data = mop->mo_pages; mdb_cursor_put(&mc, &key, &data, 0); From b56be64d2e5ff6ab79533df2949ebdf3df383427 Mon Sep 17 00:00:00 2001 From: Hallvard Furuseth Date: Sat, 10 Sep 2011 20:01:07 +0200 Subject: [PATCH 10/16] Drop ULONG in favor of size_t. --- libraries/libmdb/mdb.c | 151 ++++++++++++++++++------------------ libraries/libmdb/mdb.h | 12 +-- libraries/libmdb/mdb_stat.c | 8 +- libraries/libmdb/midl.h | 11 +-- 4 files changed, 91 insertions(+), 91 deletions(-) diff --git a/libraries/libmdb/mdb.c b/libraries/libmdb/mdb.c index 0c1329e856..19977cab1d 100644 --- a/libraries/libmdb/mdb.c +++ b/libraries/libmdb/mdb.c @@ -166,12 +166,12 @@ * @note In the #MDB_node structure, we only store 48 bits of this value, * which thus limits us to only 60 bits of addressable data. */ -typedef ULONG pgno_t; +typedef ID pgno_t; /** A transaction ID. * See struct MDB_txn.mt_txnid for details. */ -typedef ULONG txnid_t; +typedef ID txnid_t; /** @defgroup debug Debug Macros * @{ @@ -563,6 +563,9 @@ typedef struct MDB_node { /** Size of the node header, excluding dynamic data at the end */ #define NODESIZE offsetof(MDB_node, mn_data) + /** Bit position of top word in page number, for shifting mn_flags */ +#define PGNO_TOPWORD ((pgno_t)-1 > 0xffffffffu ? 32 : 0) + /** Size of a node in a branch page with a given key. * This is just the node header plus the key, there is no data. 
*/ @@ -583,18 +586,13 @@ typedef struct MDB_node { #define NODEDATA(node) (void *)((char *)(node)->mn_data + (node)->mn_ksize) /** Get the page number pointed to by a branch node */ -#if LONG_MAX == 0x7fffffff -#define NODEPGNO(node) ((node)->mn_lo | ((node)->mn_hi << 16)) - /** Set the page number in a branch node */ -#define SETPGNO(node,pgno) do { \ - (node)->mn_lo = (pgno) & 0xffff; (node)->mn_hi = (pgno) >> 16;} while(0) -#else -#define NODEPGNO(node) ((node)->mn_lo | ((node)->mn_hi << 16) | ((unsigned long)(node)->mn_flags << 32)) +#define NODEPGNO(node) \ + ((node)->mn_lo | ((pgno_t) (node)->mn_hi << 16) | \ + (PGNO_TOPWORD ? ((pgno_t) (node)->mn_flags << PGNO_TOPWORD) : 0)) /** Set the page number in a branch node */ #define SETPGNO(node,pgno) do { \ (node)->mn_lo = (pgno) & 0xffff; (node)->mn_hi = (pgno) >> 16; \ - (node)->mn_flags = (pgno) >> 32; } while(0) -#endif + if (PGNO_TOPWORD) (node)->mn_flags = (pgno) >> PGNO_TOPWORD; } while(0) /** Get the size of the data in a leaf node */ #define NODEDSZ(node) ((node)->mn_lo | ((unsigned)(node)->mn_hi << 16)) @@ -619,10 +617,10 @@ typedef struct MDB_db { uint32_t md_pad; /**< also ksize for LEAF2 pages */ uint16_t md_flags; /**< @ref mdb_open */ uint16_t md_depth; /**< depth of this tree */ - ULONG md_branch_pages; /**< number of internal pages */ - ULONG md_leaf_pages; /**< number of leaf pages */ - ULONG md_overflow_pages; /**< number of overflow pages */ - ULONG md_entries; /**< number of data items */ + pgno_t md_branch_pages; /**< number of internal pages */ + pgno_t md_leaf_pages; /**< number of leaf pages */ + pgno_t md_overflow_pages; /**< number of overflow pages */ + size_t md_entries; /**< number of data items */ pgno_t md_root; /**< the root page of this tree */ } MDB_db; @@ -999,7 +997,7 @@ mdb_alloc_page(MDB_cursor *mc, int num) pgno_t *idl; mdb_read_data(txn, leaf, &data); - idl = (ULONG *)data.mv_data; + idl = (ID *) data.mv_data; mop = malloc(sizeof(MDB_oldpages) + MDB_IDL_SIZEOF(idl) - 
sizeof(pgno_t)); mop->mo_next = txn->mt_env->me_pghead; mop->mo_txnid = *kptr; @@ -1009,10 +1007,10 @@ mdb_alloc_page(MDB_cursor *mc, int num) #if DEBUG > 1 { unsigned int i; - DPRINTF("IDL read txn %lu root %lu num %lu", + DPRINTF("IDL read txn %zu root %zu num %zu", mop->mo_txnid, txn->mt_dbs[FREE_DBI].md_root, idl[0]); for (i=0; i %lu", mc->mc_dbi, mp->mp_pgno, np->mp_pgno); + DPRINTF("touched db %u page %zu -> %zu", mc->mc_dbi, mp->mp_pgno, np->mp_pgno); assert(mp->mp_pgno != np->mp_pgno); mdb_midl_append(mc->mc_txn->mt_free_pgs, mp->mp_pgno); pgno = np->mp_pgno; @@ -1199,7 +1197,7 @@ mdb_txn_renew(MDB_txn *txn) rc = mdb_txn_renew0(txn); if (rc == MDB_SUCCESS) { - DPRINTF("renew txn %lu%c %p on mdbenv %p, root page %lu", + DPRINTF("renew txn %zu%c %p on mdbenv %p, root page %zu", txn->mt_txnid, (txn->mt_flags & MDB_TXN_RDONLY) ? 'r' : 'w', txn, (void *)txn->mt_env, txn->mt_dbs[MAIN_DBI].md_root); } @@ -1231,7 +1229,7 @@ mdb_txn_begin(MDB_env *env, unsigned int flags, MDB_txn **ret) free(txn); else { *ret = txn; - DPRINTF("begin txn %lu%c %p on mdbenv %p, root page %lu", + DPRINTF("begin txn %zu%c %p on mdbenv %p, root page %zu", txn->mt_txnid, (txn->mt_flags & MDB_TXN_RDONLY) ? 'r' : 'w', txn, (void *) env, txn->mt_dbs[MAIN_DBI].md_root); } @@ -1285,7 +1283,7 @@ mdb_txn_reset(MDB_txn *txn) if (txn == NULL) return; - DPRINTF("reset txn %lu%c %p on mdbenv %p, root page %lu", + DPRINTF("reset txn %zu%c %p on mdbenv %p, root page %zu", txn->mt_txnid, (txn->mt_flags & MDB_TXN_RDONLY) ? 'r' : 'w', txn, (void *)txn->mt_env, txn->mt_dbs[MAIN_DBI].md_root); @@ -1298,7 +1296,7 @@ mdb_txn_abort(MDB_txn *txn) if (txn == NULL) return; - DPRINTF("abort txn %lu%c %p on mdbenv %p, root page %lu", + DPRINTF("abort txn %zu%c %p on mdbenv %p, root page %zu", txn->mt_txnid, (txn->mt_flags & MDB_TXN_RDONLY) ? 
'r' : 'w', txn, (void *)txn->mt_env, txn->mt_dbs[MAIN_DBI].md_root); @@ -1343,7 +1341,7 @@ mdb_txn_commit(MDB_txn *txn) if (!txn->mt_u.dirty_list[0].mid) goto done; - DPRINTF("committing txn %lu %p on mdbenv %p, root page %lu", + DPRINTF("committing txn %zu %p on mdbenv %p, root page %zu", txn->mt_txnid, txn, (void *)env, txn->mt_dbs[MAIN_DBI].md_root); mc.mc_txn = txn; @@ -1358,7 +1356,7 @@ mdb_txn_commit(MDB_txn *txn) /* save to free list */ if (!MDB_IDL_IS_ZERO(txn->mt_free_pgs)) { MDB_val key, data; - ULONG i; + pgno_t i; /* make sure last page of freeDB is touched and on freelist */ key.mv_size = MAXKEYSIZE+1; @@ -1369,11 +1367,11 @@ mdb_txn_commit(MDB_txn *txn) #if DEBUG > 1 { unsigned int i; - ULONG *idl = txn->mt_free_pgs; - DPRINTF("IDL write txn %lu root %lu num %lu", + ID *idl = txn->mt_free_pgs; + DPRINTF("IDL write txn %zu root %zu num %zu", txn->mt_txnid, txn->mt_dbs[FREE_DBI].md_root, idl[0]); for (i=0; imt_u.dirty_list[0].mid; i++) { size_t wsize; dp = txn->mt_u.dirty_list[i].mptr; - DPRINTF("committing page %lu", dp->mp_pgno); + DPRINTF("committing page %zu", dp->mp_pgno); size = dp->mp_pgno * env->me_psize; ov.Offset = size & 0xffffffff; ov.OffsetHigh = size >> 16; @@ -1489,7 +1487,7 @@ mdb_txn_commit(MDB_txn *txn) lseek(env->me_fd, dp->mp_pgno * env->me_psize, SEEK_SET); next = dp->mp_pgno; } - DPRINTF("committing page %lu", dp->mp_pgno); + DPRINTF("committing page %zu", dp->mp_pgno); iov[n].iov_len = env->me_psize; if (IS_OVERFLOW(dp)) iov[n].iov_len *= dp->mp_pages; iov[n].iov_base = dp; @@ -1608,7 +1606,7 @@ mdb_env_read_header(MDB_env *env, MDB_meta *meta) p = (MDB_page *)page; if (!F_ISSET(p->mp_flags, P_META)) { - DPRINTF("page %lu not a meta page", p->mp_pgno); + DPRINTF("page %zu not a meta page", p->mp_pgno); return EINVAL; } @@ -1703,7 +1701,7 @@ mdb_env_write_meta(MDB_txn *txn) assert(txn->mt_env != NULL); toggle = !txn->mt_toggle; - DPRINTF("writing meta page %d for root page %lu", + DPRINTF("writing meta page %d for root page %zu", 
toggle, txn->mt_dbs[MAIN_DBI].md_root); env = txn->mt_env; @@ -1925,11 +1923,11 @@ mdb_env_open2(MDB_env *env, unsigned int flags) DPRINTF("opened database version %u, pagesize %u", env->me_metas[toggle]->mm_version, env->me_psize); DPRINTF("depth: %u", env->me_metas[toggle]->mm_dbs[MAIN_DBI].md_depth); - DPRINTF("entries: %lu", env->me_metas[toggle]->mm_dbs[MAIN_DBI].md_entries); - DPRINTF("branch pages: %lu", env->me_metas[toggle]->mm_dbs[MAIN_DBI].md_branch_pages); - DPRINTF("leaf pages: %lu", env->me_metas[toggle]->mm_dbs[MAIN_DBI].md_leaf_pages); - DPRINTF("overflow pages: %lu", env->me_metas[toggle]->mm_dbs[MAIN_DBI].md_overflow_pages); - DPRINTF("root: %lu", env->me_metas[toggle]->mm_dbs[MAIN_DBI].md_root); + DPRINTF("entries: %zu", env->me_metas[toggle]->mm_dbs[MAIN_DBI].md_entries); + DPRINTF("branch pages: %zu", env->me_metas[toggle]->mm_dbs[MAIN_DBI].md_branch_pages); + DPRINTF("leaf pages: %zu", env->me_metas[toggle]->mm_dbs[MAIN_DBI].md_leaf_pages); + DPRINTF("overflow pages: %zu", env->me_metas[toggle]->mm_dbs[MAIN_DBI].md_overflow_pages); + DPRINTF("root: %zu", env->me_metas[toggle]->mm_dbs[MAIN_DBI].md_root); return MDB_SUCCESS; } @@ -2406,7 +2404,7 @@ mdb_search_node(MDB_cursor *mc, MDB_val *key, int *exactp) nkeys = NUMKEYS(mp); - DPRINTF("searching %u keys in %s page %lu", + DPRINTF("searching %u keys in %s page %zu", nkeys, IS_LEAF(mp) ? 
"leaf" : "branch", mp->mp_pgno); @@ -2438,7 +2436,7 @@ mdb_search_node(MDB_cursor *mc, MDB_val *key, int *exactp) DPRINTF("found leaf index %u [%s], rc = %i", i, DKEY(&nodekey), rc); else - DPRINTF("found branch index %u [%s -> %lu], rc = %i", + DPRINTF("found branch index %u [%s -> %zu], rc = %i", i, DKEY(&nodekey), NODEPGNO(node), rc); #endif @@ -2478,7 +2476,7 @@ cursor_pop_page(MDB_cursor *mc) if (mc->mc_snum) mc->mc_top--; - DPRINTF("popped page %lu off db %u cursor %p", top->mp_pgno, + DPRINTF("popped page %zu off db %u cursor %p", top->mp_pgno, mc->mc_dbi, (void *) mc); } } @@ -2486,7 +2484,7 @@ cursor_pop_page(MDB_cursor *mc) static int cursor_push_page(MDB_cursor *mc, MDB_page *mp) { - DPRINTF("pushing page %lu on db %u cursor %p", mp->mp_pgno, + DPRINTF("pushing page %zu on db %u cursor %p", mp->mp_pgno, mc->mc_dbi, (void *) mc); if (mc->mc_snum >= CURSOR_STACK) @@ -2517,7 +2515,7 @@ mdb_get_page(MDB_txn *txn, pgno_t pgno, MDB_page **ret) } *ret = p; if (!p) { - DPRINTF("page %lu not found", pgno); + DPRINTF("page %zu not found", pgno); assert(p != NULL); } return (p != NULL) ? MDB_SUCCESS : MDB_PAGE_NOTFOUND; @@ -2534,9 +2532,9 @@ mdb_search_page_root(MDB_cursor *mc, MDB_val *key, int modify) while (IS_BRANCH(mp)) { MDB_node *node; - DPRINTF("branch page %lu has %u keys", mp->mp_pgno, NUMKEYS(mp)); + DPRINTF("branch page %zu has %u keys", mp->mp_pgno, NUMKEYS(mp)); assert(NUMKEYS(mp) > 1); - DPRINTF("found index 0 to page %lu", NODEPGNO(NODEPTR(mp, 0))); + DPRINTF("found index 0 to page %zu", NODEPGNO(NODEPTR(mp, 0))); if (key == NULL) /* Initialize cursor to first page. */ mc->mc_ki[mc->mc_top] = 0; @@ -2579,7 +2577,7 @@ mdb_search_page_root(MDB_cursor *mc, MDB_val *key, int modify) return MDB_CORRUPTED; } - DPRINTF("found leaf page %lu for key [%s]", mp->mp_pgno, + DPRINTF("found leaf page %zu for key [%s]", mp->mp_pgno, key ? 
DKEY(key) : NULL); return MDB_SUCCESS; @@ -2616,7 +2614,7 @@ mdb_search_page(MDB_cursor *mc, MDB_val *key, int modify) mc->mc_snum = 1; mc->mc_top = 0; - DPRINTF("db %u root page %lu has flags 0x%X", + DPRINTF("db %u root page %zu has flags 0x%X", mc->mc_dbi, root, mc->mc_pg[0]->mp_flags); if (modify) { @@ -2658,7 +2656,7 @@ mdb_read_data(MDB_txn *txn, MDB_node *leaf, MDB_val *data) data->mv_size = NODEDSZ(leaf); memcpy(&pgno, NODEDATA(leaf), sizeof(pgno)); if ((rc = mdb_get_page(txn, pgno, &omp))) { - DPRINTF("read overflow page %lu failed", pgno); + DPRINTF("read overflow page %zu failed", pgno); return rc; } data->mv_data = METADATA(omp); @@ -2711,7 +2709,7 @@ mdb_sibling(MDB_cursor *mc, int move_right) } ptop = mc->mc_top-1; - DPRINTF("parent page is page %lu, index %u", + DPRINTF("parent page is page %zu, index %u", mc->mc_pg[ptop]->mp_pgno, mc->mc_ki[ptop]); cursor_pop_page(mc); @@ -2770,7 +2768,7 @@ mdb_cursor_next(MDB_cursor *mc, MDB_val *key, MDB_val *data, MDB_cursor_op op) } } - DPRINTF("cursor_next: top page is %lu in cursor %p", mp->mp_pgno, (void *) mc); + DPRINTF("cursor_next: top page is %zu in cursor %p", mp->mp_pgno, (void *) mc); if (mc->mc_ki[mc->mc_top] + 1u >= NUMKEYS(mp)) { DPUTS("=====> move to next sibling page"); @@ -2779,11 +2777,11 @@ mdb_cursor_next(MDB_cursor *mc, MDB_val *key, MDB_val *data, MDB_cursor_op op) return MDB_NOTFOUND; } mp = mc->mc_pg[mc->mc_top]; - DPRINTF("next page is %lu, key index %u", mp->mp_pgno, mc->mc_ki[mc->mc_top]); + DPRINTF("next page is %zu, key index %u", mp->mp_pgno, mc->mc_ki[mc->mc_top]); } else mc->mc_ki[mc->mc_top]++; - DPRINTF("==> cursor points to page %lu with %u keys, key index %u", + DPRINTF("==> cursor points to page %zu with %u keys, key index %u", mp->mp_pgno, NUMKEYS(mp), mc->mc_ki[mc->mc_top]); if (IS_LEAF2(mp)) { @@ -2839,7 +2837,7 @@ mdb_cursor_prev(MDB_cursor *mc, MDB_val *key, MDB_val *data, MDB_cursor_op op) } } - DPRINTF("cursor_prev: top page is %lu in cursor %p", mp->mp_pgno, (void *) 
mc); + DPRINTF("cursor_prev: top page is %zu in cursor %p", mp->mp_pgno, (void *) mc); if (mc->mc_ki[mc->mc_top] == 0) { DPUTS("=====> move to prev sibling page"); @@ -2849,13 +2847,13 @@ mdb_cursor_prev(MDB_cursor *mc, MDB_val *key, MDB_val *data, MDB_cursor_op op) } mp = mc->mc_pg[mc->mc_top]; mc->mc_ki[mc->mc_top] = NUMKEYS(mp) - 1; - DPRINTF("prev page is %lu, key index %u", mp->mp_pgno, mc->mc_ki[mc->mc_top]); + DPRINTF("prev page is %zu, key index %u", mp->mp_pgno, mc->mc_ki[mc->mc_top]); } else mc->mc_ki[mc->mc_top]--; mc->mc_flags &= ~C_EOF; - DPRINTF("==> cursor points to page %lu with %u keys, key index %u", + DPRINTF("==> cursor points to page %zu with %u keys, key index %u", mp->mp_pgno, NUMKEYS(mp), mc->mc_ki[mc->mc_top]); if (IS_LEAF2(mp)) { @@ -3506,7 +3504,7 @@ mdb_new_page(MDB_cursor *mc, uint32_t flags, int num) if ((np = mdb_alloc_page(mc, num)) == NULL) return NULL; - DPRINTF("allocated new mpage %lu, page size %u", + DPRINTF("allocated new mpage %zu, page size %u", np->mp_pgno, mc->mc_txn->mt_env->me_psize); np->mp_flags = flags | P_DIRTY; np->mp_lower = PAGEHDRSZ; @@ -3568,7 +3566,7 @@ mdb_add_node(MDB_cursor *mc, indx_t indx, assert(mp->mp_upper >= mp->mp_lower); - DPRINTF("add to %s page %lu index %i, data size %zu key size %zu [%s]", + DPRINTF("add to %s page %zu index %i, data size %zu key size %zu [%s]", IS_LEAF(mp) ? "leaf" : "branch", mp->mp_pgno, indx, data ? data->mv_size : 0, key ? key->mv_size : 0, key ? 
DKEY(key) : NULL); @@ -3605,7 +3603,7 @@ mdb_add_node(MDB_cursor *mc, indx_t indx, node_size += sizeof(pgno_t); if ((ofp = mdb_new_page(mc, P_OVERFLOW, ovpages)) == NULL) return ENOMEM; - DPRINTF("allocated overflow page %lu", ofp->mp_pgno); + DPRINTF("allocated overflow page %zu", ofp->mp_pgno); flags |= F_BIGDATA; } else { node_size += data->mv_size; @@ -3614,7 +3612,7 @@ mdb_add_node(MDB_cursor *mc, indx_t indx, node_size += node_size & 1; if (node_size + sizeof(indx_t) > SIZELEFT(mp)) { - DPRINTF("not enough room in page %lu, got %u ptrs", + DPRINTF("not enough room in page %zu, got %u ptrs", mp->mp_pgno, NUMKEYS(mp)); DPRINTF("upper - lower = %u - %u = %u", mp->mp_upper, mp->mp_lower, mp->mp_upper - mp->mp_lower); @@ -3672,7 +3670,7 @@ mdb_del_node(MDB_page *mp, indx_t indx, int ksize) MDB_node *node; char *base; - DPRINTF("delete node %u on %s page %lu", indx, + DPRINTF("delete node %u on %s page %zu", indx, IS_LEAF(mp) ? "leaf" : "branch", mp->mp_pgno); assert(indx < NUMKEYS(mp)); @@ -3758,7 +3756,7 @@ mdb_xcursor_init1(MDB_cursor *mc, MDB_node *node) } else { dbn = 2; } - DPRINTF("Sub-db %u for db %u root page %lu", dbn, mc->mc_dbi, db->md_root); + DPRINTF("Sub-db %u for db %u root page %zu", dbn, mc->mc_dbi, db->md_root); mx->mx_dbs[dbn] = *db; if (F_ISSET(mc->mc_pg[mc->mc_top]->mp_flags, P_DIRTY)) mx->mx_dbxs[dbn].md_dirty = 1; @@ -3783,7 +3781,7 @@ mdb_xcursor_init2(MDB_cursor *mc) } else { dbn = 2; } - DPRINTF("Sub-db %u for db %u root page %lu", dbn, mc->mc_dbi, + DPRINTF("Sub-db %u for db %u root page %zu", dbn, mc->mc_dbi, mx->mx_dbs[dbn].md_root); mx->mx_txn.mt_next_pgno = mc->mc_txn->mt_next_pgno; } @@ -3832,7 +3830,7 @@ mdb_cursor_open(MDB_txn *txn, MDB_dbi dbi, MDB_cursor **ret) /* Return the count of duplicate data items for the current key */ int -mdb_cursor_count(MDB_cursor *mc, unsigned long *countp) +mdb_cursor_count(MDB_cursor *mc, size_t *countp) { MDB_node *leaf; @@ -3874,7 +3872,7 @@ mdb_update_key(MDB_page *mp, indx_t indx, MDB_val 
*key) node = NODEPTR(mp, indx); ptr = mp->mp_ptrs[indx]; - DPRINTF("update key %u (ofs %u) [%.*s] to [%s] on page %lu", + DPRINTF("update key %u (ofs %u) [%.*s] to [%s] on page %zu", indx, ptr, (int)node->mn_ksize, (char *)NODEKEY(node), DKEY(key), @@ -3944,7 +3942,7 @@ mdb_move_node(MDB_cursor *csrc, MDB_cursor *cdst) data.mv_size = NODEDSZ(srcnode); data.mv_data = NODEDATA(srcnode); } - DPRINTF("moving %s node %u [%s] on page %lu to node %u on page %lu", + DPRINTF("moving %s node %u [%s] on page %zu to node %u on page %zu", IS_LEAF(csrc->mc_pg[csrc->mc_top]) ? "leaf" : "branch", csrc->mc_ki[csrc->mc_top], DKEY(&key), @@ -3973,7 +3971,7 @@ mdb_move_node(MDB_cursor *csrc, MDB_cursor *cdst) key.mv_size = NODEKSZ(srcnode); key.mv_data = NODEKEY(srcnode); } - DPRINTF("update separator for source page %lu to [%s]", + DPRINTF("update separator for source page %zu to [%s]", csrc->mc_pg[csrc->mc_top]->mp_pgno, DKEY(&key)); if ((rc = mdb_update_key(csrc->mc_pg[csrc->mc_top-1], csrc->mc_ki[csrc->mc_top-1], &key)) != MDB_SUCCESS) @@ -3995,7 +3993,7 @@ mdb_move_node(MDB_cursor *csrc, MDB_cursor *cdst) key.mv_size = NODEKSZ(srcnode); key.mv_data = NODEKEY(srcnode); } - DPRINTF("update separator for destination page %lu to [%s]", + DPRINTF("update separator for destination page %zu to [%s]", cdst->mc_pg[cdst->mc_top]->mp_pgno, DKEY(&key)); if ((rc = mdb_update_key(cdst->mc_pg[cdst->mc_top-1], cdst->mc_ki[cdst->mc_top-1], &key)) != MDB_SUCCESS) @@ -4019,7 +4017,8 @@ mdb_merge(MDB_cursor *csrc, MDB_cursor *cdst) MDB_node *srcnode; MDB_val key, data; - DPRINTF("merging page %lu into %lu", csrc->mc_pg[csrc->mc_top]->mp_pgno, cdst->mc_pg[cdst->mc_top]->mp_pgno); + DPRINTF("merging page %zu into %zu", csrc->mc_pg[csrc->mc_top]->mp_pgno, + cdst->mc_pg[cdst->mc_top]->mp_pgno); assert(csrc->mc_snum > 1); /* can't merge root page */ assert(cdst->mc_snum > 1); @@ -4054,7 +4053,7 @@ mdb_merge(MDB_cursor *csrc, MDB_cursor *cdst) } } - DPRINTF("dst page %lu now has %u keys (%.1f%% filled)", 
+ DPRINTF("dst page %zu now has %u keys (%.1f%% filled)", cdst->mc_pg[cdst->mc_top]->mp_pgno, NUMKEYS(cdst->mc_pg[cdst->mc_top]), (float)PAGEFILL(cdst->mc_txn->mt_env, cdst->mc_pg[cdst->mc_top]) / 10); /* Unlink the src page from parent and add to free list. @@ -4102,12 +4101,12 @@ mdb_rebalance(MDB_cursor *mc) unsigned int ptop; MDB_cursor mn; - DPRINTF("rebalancing %s page %lu (has %u keys, %.1f%% full)", + DPRINTF("rebalancing %s page %zu (has %u keys, %.1f%% full)", IS_LEAF(mc->mc_pg[mc->mc_top]) ? "leaf" : "branch", mc->mc_pg[mc->mc_top]->mp_pgno, NUMKEYS(mc->mc_pg[mc->mc_top]), (float)PAGEFILL(mc->mc_txn->mt_env, mc->mc_pg[mc->mc_top]) / 10); if (PAGEFILL(mc->mc_txn->mt_env, mc->mc_pg[mc->mc_top]) >= FILL_THRESHOLD) { - DPRINTF("no need to rebalance page %lu, above fill threshold", + DPRINTF("no need to rebalance page %zu, above fill threshold", mc->mc_pg[mc->mc_top]->mp_pgno); return MDB_SUCCESS; } @@ -4170,7 +4169,7 @@ mdb_rebalance(MDB_cursor *mc) mc->mc_ki[mc->mc_top] = 0; } - DPRINTF("found neighbor page %lu (%u keys, %.1f%% full)", + DPRINTF("found neighbor page %zu (%u keys, %.1f%% full)", mn.mc_pg[mn.mc_top]->mp_pgno, NUMKEYS(mn.mc_pg[mn.mc_top]), (float)PAGEFILL(mc->mc_txn->mt_env, mn.mc_pg[mn.mc_top]) / 10); /* If the neighbor page is above threshold and has at least two @@ -4201,7 +4200,7 @@ mdb_del0(MDB_cursor *mc, MDB_node *leaf) memcpy(&pg, NODEDATA(leaf), sizeof(pg)); ovpages = OVPAGES(NODEDSZ(leaf), mc->mc_txn->mt_env->me_psize); for (i=0; imc_txn->mt_free_pgs, pg); pg++; } @@ -4290,7 +4289,7 @@ mdb_split(MDB_cursor *mc, MDB_val *newkey, MDB_val *newdata, pgno_t newpgno) mp = mc->mc_pg[mc->mc_top]; newindx = mc->mc_ki[mc->mc_top]; - DPRINTF("-----> splitting %s page %lu and adding [%s] at index %i", + DPRINTF("-----> splitting %s page %zu and adding [%s] at index %i", IS_LEAF(mp) ? 
"leaf" : "branch", mp->mp_pgno, DKEY(newkey), mc->mc_ki[mc->mc_top]); @@ -4303,7 +4302,7 @@ mdb_split(MDB_cursor *mc, MDB_val *newkey, MDB_val *newdata, pgno_t newpgno) mc->mc_pg[0] = pp; mc->mc_ki[0] = 0; mc->mc_txn->mt_dbs[mc->mc_dbi].md_root = pp->mp_pgno; - DPRINTF("root split! new root = %lu", pp->mp_pgno); + DPRINTF("root split! new root = %zu", pp->mp_pgno); mc->mc_txn->mt_dbs[mc->mc_dbi].md_depth++; /* Add left (implicit) pointer. */ @@ -4320,7 +4319,7 @@ mdb_split(MDB_cursor *mc, MDB_val *newkey, MDB_val *newdata, pgno_t newpgno) ptop = 0; } else { ptop = mc->mc_top-1; - DPRINTF("parent branch page is %lu", mc->mc_pg[ptop]->mp_pgno); + DPRINTF("parent branch page is %zu", mc->mc_pg[ptop]->mp_pgno); } /* Create a right sibling. */ @@ -4329,7 +4328,7 @@ mdb_split(MDB_cursor *mc, MDB_val *newkey, MDB_val *newdata, pgno_t newpgno) mdb_cursor_copy(mc, &mn); mn.mc_pg[mn.mc_top] = rp; mn.mc_ki[ptop] = mc->mc_ki[ptop]+1; - DPRINTF("new right sibling: page %lu", rp->mp_pgno); + DPRINTF("new right sibling: page %zu", rp->mp_pgno); nkeys = NUMKEYS(mp); split_indx = nkeys / 2 + 1; diff --git a/libraries/libmdb/mdb.h b/libraries/libmdb/mdb.h index f718c35ac3..8e7d297c3b 100644 --- a/libraries/libmdb/mdb.h +++ b/libraries/libmdb/mdb.h @@ -232,10 +232,10 @@ typedef struct MDB_stat { unsigned int ms_psize; /**< Size of a database page. This is currently the same for all databases. 
*/ unsigned int ms_depth; /**< Depth (height) of the B-tree */ - unsigned long ms_branch_pages; /**< Number of internal (non-leaf) pages */ - unsigned long ms_leaf_pages; /**< Number of leaf pages */ - unsigned long ms_overflow_pages; /**< Number of overflow pages */ - unsigned long ms_entries; /**< Number of data items */ + size_t ms_branch_pages; /**< Number of internal (non-leaf) pages */ + size_t ms_leaf_pages; /**< Number of leaf pages */ + size_t ms_overflow_pages; /**< Number of overflow pages */ + size_t ms_entries; /**< Number of data items */ } MDB_stat; /** Return the mdb library version information. @@ -542,7 +542,7 @@ int mdb_txn_renew(MDB_txn *txn); *
 <li>#MDB_INTEGERKEY * Keys are binary integers in native byte order. Setting this option * requires all keys to be the same size, typically sizeof(int) - * or sizeof(long). + * or sizeof(size_t). *
 <li>#MDB_DUPFIXED * This flag may only be used in combination with #MDB_DUPSORT. This option * tells the library that the data items for this database are all the same @@ -839,7 +839,7 @@ int mdb_cursor_del(MDB_cursor *cursor, unsigned int flags); *
 <li>EINVAL - cursor is not initialized, or an invalid parameter was specified. * </ul> */ -int mdb_cursor_count(MDB_cursor *cursor, unsigned long *countp); +int mdb_cursor_count(MDB_cursor *cursor, size_t *countp); /** Compare two data items according to a particular database. * This returns a comparison as if the two data items were keys in the diff --git a/libraries/libmdb/mdb_stat.c b/libraries/libmdb/mdb_stat.c index 21b85585c6..4bcdb92fa6 100644 --- a/libraries/libmdb/mdb_stat.c +++ b/libraries/libmdb/mdb_stat.c @@ -52,10 +52,10 @@ int main(int argc,char * argv[]) rc = mdb_stat(txn, dbi, &mst); printf("Page size: %u\n", mst.ms_psize); printf("Tree depth: %u\n", mst.ms_depth); - printf("Branch pages: %lu\n", mst.ms_branch_pages); - printf("Leaf pages: %lu\n", mst.ms_leaf_pages); - printf("Overflow pages: %lu\n", mst.ms_overflow_pages); - printf("Entries: %lu\n", mst.ms_entries); + printf("Branch pages: %zu\n", mst.ms_branch_pages); + printf("Leaf pages: %zu\n", mst.ms_leaf_pages); + printf("Overflow pages: %zu\n", mst.ms_overflow_pages); + printf("Entries: %zu\n", mst.ms_entries); mdb_close(txn, dbi); mdb_txn_abort(txn); mdb_env_close(env); diff --git a/libraries/libmdb/midl.h b/libraries/libmdb/midl.h index 7ef2cc53b2..cf4a2b2924 100644 --- a/libraries/libmdb/midl.h +++ b/libraries/libmdb/midl.h @@ -26,19 +26,20 @@ #ifndef _MDB_MIDL_H_ #define _MDB_MIDL_H_ +#include <stddef.h> + /** @defgroup internal MDB Internals * @{ */ - /** ULONG should be the largest integer type supported on a machine. - * It should be equal to the size of a pointer. - */ -#define ULONG unsigned long + /** @defgroup idls ID List Management * @{ */ /** A generic ID number. These were entryIDs in back-bdb. + * It should be the largest integer type supported on a machine. + * It should be equal to the size of a pointer. */ -typedef ULONG ID; +typedef size_t ID; /** An IDL is an ID List, a sorted array of IDs.
The first * element of the array is a counter for how many actual From 90ed5533015ce2ad668422aca318e40ee1ce7410 Mon Sep 17 00:00:00 2001 From: Hallvard Furuseth Date: Sat, 10 Sep 2011 10:11:56 +0200 Subject: [PATCH 11/16] mdb integer type cleanup: Use MDB_dbi consistently. This changes the prototype of mdb_env_set_maxdbs(). --- libraries/libmdb/mdb.c | 15 +++++++++------ libraries/libmdb/mdb.h | 2 +- 2 files changed, 10 insertions(+), 7 deletions(-) diff --git a/libraries/libmdb/mdb.c b/libraries/libmdb/mdb.c index 19977cab1d..db4db98116 100644 --- a/libraries/libmdb/mdb.c +++ b/libraries/libmdb/mdb.c @@ -689,7 +689,7 @@ struct MDB_txn { /** Number of DB records in use. This number only ever increments; * we don't decrement it when individual DB handles are closed. */ - unsigned int mt_numdbs; + MDB_dbi mt_numdbs; #define MDB_TXN_RDONLY 0x01 /**< read-only transaction */ #define MDB_TXN_ERROR 0x02 /**< an error has occurred */ @@ -772,8 +772,8 @@ struct MDB_env { uint32_t me_flags; uint32_t me_extrapad; /**< unused for now */ unsigned int me_maxreaders; /**< size of the reader table */ - unsigned int me_numdbs; /**< number of DBs opened */ - unsigned int me_maxdbs; /**< size of the DB table */ + MDB_dbi me_numdbs; /**< number of DBs opened */ + MDB_dbi me_maxdbs; /**< size of the DB table */ char *me_path; /**< path to the DB files */ char *me_map; /**< the memory map of the data file */ MDB_txninfo *me_txns; /**< the memory map of the lock file */ @@ -1250,6 +1250,7 @@ mdb_txn_reset0(MDB_txn *txn) } else { MDB_oldpages *mop; MDB_page *dp; + MDB_dbi dbi; unsigned int i; /* return all dirty pages to dpage list */ @@ -1270,8 +1271,8 @@ mdb_txn_reset0(MDB_txn *txn) } env->me_txn = NULL; - for (i=2; i<env->me_numdbs; i++) - env->me_dbxs[i].md_dirty = 0; + for (dbi=2; dbi<env->me_numdbs; dbi++) + env->me_dbxs[dbi].md_dirty = 0; /* The writer mutex was locked in mdb_txn_begin.
*/ UNLOCK_MUTEX_W(env); } @@ -1412,6 +1413,7 @@ mdb_txn_commit(MDB_txn *txn) * touched so this is all in-place and cannot fail. */ { + MDB_dbi i; MDB_val data; data.mv_size = sizeof(MDB_db); @@ -1545,6 +1547,7 @@ done: { int toggle = !env->me_db_toggle; MDB_db *ip, *jp; + MDB_dbi i; ip = &env->me_dbs[toggle][2]; jp = &txn->mt_dbs[2]; @@ -1813,7 +1816,7 @@ mdb_env_set_mapsize(MDB_env *env, size_t size) } int -mdb_env_set_maxdbs(MDB_env *env, int dbs) +mdb_env_set_maxdbs(MDB_env *env, MDB_dbi dbs) { if (env->me_map) return EINVAL; diff --git a/libraries/libmdb/mdb.h b/libraries/libmdb/mdb.h index 8e7d297c3b..ce3137ce1f 100644 --- a/libraries/libmdb/mdb.h +++ b/libraries/libmdb/mdb.h @@ -440,7 +440,7 @@ int mdb_env_get_maxreaders(MDB_env *env, unsigned int *readers); *
 <li>EINVAL - an invalid parameter was specified, or the environment is already open. * </ul> */ -int mdb_env_set_maxdbs(MDB_env *env, int dbs); +int mdb_env_set_maxdbs(MDB_env *env, MDB_dbi dbs); /** Create a transaction for use with the environment. * The transaction handle may be discarded using #mdb_txn_abort() or #mdb_txn_commit(). From ed0fc55fd39b50b19b5c7cf30d41811b180f1ae5 Mon Sep 17 00:00:00 2001 From: Hallvard Furuseth Date: Sun, 11 Sep 2011 11:08:40 +0200 Subject: [PATCH 12/16] Restore missing LAZY_LOCKS semicolon. Lost in C90 compat commit c5dad7a6d0eb1b3597beda18b38fe92820df0d30. --- libraries/libmdb/mdb.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/libraries/libmdb/mdb.c b/libraries/libmdb/mdb.c index db4db98116..a101a7af9a 100644 --- a/libraries/libmdb/mdb.c +++ b/libraries/libmdb/mdb.c @@ -292,7 +292,7 @@ typedef ID txnid_t; #define LAZY_RWLOCK_UNLOCK(x) pthread_rwlock_unlock(x) #define LAZY_RWLOCK_WRLOCK(x) pthread_rwlock_wrlock(x) #define LAZY_RWLOCK_RDLOCK(x) pthread_rwlock_rdlock(x) -#define LAZY_RWLOCK_DEF(x) pthread_rwlock_t x +#define LAZY_RWLOCK_DEF(x) pthread_rwlock_t x; #define LAZY_RWLOCK_INIT(x,y) pthread_rwlock_init(x,y) #define LAZY_RWLOCK_DESTROY(x) pthread_rwlock_destroy(x) #endif From 946b38f471636e475661daf3569ce9d1f86c6ed2 Mon Sep 17 00:00:00 2001 From: Hallvard Furuseth Date: Sun, 11 Sep 2011 13:00:52 +0200 Subject: [PATCH 13/16] Fix mdb debug formats --- libraries/libmdb/mdb.c | 21 +++++++++++---------- 1 file changed, 11 insertions(+), 10 deletions(-) diff --git a/libraries/libmdb/mdb.c b/libraries/libmdb/mdb.c index a101a7af9a..d0dfc01d94 100644 --- a/libraries/libmdb/mdb.c +++ b/libraries/libmdb/mdb.c @@ -189,7 +189,8 @@ typedef ID txnid_t; #elif DEBUG /** Print a debug message with printf formatting. */ # define DPRINTF(fmt, ...)
/**< Requires 2 or more args */ \ - fprintf(stderr, "%s:%d:(%p) " fmt "\n", __func__, __LINE__, pthread_self(), __VA_ARGS__) + fprintf(stderr, "%s:%d:(%zx) " fmt "\n", __func__, __LINE__, \ + (size_t) pthread_self(), __VA_ARGS__) #else # define DPRINTF(fmt, ...) ((void) 0) #endif @@ -1198,8 +1199,8 @@ mdb_txn_renew(MDB_txn *txn) rc = mdb_txn_renew0(txn); if (rc == MDB_SUCCESS) { DPRINTF("renew txn %zu%c %p on mdbenv %p, root page %zu", - txn->mt_txnid, (txn->mt_flags & MDB_TXN_RDONLY) ? 'r' : 'w', txn, - (void *)txn->mt_env, txn->mt_dbs[MAIN_DBI].md_root); + txn->mt_txnid, (txn->mt_flags & MDB_TXN_RDONLY) ? 'r' : 'w', + (void *)txn, (void *)txn->mt_env, txn->mt_dbs[MAIN_DBI].md_root); } return rc; } @@ -1230,8 +1231,8 @@ mdb_txn_begin(MDB_env *env, unsigned int flags, MDB_txn **ret) else { *ret = txn; DPRINTF("begin txn %zu%c %p on mdbenv %p, root page %zu", - txn->mt_txnid, (txn->mt_flags & MDB_TXN_RDONLY) ? 'r' : 'w', txn, - (void *) env, txn->mt_dbs[MAIN_DBI].md_root); + txn->mt_txnid, (txn->mt_flags & MDB_TXN_RDONLY) ? 'r' : 'w', + (void *) txn, (void *) env, txn->mt_dbs[MAIN_DBI].md_root); } return rc; @@ -1285,8 +1286,8 @@ mdb_txn_reset(MDB_txn *txn) return; DPRINTF("reset txn %zu%c %p on mdbenv %p, root page %zu", - txn->mt_txnid, (txn->mt_flags & MDB_TXN_RDONLY) ? 'r' : 'w', txn, - (void *)txn->mt_env, txn->mt_dbs[MAIN_DBI].md_root); + txn->mt_txnid, (txn->mt_flags & MDB_TXN_RDONLY) ? 'r' : 'w', + (void *) txn, (void *)txn->mt_env, txn->mt_dbs[MAIN_DBI].md_root); mdb_txn_reset0(txn); } @@ -1298,8 +1299,8 @@ mdb_txn_abort(MDB_txn *txn) return; DPRINTF("abort txn %zu%c %p on mdbenv %p, root page %zu", - txn->mt_txnid, (txn->mt_flags & MDB_TXN_RDONLY) ? 'r' : 'w', txn, - (void *)txn->mt_env, txn->mt_dbs[MAIN_DBI].md_root); + txn->mt_txnid, (txn->mt_flags & MDB_TXN_RDONLY) ? 
'r' : 'w', + (void *)txn, (void *)txn->mt_env, txn->mt_dbs[MAIN_DBI].md_root); mdb_txn_reset0(txn); free(txn); @@ -1343,7 +1344,7 @@ mdb_txn_commit(MDB_txn *txn) goto done; DPRINTF("committing txn %zu %p on mdbenv %p, root page %zu", - txn->mt_txnid, txn, (void *)env, txn->mt_dbs[MAIN_DBI].md_root); + txn->mt_txnid, (void *)txn, (void *)env, txn->mt_dbs[MAIN_DBI].md_root); mc.mc_txn = txn; mc.mc_dbi = FREE_DBI; From 350124854855ba3aeda6f07c8d6e4d1f27b835bc Mon Sep 17 00:00:00 2001 From: Howard Chu Date: Sun, 11 Sep 2011 20:54:32 -0700 Subject: [PATCH 14/16] Fix 2e3bc39fa94f21d692d8e94183f57aef9122c487 Various mistakes when converting from previous data structures. Add a few more debug asserts/sanity checks. Split all "if (foo) return" constructs to separate lines to allow easier breakpoint setting. Add mtest6 for checking mdb_split() behavior. This needs to be expanded to check rebalance/merge cases too. --- libraries/libmdb/.gitignore | 2 +- libraries/libmdb/Makefile | 1 + libraries/libmdb/mdb.c | 414 ++++++++++++++++-------------------- libraries/libmdb/mtest6.c | 131 ++++++++++++ 4 files changed, 315 insertions(+), 233 deletions(-) create mode 100644 libraries/libmdb/mtest6.c diff --git a/libraries/libmdb/.gitignore b/libraries/libmdb/.gitignore index 7455ec9c11..9da29839c4 100644 --- a/libraries/libmdb/.gitignore +++ b/libraries/libmdb/.gitignore @@ -1,5 +1,5 @@ mtest -mtest[23] +mtest[23456] testdb mdb_stat *.[ao] diff --git a/libraries/libmdb/Makefile b/libraries/libmdb/Makefile index 3973979e87..e49e275638 100644 --- a/libraries/libmdb/Makefile +++ b/libraries/libmdb/Makefile @@ -26,6 +26,7 @@ mtest2: mtest2.o libmdb.a mtest3: mtest3.o libmdb.a mtest4: mtest4.o libmdb.a mtest5: mtest5.o libmdb.a +mtest6: mtest6.o libmdb.a mdb.o: mdb.c mdb.h midl.h $(CC) $(CFLAGS) -fPIC $(CPPFLAGS) -c mdb.c diff --git a/libraries/libmdb/mdb.c b/libraries/libmdb/mdb.c index b392826d07..806d493bb1 100644 --- a/libraries/libmdb/mdb.c +++ b/libraries/libmdb/mdb.c @@ -177,7 +177,7 
@@ typedef ULONG pgno_t; #elif DEBUG /** Print a debug message with printf formatting. */ # define DPRINTF(fmt, ...) /**< Requires 2 or more args */ \ - fprintf(stderr, "%s:%d:(%p) " fmt "\n", __func__, __LINE__, pthread_self(), __VA_ARGS__) + fprintf(stderr, "%s:%d " fmt "\n", __func__, __LINE__, __VA_ARGS__) #else # define DPRINTF(fmt, ...) ((void) 0) #endif @@ -615,6 +615,9 @@ typedef struct MDB_db { /** Handle for the default DB. */ #define MAIN_DBI 1 + /** Identify a data item as a valid sub-DB record */ +#define MDB_SUBDATA 0x8200 + /** Meta page content. */ typedef struct MDB_meta { /** Stamp identifying this as an MDB data file. It must be set @@ -702,6 +705,10 @@ struct MDB_cursor { MDB_txn *mc_txn; /** The database handle this cursor operates on */ MDB_dbi mc_dbi; + /** The database record for this cursor */ + MDB_db *mc_db; + /** The database auxiliary record for this cursor */ + MDB_dbx *mc_dbx; unsigned short mc_snum; /**< number of pushed pages */ unsigned short mc_top; /**< index of top page, mc_snum-1 */ unsigned int mc_flags; @@ -720,20 +727,10 @@ struct MDB_cursor { typedef struct MDB_xcursor { /** A sub-cursor for traversing the Dup DB */ MDB_cursor mx_cursor; - /** A fake transaction struct for pointing to our own table - * of DB info. - */ - MDB_txn mx_txn; - /** Our private DB information tables. Slots 0 and 1 are always - * copies of the corresponding slots in the main transaction. These - * hold the FREEDB and MAINDB, respectively. If the main cursor is - * on a sub-database, that will be copied to slot 2, and the duplicate - * database info will be in slot 3. If the main cursor is on the MAINDB - * then the duplicate DB info will be in slot 2 and slot 3 will be unused. - */ - MDB_dbx mx_dbxs[4]; - /** MDB_db table */ - MDB_db mx_dbs[4]; + /** The database record for this Dup DB */ + MDB_db mx_db; + /** The auxiliary DB record for this Dup DB */ + MDB_dbx mx_dbx; } MDB_xcursor; /** A set of pages freed by an earlier transaction. 
*/ @@ -828,10 +825,9 @@ static int mdb_cursor_set(MDB_cursor *mc, MDB_val *key, MDB_val *data, MDB_curso static int mdb_cursor_first(MDB_cursor *mc, MDB_val *key, MDB_val *data); static int mdb_cursor_last(MDB_cursor *mc, MDB_val *key, MDB_val *data); +static void mdb_cursor_init(MDB_cursor *mc, MDB_txn *txn, MDB_dbi dbi); static void mdb_xcursor_init0(MDB_cursor *mc); static void mdb_xcursor_init1(MDB_cursor *mc, MDB_node *node); -static void mdb_xcursor_init2(MDB_cursor *mc); -static void mdb_xcursor_fini(MDB_cursor *mc); static size_t mdb_leaf_size(MDB_env *env, MDB_val *key, MDB_val *data); static size_t mdb_branch_size(MDB_env *env, MDB_val *key); @@ -957,10 +953,7 @@ mdb_alloc_page(MDB_cursor *mc, int num) MDB_node *leaf; ULONG *kptr, oldest; - m2.mc_txn = txn; - m2.mc_dbi = FREE_DBI; - m2.mc_snum = 0; - m2.mc_flags = 0; + mdb_cursor_init(&m2, txn, FREE_DBI); mdb_search_page(&m2, NULL, 0); leaf = NODEPTR(m2.mc_pg[m2.mc_top], 0); kptr = (ULONG *)NODEKEY(leaf); @@ -1034,8 +1027,10 @@ mdb_alloc_page(MDB_cursor *mc, int num) if (pgno == P_INVALID) { /* DB size is maxed out */ - if (txn->mt_next_pgno + num >= txn->mt_env->me_maxpg) + if (txn->mt_next_pgno + num >= txn->mt_env->me_maxpg) { + assert(txn->mt_next_pgno + num < txn->mt_env->me_maxpg); return NULL; + } } if (txn->mt_env->me_dpages && num == 1) { np = txn->mt_env->me_dpages; @@ -1330,9 +1325,7 @@ mdb_txn_commit(MDB_txn *txn) DPRINTF("committing txn %lu %p on mdbenv %p, root page %lu", txn->mt_txnid, txn, (void *)env, txn->mt_dbs[MAIN_DBI].md_root); - mc.mc_txn = txn; - mc.mc_dbi = FREE_DBI; - mc.mc_flags = 0; + mdb_cursor_init(&mc, txn, FREE_DBI); /* should only be one record now */ if (env->me_pghead) { @@ -1401,8 +1394,7 @@ mdb_txn_commit(MDB_txn *txn) MDB_val data; data.mv_size = sizeof(MDB_db); - mc.mc_dbi = MAIN_DBI; - mc.mc_flags = 0; + mdb_cursor_init(&mc, txn, MAIN_DBI); for (i = 2; i < txn->mt_numdbs; i++) { if (txn->mt_dbxs[i].md_dirty) { data.mv_data = &txn->mt_dbs[i]; @@ -1446,7 +1438,7 @@ 
mdb_txn_commit(MDB_txn *txn) return n; } } - done = 1;; + done = 1; #else struct iovec iov[MDB_COMMIT_PAGES]; n = 0; @@ -1778,7 +1770,8 @@ mdb_env_create(MDB_env **env) MDB_env *e; e = calloc(1, sizeof(MDB_env)); - if (!e) return ENOMEM; + if (!e) + return ENOMEM; e->me_maxreaders = DEFAULT_READERS; e->me_maxdbs = 2; @@ -2399,9 +2392,9 @@ mdb_search_node(MDB_cursor *mc, MDB_val *key, int *exactp) low = IS_LEAF(mp) ? 0 : 1; high = nkeys - 1; - cmp = mc->mc_txn->mt_dbxs[mc->mc_dbi].md_cmp; + cmp = mc->mc_dbx->md_cmp; if (IS_LEAF2(mp)) { - nodekey.mv_size = mc->mc_txn->mt_dbs[mc->mc_dbi].md_pad; + nodekey.mv_size = mc->mc_db->md_pad; node = NODEPTR(mp, 0); /* fake */ } while (low <= high) { @@ -2474,8 +2467,10 @@ cursor_push_page(MDB_cursor *mc, MDB_page *mp) DPRINTF("pushing page %lu on db %u cursor %p", mp->mp_pgno, mc->mc_dbi, (void *) mc); - if (mc->mc_snum >= CURSOR_STACK) + if (mc->mc_snum >= CURSOR_STACK) { + assert(mc->mc_snum < CURSOR_STACK); return ENOMEM; + } mc->mc_top = mc->mc_snum++; mc->mc_pg[mc->mc_top] = mp; @@ -2588,7 +2583,7 @@ mdb_search_page(MDB_cursor *mc, MDB_val *key, int modify) DPUTS("transaction has failed, must abort"); return EINVAL; } else - root = mc->mc_txn->mt_dbs[mc->mc_dbi].md_root; + root = mc->mc_db->md_root; if (root == P_INVALID) { /* Tree is empty. 
*/ DPUTS("tree is empty"); @@ -2606,19 +2601,18 @@ mdb_search_page(MDB_cursor *mc, MDB_val *key, int modify) if (modify) { /* For sub-databases, update main root first */ - if (mc->mc_dbi > MAIN_DBI && !mc->mc_txn->mt_dbxs[mc->mc_dbi].md_dirty) { + if (mc->mc_dbi > MAIN_DBI && !mc->mc_dbx->md_dirty) { MDB_cursor mc2; - mc2.mc_txn = mc->mc_txn; - mc2.mc_dbi = MAIN_DBI; - rc = mdb_search_page(&mc2, &mc->mc_txn->mt_dbxs[mc->mc_dbi].md_name, 1); + mdb_cursor_init(&mc2, mc->mc_txn, MAIN_DBI); + rc = mdb_search_page(&mc2, &mc->mc_dbx->md_name, 1); if (rc) return rc; - mc->mc_txn->mt_dbxs[mc->mc_dbi].md_dirty = 1; + mc->mc_dbx->md_dirty = 1; } if (!F_ISSET(mc->mc_pg[0]->mp_flags, P_DIRTY)) { if ((rc = mdb_touch(mc))) return rc; - mc->mc_txn->mt_dbs[mc->mc_dbi].md_root = mc->mc_pg[0]->mp_pgno; + mc->mc_db->md_root = mc->mc_pg[0]->mp_pgno; } } @@ -2671,9 +2665,7 @@ mdb_get(MDB_txn *txn, MDB_dbi dbi, return EINVAL; } - mc.mc_txn = txn; - mc.mc_dbi = dbi; - mc.mc_flags = 0; + mdb_cursor_init(&mc, txn, dbi); if (txn->mt_dbs[dbi].md_flags & MDB_DUPSORT) { mc.mc_xcursor = &mx; mdb_xcursor_init0(&mc); @@ -2687,36 +2679,34 @@ static int mdb_sibling(MDB_cursor *mc, int move_right) { int rc; - unsigned int ptop; MDB_node *indx; MDB_page *mp; if (mc->mc_snum < 2) { return MDB_NOTFOUND; /* root has no siblings */ } - ptop = mc->mc_top-1; - - DPRINTF("parent page is page %lu, index %u", - mc->mc_pg[ptop]->mp_pgno, mc->mc_ki[ptop]); cursor_pop_page(mc); - if (move_right ? (mc->mc_ki[ptop] + 1u >= NUMKEYS(mc->mc_pg[ptop])) - : (mc->mc_ki[ptop] == 0)) { + DPRINTF("parent page is page %lu, index %u", + mc->mc_pg[mc->mc_top]->mp_pgno, mc->mc_ki[mc->mc_top]); + + if (move_right ? (mc->mc_ki[mc->mc_top] + 1u >= NUMKEYS(mc->mc_pg[mc->mc_top])) + : (mc->mc_ki[mc->mc_top] == 0)) { DPRINTF("no more keys left, moving to %s sibling", move_right ? 
"right" : "left"); if ((rc = mdb_sibling(mc, move_right)) != MDB_SUCCESS) return rc; } else { if (move_right) - mc->mc_ki[ptop]++; + mc->mc_ki[mc->mc_top]++; else - mc->mc_ki[ptop]--; + mc->mc_ki[mc->mc_top]--; DPRINTF("just moving to %s index key %u", - move_right ? "right" : "left", mc->mc_ki[ptop]); + move_right ? "right" : "left", mc->mc_ki[mc->mc_top]); } - assert(IS_BRANCH(mc->mc_pg[ptop])); + assert(IS_BRANCH(mc->mc_pg[mc->mc_top])); - indx = NODEPTR(mc->mc_pg[ptop], mc->mc_ki[ptop]); + indx = NODEPTR(mc->mc_pg[mc->mc_top], mc->mc_ki[mc->mc_top]); if ((rc = mdb_get_page(mc->mc_txn, NODEPGNO(indx), &mp))) return rc;; @@ -2740,7 +2730,7 @@ mdb_cursor_next(MDB_cursor *mc, MDB_val *key, MDB_val *data, MDB_cursor_op op) mp = mc->mc_pg[mc->mc_top]; - if (mc->mc_txn->mt_dbs[mc->mc_dbi].md_flags & MDB_DUPSORT) { + if (mc->mc_db->md_flags & MDB_DUPSORT) { leaf = NODEPTR(mp, mc->mc_ki[mc->mc_top]); if (F_ISSET(leaf->mn_flags, F_DUPDATA)) { if (op == MDB_NEXT || op == MDB_NEXT_DUP) { @@ -2772,7 +2762,7 @@ mdb_cursor_next(MDB_cursor *mc, MDB_val *key, MDB_val *data, MDB_cursor_op op) mp->mp_pgno, NUMKEYS(mp), mc->mc_ki[mc->mc_top]); if (IS_LEAF2(mp)) { - key->mv_size = mc->mc_txn->mt_dbs[mc->mc_dbi].md_pad; + key->mv_size = mc->mc_db->md_pad; key->mv_data = LEAF2KEY(mp, mc->mc_ki[mc->mc_top], key->mv_size); return MDB_SUCCESS; } @@ -2809,7 +2799,7 @@ mdb_cursor_prev(MDB_cursor *mc, MDB_val *key, MDB_val *data, MDB_cursor_op op) mp = mc->mc_pg[mc->mc_top]; - if (mc->mc_txn->mt_dbs[mc->mc_dbi].md_flags & MDB_DUPSORT) { + if (mc->mc_db->md_flags & MDB_DUPSORT) { leaf = NODEPTR(mp, mc->mc_ki[mc->mc_top]); if (op == MDB_PREV || op == MDB_PREV_DUP) { if (F_ISSET(leaf->mn_flags, F_DUPDATA)) { @@ -2844,7 +2834,7 @@ mdb_cursor_prev(MDB_cursor *mc, MDB_val *key, MDB_val *data, MDB_cursor_op op) mp->mp_pgno, NUMKEYS(mp), mc->mc_ki[mc->mc_top]); if (IS_LEAF2(mp)) { - key->mv_size = mc->mc_txn->mt_dbs[mc->mc_dbi].md_pad; + key->mv_size = mc->mc_db->md_pad; key->mv_data = 
LEAF2KEY(mp, mc->mc_ki[mc->mc_top], key->mv_size); return MDB_SUCCESS; } @@ -2887,13 +2877,13 @@ mdb_cursor_set(MDB_cursor *mc, MDB_val *key, MDB_val *data, MDB_val nodekey; if (mc->mc_pg[mc->mc_top]->mp_flags & P_LEAF2) { - nodekey.mv_size = mc->mc_txn->mt_dbs[mc->mc_dbi].md_pad; + nodekey.mv_size = mc->mc_db->md_pad; nodekey.mv_data = LEAF2KEY(mc->mc_pg[mc->mc_top], 0, nodekey.mv_size); } else { leaf = NODEPTR(mc->mc_pg[mc->mc_top], 0); MDB_SET_KEY(leaf, &nodekey); } - rc = mc->mc_txn->mt_dbxs[mc->mc_dbi].md_cmp(key, &nodekey); + rc = mc->mc_dbx->md_cmp(key, &nodekey); if (rc == 0) { /* Probably happens rarely, but first node on the page * was the one we wanted. @@ -2915,7 +2905,7 @@ set1: leaf = NODEPTR(mc->mc_pg[mc->mc_top], NUMKEYS(mc->mc_pg[mc->mc_top])-1); MDB_SET_KEY(leaf, &nodekey); } - rc = mc->mc_txn->mt_dbxs[mc->mc_dbi].md_cmp(key, &nodekey); + rc = mc->mc_dbx->md_cmp(key, &nodekey); if (rc == 0) { /* last node was the one we wanted */ mc->mc_ki[mc->mc_top] = NUMKEYS(mc->mc_pg[mc->mc_top])-1; @@ -2969,7 +2959,7 @@ set3: mc->mc_flags &= ~C_EOF; if (IS_LEAF2(mc->mc_pg[mc->mc_top])) { - key->mv_size = mc->mc_txn->mt_dbs[mc->mc_dbi].md_pad; + key->mv_size = mc->mc_db->md_pad; key->mv_data = LEAF2KEY(mc->mc_pg[mc->mc_top], mc->mc_ki[mc->mc_top], key->mv_size); return MDB_SUCCESS; } @@ -2997,7 +2987,7 @@ set3: MDB_val d2; if ((rc = mdb_read_data(mc->mc_txn, leaf, &d2)) != MDB_SUCCESS) return rc; - rc = mc->mc_txn->mt_dbxs[mc->mc_dbi].md_dcmp(data, &d2); + rc = mc->mc_dbx->md_dcmp(data, &d2); if (rc) { if (op == MDB_GET_BOTH || rc > 0) return MDB_NOTFOUND; @@ -3032,8 +3022,10 @@ mdb_cursor_first(MDB_cursor *mc, MDB_val *key, MDB_val *data) mc->mc_flags |= C_INITIALIZED; mc->mc_flags &= ~C_EOF; + mc->mc_ki[mc->mc_top] = 0; + if (IS_LEAF2(mc->mc_pg[mc->mc_top])) { - key->mv_size = mc->mc_txn->mt_dbs[mc->mc_dbi].md_pad; + key->mv_size = mc->mc_db->md_pad; key->mv_data = LEAF2KEY(mc->mc_pg[mc->mc_top], 0, key->mv_size); return MDB_SUCCESS; } @@ -3077,7 +3069,7 @@ 
mdb_cursor_last(MDB_cursor *mc, MDB_val *key, MDB_val *data) mc->mc_ki[mc->mc_top] = NUMKEYS(mc->mc_pg[mc->mc_top]) - 1; if (IS_LEAF2(mc->mc_pg[mc->mc_top])) { - key->mv_size = mc->mc_txn->mt_dbs[mc->mc_dbi].md_pad; + key->mv_size = mc->mc_db->md_pad; key->mv_data = LEAF2KEY(mc->mc_pg[mc->mc_top], mc->mc_ki[mc->mc_top], key->mv_size); return MDB_SUCCESS; } @@ -3126,7 +3118,7 @@ mdb_cursor_get(MDB_cursor *mc, MDB_val *key, MDB_val *data, break; case MDB_GET_MULTIPLE: if (data == NULL || - !(mc->mc_txn->mt_dbs[mc->mc_dbi].md_flags & MDB_DUPFIXED) || + !(mc->mc_db->md_flags & MDB_DUPFIXED) || !(mc->mc_flags & C_INITIALIZED)) { rc = EINVAL; break; @@ -3138,7 +3130,7 @@ mdb_cursor_get(MDB_cursor *mc, MDB_val *key, MDB_val *data, goto fetchm; case MDB_NEXT_MULTIPLE: if (data == NULL || - !(mc->mc_txn->mt_dbs[mc->mc_dbi].md_flags & MDB_DUPFIXED)) { + !(mc->mc_db->md_flags & MDB_DUPFIXED)) { rc = EINVAL; break; } @@ -3152,7 +3144,7 @@ mdb_cursor_get(MDB_cursor *mc, MDB_val *key, MDB_val *data, fetchm: mx = &mc->mc_xcursor->mx_cursor; data->mv_size = NUMKEYS(mx->mc_pg[mx->mc_top]) * - mx->mc_txn->mt_dbs[mx->mc_dbi].md_pad; + mx->mc_db->md_pad; data->mv_data = METADATA(mx->mc_pg[mx->mc_top]); mx->mc_ki[mx->mc_top] = NUMKEYS(mx->mc_pg[mx->mc_top])-1; } else { @@ -3181,7 +3173,7 @@ fetchm: break; case MDB_FIRST_DUP: if (data == NULL || - !(mc->mc_txn->mt_dbs[mc->mc_dbi].md_flags & MDB_DUPSORT) || + !(mc->mc_db->md_flags & MDB_DUPSORT) || !(mc->mc_flags & C_INITIALIZED) || !(mc->mc_xcursor->mx_cursor.mc_flags & C_INITIALIZED)) { rc = EINVAL; @@ -3194,7 +3186,7 @@ fetchm: break; case MDB_LAST_DUP: if (data == NULL || - !(mc->mc_txn->mt_dbs[mc->mc_dbi].md_flags & MDB_DUPSORT) || + !(mc->mc_db->md_flags & MDB_DUPSORT) || !(mc->mc_flags & C_INITIALIZED) || !(mc->mc_xcursor->mx_cursor.mc_flags & C_INITIALIZED)) { rc = EINVAL; @@ -3216,20 +3208,21 @@ mdb_cursor_touch(MDB_cursor *mc) { int rc; - if (mc->mc_dbi > MAIN_DBI && !mc->mc_txn->mt_dbxs[mc->mc_dbi].md_dirty) { + if (mc->mc_dbi 
> MAIN_DBI && !mc->mc_dbx->md_dirty) { MDB_cursor mc2; - mc2.mc_txn = mc->mc_txn; - mc2.mc_dbi = MAIN_DBI; - rc = mdb_search_page(&mc2, &mc->mc_txn->mt_dbxs[mc->mc_dbi].md_name, 1); - if (rc) return rc; - mc->mc_txn->mt_dbxs[mc->mc_dbi].md_dirty = 1; + mdb_cursor_init(&mc2, mc->mc_txn, MAIN_DBI); + rc = mdb_search_page(&mc2, &mc->mc_dbx->md_name, 1); + if (rc) + return rc; + mc->mc_dbx->md_dirty = 1; } for (mc->mc_top = 0; mc->mc_top < mc->mc_snum; mc->mc_top++) { if (!F_ISSET(mc->mc_pg[mc->mc_top]->mp_flags, P_DIRTY)) { rc = mdb_touch(mc); - if (rc) return rc; + if (rc) + return rc; if (!mc->mc_top) { - mc->mc_txn->mt_dbs[mc->mc_dbi].md_root = + mc->mc_db->md_root = mc->mc_pg[mc->mc_top]->mp_pgno; } } @@ -3263,7 +3256,7 @@ mdb_cursor_put(MDB_cursor *mc, MDB_val *key, MDB_val *data, if (!(mc->mc_flags & C_INITIALIZED)) return EINVAL; rc = MDB_SUCCESS; - } else if (mc->mc_txn->mt_dbs[mc->mc_dbi].md_root == P_INVALID) { + } else if (mc->mc_db->md_root == P_INVALID) { MDB_page *np; /* new database, write a root leaf page */ DPUTS("allocating new root leaf page"); @@ -3272,10 +3265,10 @@ mdb_cursor_put(MDB_cursor *mc, MDB_val *key, MDB_val *data, } mc->mc_snum = 0; cursor_push_page(mc, np); - mc->mc_txn->mt_dbs[mc->mc_dbi].md_root = np->mp_pgno; - mc->mc_txn->mt_dbs[mc->mc_dbi].md_depth++; - mc->mc_txn->mt_dbxs[mc->mc_dbi].md_dirty = 1; - if ((mc->mc_txn->mt_dbs[mc->mc_dbi].md_flags & (MDB_DUPSORT|MDB_DUPFIXED)) + mc->mc_db->md_root = np->mp_pgno; + mc->mc_db->md_depth++; + mc->mc_dbx->md_dirty = 1; + if ((mc->mc_db->md_flags & (MDB_DUPSORT|MDB_DUPFIXED)) == MDB_DUPFIXED) np->mp_flags |= P_LEAF2; mc->mc_flags |= C_INITIALIZED; @@ -3296,14 +3289,15 @@ mdb_cursor_put(MDB_cursor *mc, MDB_val *key, MDB_val *data, /* Cursor is positioned, now make sure all pages are writable */ rc2 = mdb_cursor_touch(mc); - if (rc2) return rc2; + if (rc2) + return rc2; top: /* The key already exists */ if (rc == MDB_SUCCESS) { /* there's only a key anyway, so this is a no-op */ if 
(IS_LEAF2(mc->mc_pg[mc->mc_top])) { - unsigned int ksize = mc->mc_txn->mt_dbs[mc->mc_dbi].md_pad; + unsigned int ksize = mc->mc_db->md_pad; if (key->mv_size != ksize) return EINVAL; if (flags == MDB_CURRENT) { @@ -3316,22 +3310,23 @@ top: leaf = NODEPTR(mc->mc_pg[mc->mc_top], mc->mc_ki[mc->mc_top]); /* DB has dups? */ - if (F_ISSET(mc->mc_txn->mt_dbs[mc->mc_dbi].md_flags, MDB_DUPSORT)) { + if (F_ISSET(mc->mc_db->md_flags, MDB_DUPSORT)) { /* Was a single item before, must convert now */ if (!F_ISSET(leaf->mn_flags, F_DUPDATA)) { dkey.mv_size = NODEDSZ(leaf); dkey.mv_data = dbuf; memcpy(dbuf, NODEDATA(leaf), dkey.mv_size); /* data matches, ignore it */ - if (!mdb_dcmp(mc->mc_txn, mc->mc_dbi, data, &dkey)) + if (!mc->mc_dbx->md_dcmp(data, &dkey)) return (flags == MDB_NODUPDATA) ? MDB_KEYEXIST : MDB_SUCCESS; memset(&dummy, 0, sizeof(dummy)); - if (mc->mc_txn->mt_dbs[mc->mc_dbi].md_flags & MDB_DUPFIXED) { + if (mc->mc_db->md_flags & MDB_DUPFIXED) { dummy.md_pad = data->mv_size; dummy.md_flags = MDB_DUPFIXED; - if (mc->mc_txn->mt_dbs[mc->mc_dbi].md_flags & MDB_INTEGERDUP) + if (mc->mc_db->md_flags & MDB_INTEGERDUP) dummy.md_flags |= MDB_INTEGERKEY; } + dummy.md_flags |= MDB_SUBDATA; dummy.md_root = P_INVALID; if (dkey.mv_size == sizeof(MDB_db)) { memcpy(NODEDATA(leaf), &dummy, sizeof(dummy)); @@ -3386,11 +3381,10 @@ new_sub: * DB are all zero size. 
*/ if (do_sub) { + MDB_db *db; leaf = NODEPTR(mc->mc_pg[mc->mc_top], mc->mc_ki[mc->mc_top]); put_sub: - if (flags == MDB_CURRENT) - mdb_xcursor_init2(mc); - else + if (flags != MDB_CURRENT) mdb_xcursor_init1(mc, leaf); xdata.mv_size = 0; xdata.mv_data = ""; @@ -3399,16 +3393,16 @@ put_sub: /* converted, write the original data first */ if (dkey.mv_size) { rc = mdb_cursor_put(&mc->mc_xcursor->mx_cursor, &dkey, &xdata, flags); - if (rc) return rc; + if (rc) + return rc; leaf->mn_flags |= F_DUPDATA; } rc = mdb_cursor_put(&mc->mc_xcursor->mx_cursor, data, &xdata, flags); - mdb_xcursor_fini(mc); - memcpy(NODEDATA(leaf), - &mc->mc_xcursor->mx_txn.mt_dbs[mc->mc_xcursor->mx_cursor.mc_dbi], - sizeof(MDB_db)); + db = NODEDATA(leaf); + assert((db->md_flags & MDB_SUBDATA) == MDB_SUBDATA); + memcpy(db, &mc->mc_xcursor->mx_db, sizeof(MDB_db)); } - mc->mc_txn->mt_dbs[mc->mc_dbi].md_entries++; + mc->mc_db->md_entries++; } done: return rc; @@ -3427,22 +3421,20 @@ mdb_cursor_del(MDB_cursor *mc, unsigned int flags) return EINVAL; rc = mdb_cursor_touch(mc); - if (rc) return rc; + if (rc) + return rc; leaf = NODEPTR(mc->mc_pg[mc->mc_top], mc->mc_ki[mc->mc_top]); if (!IS_LEAF2(mc->mc_pg[mc->mc_top]) && F_ISSET(leaf->mn_flags, F_DUPDATA)) { if (flags != MDB_NODUPDATA) { - mdb_xcursor_init2(mc); rc = mdb_cursor_del(&mc->mc_xcursor->mx_cursor, 0); - mdb_xcursor_fini(mc); /* If sub-DB still has entries, we're done */ - if (mc->mc_xcursor->mx_txn.mt_dbs[mc->mc_xcursor->mx_cursor.mc_dbi].md_root - != P_INVALID) { - memcpy(NODEDATA(leaf), - &mc->mc_xcursor->mx_txn.mt_dbs[mc->mc_xcursor->mx_cursor.mc_dbi], - sizeof(MDB_db)); - mc->mc_txn->mt_dbs[mc->mc_dbi].md_entries--; + if (mc->mc_xcursor->mx_db.md_root != P_INVALID) { + MDB_db *db = NODEDATA(leaf); + assert((db->md_flags & MDB_SUBDATA) == MDB_SUBDATA); + memcpy(db, &mc->mc_xcursor->mx_db, sizeof(MDB_db)); + mc->mc_db->md_entries--; return rc; } /* otherwise fall thru and delete the sub-DB */ @@ -3456,26 +3448,28 @@ mdb_cursor_del(MDB_cursor 
*mc, unsigned int flags) unsigned int i; mx = &mc->mc_xcursor->mx_cursor; - mc->mc_txn->mt_dbs[mc->mc_dbi].md_entries -= - mx->mc_txn->mt_dbs[mx->mc_dbi].md_entries; + mc->mc_db->md_entries -= + mx->mc_db->md_entries; cursor_pop_page(mx); - if (mx->mc_snum) { - while (mx->mc_snum > 1) { - for (i=0; i<NUMKEYS(mx->mc_pg[mx->mc_top]); i++) { - pgno_t pg; - ni = NODEPTR(mx->mc_pg[mx->mc_top], i); - pg = NODEPGNO(ni); - /* free it */ - mdb_midl_append(mc->mc_txn->mt_free_pgs, pg); - } - rc = mdb_sibling(mx, 1); - if (rc) break; + while (mx->mc_snum > 1) { + for (i=0; i<NUMKEYS(mx->mc_pg[mx->mc_top]); i++) { + MDB_page *mp; + pgno_t pg; + ni = NODEPTR(mx->mc_pg[mx->mc_top], i); + pg = NODEPGNO(ni); + if ((rc = mdb_get_page(mc->mc_txn, pg, &mp))) + return rc; + /* free it */ + mdb_midl_append(mc->mc_txn->mt_free_pgs, pg); } + rc = mdb_sibling(mx, 1); + if (rc) + break; } /* free it */ mdb_midl_append(mc->mc_txn->mt_free_pgs, - mx->mc_txn->mt_dbs[mx->mc_dbi].md_root); + mx->mc_db->md_root); } } @@ -3498,11 +3492,11 @@ mdb_new_page(MDB_cursor *mc, uint32_t flags, int num) np->mp_upper = mc->mc_txn->mt_env->me_psize; if (IS_BRANCH(np)) - mc->mc_txn->mt_dbs[mc->mc_dbi].md_branch_pages++; + mc->mc_db->md_branch_pages++; else if (IS_LEAF(np)) - mc->mc_txn->mt_dbs[mc->mc_dbi].md_leaf_pages++; + mc->mc_db->md_leaf_pages++; else if (IS_OVERFLOW(np)) { - mc->mc_txn->mt_dbs[mc->mc_dbi].md_overflow_pages += num; + mc->mc_db->md_overflow_pages += num; np->mp_pages = num; } @@ -3560,7 +3554,7 @@ mdb_add_node(MDB_cursor *mc, indx_t indx, if (IS_LEAF2(mp)) { /* Move higher keys up one slot.
*/ - int ksize = mc->mc_txn->mt_dbs[mc->mc_dbi].md_pad, dif; + int ksize = mc->mc_db->md_pad, dif; char *ptr = LEAF2KEY(mp, indx, ksize); dif = NUMKEYS(mp) - indx; if (dif > 0) @@ -3703,29 +3697,17 @@ static void mdb_xcursor_init0(MDB_cursor *mc) { MDB_xcursor *mx = mc->mc_xcursor; - MDB_dbi dbn; - - mx->mx_txn = *mc->mc_txn; - mx->mx_txn.mt_dbxs = mx->mx_dbxs; - mx->mx_txn.mt_dbs = mx->mx_dbs; - mx->mx_dbxs[0] = mc->mc_txn->mt_dbxs[0]; - mx->mx_dbxs[1] = mc->mc_txn->mt_dbxs[1]; - if (mc->mc_dbi > 1) { - mx->mx_dbxs[2] = mc->mc_txn->mt_dbxs[mc->mc_dbi]; - dbn = 2; - } else { - dbn = 1; - } - mx->mx_dbxs[dbn+1].md_parent = dbn; - mx->mx_dbxs[dbn+1].md_cmp = mx->mx_dbxs[dbn].md_dcmp; - mx->mx_dbxs[dbn+1].md_rel = mx->mx_dbxs[dbn].md_rel; - mx->mx_dbxs[dbn+1].md_dirty = 0; - mx->mx_txn.mt_numdbs = dbn+2; - mx->mx_txn.mt_u = mc->mc_txn->mt_u; mx->mx_cursor.mc_xcursor = NULL; - mx->mx_cursor.mc_txn = &mx->mx_txn; - mx->mx_cursor.mc_dbi = dbn+1; + mx->mx_cursor.mc_txn = mc->mc_txn; + mx->mx_cursor.mc_db = &mx->mx_db; + mx->mx_cursor.mc_dbx = &mx->mx_dbx; + mx->mx_cursor.mc_dbi = mc->mc_dbi+1; + mx->mx_dbx.md_parent = mc->mc_dbi; + mx->mx_dbx.md_cmp = mc->mc_dbx->md_dcmp; + mx->mx_dbx.md_dcmp = NULL; + mx->mx_dbx.md_rel = mc->mc_dbx->md_rel; + mx->mx_dbx.md_dirty = 0; } static void @@ -3733,57 +3715,27 @@ mdb_xcursor_init1(MDB_cursor *mc, MDB_node *node) { MDB_db *db = NODEDATA(node); MDB_xcursor *mx = mc->mc_xcursor; - MDB_dbi dbn; - mx->mx_dbs[0] = mc->mc_txn->mt_dbs[0]; - mx->mx_dbs[1] = mc->mc_txn->mt_dbs[1]; - if (mc->mc_dbi > 1) { - mx->mx_dbs[2] = mc->mc_txn->mt_dbs[mc->mc_dbi]; - mx->mx_dbxs[2].md_dirty = mc->mc_txn->mt_dbxs[mc->mc_dbi].md_dirty; - dbn = 3; - } else { - dbn = 2; - } - DPRINTF("Sub-db %u for db %u root page %lu", dbn, mc->mc_dbi, db->md_root); - mx->mx_dbs[dbn] = *db; + assert((db->md_flags & MDB_SUBDATA) == MDB_SUBDATA); + mx->mx_db = *db; + DPRINTF("Sub-db %u for db %u root page %zu", mx->mx_cursor.mc_dbi, mc->mc_dbi, + db->md_root); if 
(F_ISSET(mc->mc_pg[mc->mc_top]->mp_flags, P_DIRTY)) - mx->mx_dbxs[dbn].md_dirty = 1; - mx->mx_dbxs[dbn].md_name.mv_data = NODEKEY(node); - mx->mx_dbxs[dbn].md_name.mv_size = node->mn_ksize; - mx->mx_txn.mt_next_pgno = mc->mc_txn->mt_next_pgno; + mx->mx_dbx.md_dirty = 1; + mx->mx_dbx.md_name.mv_data = NODEKEY(node); + mx->mx_dbx.md_name.mv_size = node->mn_ksize; mx->mx_cursor.mc_snum = 0; mx->mx_cursor.mc_flags = 0; } static void -mdb_xcursor_init2(MDB_cursor *mc) +mdb_cursor_init(MDB_cursor *mc, MDB_txn *txn, MDB_dbi dbi) { - MDB_xcursor *mx = mc->mc_xcursor; - MDB_dbi dbn; - mx->mx_dbs[0] = mc->mc_txn->mt_dbs[0]; - mx->mx_dbs[1] = mc->mc_txn->mt_dbs[1]; - if (mc->mc_dbi > 1) { - mx->mx_dbs[2] = mc->mc_txn->mt_dbs[mc->mc_dbi]; - mx->mx_dbxs[2].md_dirty = mc->mc_txn->mt_dbxs[mc->mc_dbi].md_dirty; - dbn = 3; - } else { - dbn = 2; - } - DPRINTF("Sub-db %u for db %u root page %lu", dbn, mc->mc_dbi, - mx->mx_dbs[dbn].md_root); - mx->mx_txn.mt_next_pgno = mc->mc_txn->mt_next_pgno; -} - -static void -mdb_xcursor_fini(MDB_cursor *mc) -{ - MDB_xcursor *mx = mc->mc_xcursor; - mc->mc_txn->mt_next_pgno = mx->mx_txn.mt_next_pgno; - mc->mc_txn->mt_dbs[0] = mx->mx_dbs[0]; - mc->mc_txn->mt_dbs[1] = mx->mx_dbs[1]; - if (mc->mc_dbi > 1) { - mc->mc_txn->mt_dbs[mc->mc_dbi] = mx->mx_dbs[2]; - mc->mc_txn->mt_dbxs[mc->mc_dbi].md_dirty = mx->mx_dbxs[2].md_dirty; - } + mc->mc_dbi = dbi; + mc->mc_txn = txn; + mc->mc_db = &txn->mt_dbs[dbi]; + mc->mc_dbx = &txn->mt_dbxs[dbi]; + mc->mc_snum = 0; + mc->mc_flags = 0; } int @@ -3798,9 +3750,8 @@ mdb_cursor_open(MDB_txn *txn, MDB_dbi dbi, MDB_cursor **ret) if (txn->mt_dbs[dbi].md_flags & MDB_DUPSORT) size += sizeof(MDB_xcursor); - if ((mc = calloc(1, size)) != NULL) { - mc->mc_dbi = dbi; - mc->mc_txn = txn; + if ((mc = malloc(size)) != NULL) { + mdb_cursor_init(mc, txn, dbi); if (txn->mt_dbs[dbi].md_flags & MDB_DUPSORT) { MDB_xcursor *mx = (MDB_xcursor *)(mc + 1); mc->mc_xcursor = mx; @@ -3824,7 +3775,7 @@ mdb_cursor_count(MDB_cursor *mc, unsigned 
long *countp) if (mc == NULL || countp == NULL) return EINVAL; - if (!(mc->mc_txn->mt_dbs[mc->mc_dbi].md_flags & MDB_DUPSORT)) + if (!(mc->mc_db->md_flags & MDB_DUPSORT)) return EINVAL; leaf = NODEPTR(mc->mc_pg[mc->mc_top], mc->mc_ki[mc->mc_top]); @@ -3834,7 +3785,7 @@ mdb_cursor_count(MDB_cursor *mc, unsigned long *countp) if (!(mc->mc_xcursor->mx_cursor.mc_flags & C_INITIALIZED)) return EINVAL; - *countp = mc->mc_xcursor->mx_txn.mt_dbs[mc->mc_xcursor->mx_cursor.mc_dbi].md_entries; + *countp = mc->mc_xcursor->mx_db.md_entries; } return MDB_SUCCESS; } @@ -3905,27 +3856,30 @@ mdb_move_node(MDB_cursor *csrc, MDB_cursor *cdst) /* Mark src and dst as dirty. */ if ((rc = mdb_touch(csrc)) || (rc = mdb_touch(cdst))) - return rc;; + return rc; if (IS_LEAF2(csrc->mc_pg[csrc->mc_top])) { srcnode = NODEPTR(csrc->mc_pg[csrc->mc_top], 0); /* fake */ - key.mv_size = csrc->mc_txn->mt_dbs[csrc->mc_dbi].md_pad; + key.mv_size = csrc->mc_db->md_pad; key.mv_data = LEAF2KEY(csrc->mc_pg[csrc->mc_top], csrc->mc_ki[csrc->mc_top], key.mv_size); data.mv_size = 0; data.mv_data = NULL; } else { + srcnode = NODEPTR(csrc->mc_pg[csrc->mc_top], csrc->mc_ki[csrc->mc_top]); if (csrc->mc_ki[csrc->mc_top] == 0 && IS_BRANCH(csrc->mc_pg[csrc->mc_top])) { unsigned int snum = csrc->mc_snum; + MDB_node *s2; /* must find the lowest key below src */ mdb_search_page_root(csrc, NULL, 0); - srcnode = NODEPTR(csrc->mc_pg[csrc->mc_top], 0); + s2 = NODEPTR(csrc->mc_pg[csrc->mc_top], 0); + key.mv_size = NODEKSZ(s2); + key.mv_data = NODEKEY(s2); csrc->mc_snum = snum--; csrc->mc_top = snum; } else { - srcnode = NODEPTR(csrc->mc_pg[csrc->mc_top], csrc->mc_ki[csrc->mc_top]); + key.mv_size = NODEKSZ(srcnode); + key.mv_data = NODEKEY(srcnode); } - key.mv_size = NODEKSZ(srcnode); - key.mv_data = NODEKEY(srcnode); data.mv_size = NODEDSZ(srcnode); data.mv_data = NODEDATA(srcnode); } @@ -3952,9 +3906,9 @@ mdb_move_node(MDB_cursor *csrc, MDB_cursor *cdst) if (csrc->mc_ki[csrc->mc_top] == 0) { if (csrc->mc_ki[csrc->mc_top-1] 
!= 0) { if (IS_LEAF2(csrc->mc_pg[csrc->mc_top])) { - key.mv_data = LEAF2KEY(csrc->mc_pg[csrc->mc_top], csrc->mc_ki[csrc->mc_top], key.mv_size); + key.mv_data = LEAF2KEY(csrc->mc_pg[csrc->mc_top], 0, key.mv_size); } else { - srcnode = NODEPTR(csrc->mc_pg[csrc->mc_top], csrc->mc_ki[csrc->mc_top]); + srcnode = NODEPTR(csrc->mc_pg[csrc->mc_top], 0); key.mv_size = NODEKSZ(srcnode); key.mv_data = NODEKEY(srcnode); } @@ -4017,7 +3971,7 @@ mdb_merge(MDB_cursor *csrc, MDB_cursor *cdst) */ j = NUMKEYS(cdst->mc_pg[cdst->mc_top]); if (IS_LEAF2(csrc->mc_pg[csrc->mc_top])) { - key.mv_size = csrc->mc_txn->mt_dbs[csrc->mc_dbi].md_pad; + key.mv_size = csrc->mc_db->md_pad; key.mv_data = METADATA(csrc->mc_pg[csrc->mc_top]); for (i = 0; i < NUMKEYS(csrc->mc_pg[csrc->mc_top]); i++, j++) { rc = mdb_add_node(cdst, j, &key, NULL, 0, 0); @@ -4053,9 +4007,9 @@ mdb_merge(MDB_cursor *csrc, MDB_cursor *cdst) mdb_midl_append(csrc->mc_txn->mt_free_pgs, csrc->mc_pg[csrc->mc_top]->mp_pgno); if (IS_LEAF(csrc->mc_pg[csrc->mc_top])) - csrc->mc_txn->mt_dbs[csrc->mc_dbi].md_leaf_pages--; + csrc->mc_db->md_leaf_pages--; else - csrc->mc_txn->mt_dbs[csrc->mc_dbi].md_branch_pages--; + csrc->mc_db->md_branch_pages--; cursor_pop_page(csrc); return mdb_rebalance(csrc); @@ -4068,6 +4022,8 @@ mdb_cursor_copy(const MDB_cursor *csrc, MDB_cursor *cdst) cdst->mc_txn = csrc->mc_txn; cdst->mc_dbi = csrc->mc_dbi; + cdst->mc_db = csrc->mc_db; + cdst->mc_dbx = csrc->mc_dbx; cdst->mc_snum = csrc->mc_snum; cdst->mc_top = csrc->mc_top; cdst->mc_flags = csrc->mc_flags; @@ -4082,7 +4038,6 @@ static int mdb_rebalance(MDB_cursor *mc) { MDB_node *node; - MDB_page *root; int rc; unsigned int ptop; MDB_cursor mn; @@ -4100,18 +4055,20 @@ mdb_rebalance(MDB_cursor *mc) if (mc->mc_snum < 2) { if (NUMKEYS(mc->mc_pg[mc->mc_top]) == 0) { DPUTS("tree is completely empty"); - mc->mc_txn->mt_dbs[mc->mc_dbi].md_root = P_INVALID; - mc->mc_txn->mt_dbs[mc->mc_dbi].md_depth = 0; - mc->mc_txn->mt_dbs[mc->mc_dbi].md_leaf_pages = 0; + 
mc->mc_db->md_root = P_INVALID; + mc->mc_db->md_depth = 0; + mc->mc_db->md_leaf_pages = 0; mdb_midl_append(mc->mc_txn->mt_free_pgs, mc->mc_pg[mc->mc_top]->mp_pgno); + mc->mc_snum = 0; } else if (IS_BRANCH(mc->mc_pg[mc->mc_top]) && NUMKEYS(mc->mc_pg[mc->mc_top]) == 1) { DPUTS("collapsing root page!"); mdb_midl_append(mc->mc_txn->mt_free_pgs, mc->mc_pg[mc->mc_top]->mp_pgno); - mc->mc_txn->mt_dbs[mc->mc_dbi].md_root = NODEPGNO(NODEPTR(mc->mc_pg[mc->mc_top], 0)); - if ((rc = mdb_get_page(mc->mc_txn, mc->mc_txn->mt_dbs[mc->mc_dbi].md_root, &root))) + mc->mc_db->md_root = NODEPGNO(NODEPTR(mc->mc_pg[mc->mc_top], 0)); + if ((rc = mdb_get_page(mc->mc_txn, mc->mc_db->md_root, + &mc->mc_pg[mc->mc_top]))) return rc; - mc->mc_txn->mt_dbs[mc->mc_dbi].md_depth--; - mc->mc_txn->mt_dbs[mc->mc_dbi].md_branch_pages--; + mc->mc_db->md_depth--; + mc->mc_db->md_branch_pages--; } else DPUTS("root page doesn't need rebalancing"); return MDB_SUCCESS; @@ -4191,8 +4148,8 @@ mdb_del0(MDB_cursor *mc, MDB_node *leaf) pg++; } } - mdb_del_node(mc->mc_pg[mc->mc_top], mc->mc_ki[mc->mc_top], mc->mc_txn->mt_dbs[mc->mc_dbi].md_pad); - mc->mc_txn->mt_dbs[mc->mc_dbi].md_entries--; + mdb_del_node(mc->mc_pg[mc->mc_top], mc->mc_ki[mc->mc_top], mc->mc_db->md_pad); + mc->mc_db->md_entries--; rc = mdb_rebalance(mc); if (rc != MDB_SUCCESS) mc->mc_txn->mt_flags |= MDB_TXN_ERROR; @@ -4226,9 +4183,7 @@ mdb_del(MDB_txn *txn, MDB_dbi dbi, return EINVAL; } - mc.mc_txn = txn; - mc.mc_dbi = dbi; - mc.mc_flags = 0; + mdb_cursor_init(&mc, txn, dbi); if (txn->mt_dbs[dbi].md_flags & MDB_DUPSORT) { mc.mc_xcursor = &mx; mdb_xcursor_init0(&mc); @@ -4287,17 +4242,17 @@ mdb_split(MDB_cursor *mc, MDB_val *newkey, MDB_val *newdata, pgno_t newpgno) mc->mc_ki[1] = mc->mc_ki[0]; mc->mc_pg[0] = pp; mc->mc_ki[0] = 0; - mc->mc_txn->mt_dbs[mc->mc_dbi].md_root = pp->mp_pgno; - DPRINTF("root split! new root = %lu", pp->mp_pgno); - mc->mc_txn->mt_dbs[mc->mc_dbi].md_depth++; + mc->mc_db->md_root = pp->mp_pgno; + DPRINTF("root split! 
new root = %zu", pp->mp_pgno); + mc->mc_db->md_depth++; /* Add left (implicit) pointer. */ if ((rc = mdb_add_node(mc, 0, NULL, NULL, mp->mp_pgno, 0)) != MDB_SUCCESS) { /* undo the pre-push */ mc->mc_pg[0] = mc->mc_pg[1]; mc->mc_ki[0] = mc->mc_ki[1]; - mc->mc_txn->mt_dbs[mc->mc_dbi].md_root = mp->mp_pgno; - mc->mc_txn->mt_dbs[mc->mc_dbi].md_depth--; + mc->mc_db->md_root = mp->mp_pgno; + mc->mc_db->md_depth--; return rc; } mc->mc_snum = 2; @@ -4326,7 +4281,7 @@ mdb_split(MDB_cursor *mc, MDB_val *newkey, MDB_val *newdata, pgno_t newpgno) /* Move half of the keys to the right sibling */ copy = NULL; x = mc->mc_ki[mc->mc_top] - split_indx; - ksize = mc->mc_txn->mt_dbs[mc->mc_dbi].md_pad; + ksize = mc->mc_db->md_pad; split = LEAF2KEY(mp, split_indx, ksize); rsize = (nkeys - split_indx) * ksize; lsize = (nkeys - split_indx) * sizeof(indx_t); @@ -4549,10 +4504,7 @@ mdb_put(MDB_txn *txn, MDB_dbi dbi, if ((flags & (MDB_NOOVERWRITE|MDB_NODUPDATA)) != flags) return EINVAL; - mc.mc_txn = txn; - mc.mc_dbi = dbi; - mc.mc_snum = 0; - mc.mc_flags = 0; + mdb_cursor_init(&mc, txn, dbi); if (txn->mt_dbs[dbi].md_flags & MDB_DUPSORT) { mc.mc_xcursor = &mx; mdb_xcursor_init0(&mc); @@ -4701,9 +4653,7 @@ int mdb_open(MDB_txn *txn, const char *name, unsigned int flags, MDB_dbi *dbi) memset(&dummy, 0, sizeof(dummy)); dummy.md_root = P_INVALID; dummy.md_flags = flags & 0xffff; - mc.mc_txn = txn; - mc.mc_dbi = MAIN_DBI; - mc.mc_flags = 0; + mdb_cursor_init(&mc, txn, MAIN_DBI); rc = mdb_cursor_put(&mc, &key, &data, F_SUBDATA); dirty = 1; } diff --git a/libraries/libmdb/mtest6.c b/libraries/libmdb/mtest6.c new file mode 100644 index 0000000000..36ad267501 --- /dev/null +++ b/libraries/libmdb/mtest6.c @@ -0,0 +1,131 @@ +/* mtest6.c - memory-mapped database tester/toy */ +/* + * Copyright 2011 Howard Chu, Symas Corp. + * All rights reserved. 
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted only as authorized by the OpenLDAP + * Public License. + * + * A copy of this license is available in the file LICENSE in the + * top-level directory of the distribution or, alternatively, at + * <http://www.OpenLDAP.org/license.html>. + */ + +/* Tests for DB splits and merges */ +#define _XOPEN_SOURCE 500 /* srandom(), random() */ +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <time.h> +#include "mdb.h" + +char dkbuf[1024]; + +int main(int argc,char * argv[]) +{ + int i = 0, j = 0, rc; + MDB_env *env; + MDB_dbi dbi; + MDB_val key, data; + MDB_txn *txn; + MDB_stat mst; + MDB_cursor *cursor; + int count; + int *values; + long kval; + char *sval; + + srandom(time(NULL)); + + rc = mdb_env_create(&env); + rc = mdb_env_set_mapsize(env, 10485760); + rc = mdb_env_set_maxdbs(env, 4); + rc = mdb_env_open(env, "./testdb", MDB_FIXEDMAP|MDB_NOSYNC, 0664); + rc = mdb_txn_begin(env, 0, &txn); + rc = mdb_open(txn, "id2", MDB_CREATE|MDB_INTEGERKEY, &dbi); + rc = mdb_cursor_open(txn, dbi, &cursor); + rc = mdb_stat(txn, dbi, &mst); + + sval = calloc(1, mst.ms_psize / 4); + key.mv_size = sizeof(long); + key.mv_data = &kval; + data.mv_size = mst.ms_psize / 4 - 30; + data.mv_data = sval; + + printf("Adding 12 values, should yield 3 splits\n"); + for (i=0;i<12;i++) { + kval = i*5; + sprintf(sval, "%08x", kval); + rc = mdb_cursor_put(cursor, &key, &data, MDB_NOOVERWRITE); + } + printf("Adding 12 more values, should yield 3 splits\n"); + for (i=0;i<12;i++) { + kval = i*5+4; + sprintf(sval, "%08x", kval); + rc = mdb_cursor_put(cursor, &key, &data, MDB_NOOVERWRITE); + } + printf("Adding 12 more values, should yield 3 splits\n"); + for (i=0;i<12;i++) { + kval = i*5+1; + sprintf(sval, "%08x", kval); + rc = mdb_cursor_put(cursor, &key, &data, MDB_NOOVERWRITE); + } + rc = mdb_cursor_get(cursor, &key, &data, MDB_FIRST); + + do { + printf("key: %p %s, data: %p %.*s\n", + key.mv_data, mdb_dkey(&key, dkbuf), + data.mv_data, (int) data.mv_size,
(char *) data.mv_data); + } while ((rc = mdb_cursor_get(cursor, &key, &data, MDB_NEXT)) == 0); + mdb_cursor_close(cursor); + mdb_txn_commit(txn); + +#if 0 + j=0; + + for (i= count - 1; i > -1; i-= (random()%5)) { + j++; + txn=NULL; + rc = mdb_txn_begin(env, 0, &txn); + sprintf(kval, "%03x", values[i & ~0x0f]); + sprintf(sval, "%03x %d foo bar", values[i], values[i]); + key.mv_size = sizeof(int); + key.mv_data = kval; + data.mv_size = sizeof(sval); + data.mv_data = sval; + rc = mdb_del(txn, dbi, &key, &data); + if (rc) { + j--; + mdb_txn_abort(txn); + } else { + rc = mdb_txn_commit(txn); + } + } + free(values); + printf("Deleted %d values\n", j); + + rc = mdb_env_stat(env, &mst); + rc = mdb_txn_begin(env, 1, &txn); + rc = mdb_cursor_open(txn, dbi, &cursor); + printf("Cursor next\n"); + while ((rc = mdb_cursor_get(cursor, &key, &data, MDB_NEXT)) == 0) { + printf("key: %.*s, data: %.*s\n", + (int) key.mv_size, (char *) key.mv_data, + (int) data.mv_size, (char *) data.mv_data); + } + printf("Cursor prev\n"); + while ((rc = mdb_cursor_get(cursor, &key, &data, MDB_PREV)) == 0) { + printf("key: %.*s, data: %.*s\n", + (int) key.mv_size, (char *) key.mv_data, + (int) data.mv_size, (char *) data.mv_data); + } + mdb_cursor_close(cursor); + mdb_close(txn, dbi); + + mdb_txn_abort(txn); +#endif + mdb_env_close(env); + + return 0; +} From 8acb7550349352522a4e2eb10bfa27850df89079 Mon Sep 17 00:00:00 2001 From: Howard Chu Date: Sun, 11 Sep 2011 23:13:36 -0700 Subject: [PATCH 15/16] One more sub-cursor fix --- libraries/libmdb/mdb.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/libraries/libmdb/mdb.c b/libraries/libmdb/mdb.c index 225ffc4195..386912464e 100644 --- a/libraries/libmdb/mdb.c +++ b/libraries/libmdb/mdb.c @@ -3010,6 +3010,9 @@ set3: } } else { + if (mc->mc_db->md_flags & MDB_DUPSORT) { + mc->mc_xcursor->mx_cursor.mc_flags &= ~C_INITIALIZED; + } if ((rc = mdb_read_data(mc->mc_txn, leaf, data)) != MDB_SUCCESS) return rc; } From 
25529a4c36903d0456b1251712de32f665850029 Mon Sep 17 00:00:00 2001 From: Howard Chu Date: Sun, 11 Sep 2011 23:57:17 -0700 Subject: [PATCH 16/16] More cursor_init cleanup --- libraries/libmdb/mdb.c | 61 +++++++++++++++++------------------------- 1 file changed, 25 insertions(+), 36 deletions(-) diff --git a/libraries/libmdb/mdb.c b/libraries/libmdb/mdb.c index 386912464e..8769ebb4aa 100644 --- a/libraries/libmdb/mdb.c +++ b/libraries/libmdb/mdb.c @@ -839,7 +839,7 @@ static int mdb_cursor_set(MDB_cursor *mc, MDB_val *key, MDB_val *data, MDB_curso static int mdb_cursor_first(MDB_cursor *mc, MDB_val *key, MDB_val *data); static int mdb_cursor_last(MDB_cursor *mc, MDB_val *key, MDB_val *data); -static void mdb_cursor_init(MDB_cursor *mc, MDB_txn *txn, MDB_dbi dbi); +static void mdb_cursor_init(MDB_cursor *mc, MDB_txn *txn, MDB_dbi dbi, MDB_xcursor *mx); static void mdb_xcursor_init0(MDB_cursor *mc); static void mdb_xcursor_init1(MDB_cursor *mc, MDB_node *node); @@ -967,7 +967,7 @@ mdb_alloc_page(MDB_cursor *mc, int num) MDB_node *leaf; txnid_t *kptr, oldest; - mdb_cursor_init(&m2, txn, FREE_DBI); + mdb_cursor_init(&m2, txn, FREE_DBI, NULL); mdb_search_page(&m2, NULL, 0); leaf = NODEPTR(m2.mc_pg[m2.mc_top], 0); kptr = (txnid_t *)NODEKEY(leaf); @@ -1340,7 +1340,7 @@ mdb_txn_commit(MDB_txn *txn) DPRINTF("committing txn %zu %p on mdbenv %p, root page %zu", txn->mt_txnid, (void *)txn, (void *)env, txn->mt_dbs[MAIN_DBI].md_root); - mdb_cursor_init(&mc, txn, FREE_DBI); + mdb_cursor_init(&mc, txn, FREE_DBI, NULL); /* should only be one record now */ if (env->me_pghead) { @@ -1410,7 +1410,7 @@ mdb_txn_commit(MDB_txn *txn) MDB_val data; data.mv_size = sizeof(MDB_db); - mdb_cursor_init(&mc, txn, MAIN_DBI); + mdb_cursor_init(&mc, txn, MAIN_DBI, NULL); for (i = 2; i < txn->mt_numdbs; i++) { if (txn->mt_dbxs[i].md_dirty) { data.mv_data = &txn->mt_dbs[i]; @@ -2619,7 +2619,7 @@ mdb_search_page(MDB_cursor *mc, MDB_val *key, int modify) /* For sub-databases, update main root first */ if 
(mc->mc_dbi > MAIN_DBI && !mc->mc_dbx->md_dirty) { MDB_cursor mc2; - mdb_cursor_init(&mc2, mc->mc_txn, MAIN_DBI); + mdb_cursor_init(&mc2, mc->mc_txn, MAIN_DBI, NULL); rc = mdb_search_page(&mc2, &mc->mc_dbx->md_name, 1); if (rc) return rc; @@ -2681,13 +2681,7 @@ mdb_get(MDB_txn *txn, MDB_dbi dbi, return EINVAL; } - mdb_cursor_init(&mc, txn, dbi); - if (txn->mt_dbs[dbi].md_flags & MDB_DUPSORT) { - mc.mc_xcursor = &mx; - mdb_xcursor_init0(&mc); - } else { - mc.mc_xcursor = NULL; - } + mdb_cursor_init(&mc, txn, dbi, &mx); return mdb_cursor_set(&mc, key, data, MDB_SET, &exact); } @@ -3010,9 +3004,8 @@ set3: } } else { - if (mc->mc_db->md_flags & MDB_DUPSORT) { - mc->mc_xcursor->mx_cursor.mc_flags &= ~C_INITIALIZED; - } + if (mc->mc_xcursor) + mc->mc_xcursor->mx_cursor.mc_flags = 0; if ((rc = mdb_read_data(mc->mc_txn, leaf, data)) != MDB_SUCCESS) return rc; } @@ -3100,6 +3093,8 @@ mdb_cursor_last(MDB_cursor *mc, MDB_val *key, MDB_val *data) if (rc) return rc; } else { + if (mc->mc_xcursor) + mc->mc_xcursor->mx_cursor.mc_flags = 0; if ((rc = mdb_read_data(mc->mc_txn, leaf, data)) != MDB_SUCCESS) return rc; } @@ -3229,7 +3224,7 @@ mdb_cursor_touch(MDB_cursor *mc) if (mc->mc_dbi > MAIN_DBI && !mc->mc_dbx->md_dirty) { MDB_cursor mc2; - mdb_cursor_init(&mc2, mc->mc_txn, MAIN_DBI); + mdb_cursor_init(&mc2, mc->mc_txn, MAIN_DBI, NULL); rc = mdb_search_page(&mc2, &mc->mc_dbx->md_name, 1); if (rc) return rc; @@ -3747,7 +3742,7 @@ mdb_xcursor_init1(MDB_cursor *mc, MDB_node *node) } static void -mdb_cursor_init(MDB_cursor *mc, MDB_txn *txn, MDB_dbi dbi) +mdb_cursor_init(MDB_cursor *mc, MDB_txn *txn, MDB_dbi dbi, MDB_xcursor *mx) { mc->mc_dbi = dbi; mc->mc_txn = txn; @@ -3755,12 +3750,20 @@ mdb_cursor_init(MDB_cursor *mc, MDB_txn *txn, MDB_dbi dbi) mc->mc_dbx = &txn->mt_dbxs[dbi]; mc->mc_snum = 0; mc->mc_flags = 0; + if (txn->mt_dbs[dbi].md_flags & MDB_DUPSORT) { + assert(mx != NULL); + mc->mc_xcursor = mx; + mdb_xcursor_init0(mc); + } else { + mc->mc_xcursor = NULL; + } } int 
mdb_cursor_open(MDB_txn *txn, MDB_dbi dbi, MDB_cursor **ret) { MDB_cursor *mc; + MDB_xcursor *mx = NULL; size_t size = sizeof(MDB_cursor); if (txn == NULL || ret == NULL || !dbi || dbi >= txn->mt_numdbs) @@ -3770,12 +3773,10 @@ mdb_cursor_open(MDB_txn *txn, MDB_dbi dbi, MDB_cursor **ret) size += sizeof(MDB_xcursor); if ((mc = malloc(size)) != NULL) { - mdb_cursor_init(mc, txn, dbi); if (txn->mt_dbs[dbi].md_flags & MDB_DUPSORT) { - MDB_xcursor *mx = (MDB_xcursor *)(mc + 1); - mc->mc_xcursor = mx; - mdb_xcursor_init0(mc); + mx = (MDB_xcursor *)(mc + 1); } + mdb_cursor_init(mc, txn, dbi, mx); } else { return ENOMEM; } @@ -4203,13 +4204,7 @@ mdb_del(MDB_txn *txn, MDB_dbi dbi, return EINVAL; } - mdb_cursor_init(&mc, txn, dbi); - if (txn->mt_dbs[dbi].md_flags & MDB_DUPSORT) { - mc.mc_xcursor = &mx; - mdb_xcursor_init0(&mc); - } else { - mc.mc_xcursor = NULL; - } + mdb_cursor_init(&mc, txn, dbi, &mx); exact = 0; if (data) { @@ -4524,13 +4519,7 @@ mdb_put(MDB_txn *txn, MDB_dbi dbi, if ((flags & (MDB_NOOVERWRITE|MDB_NODUPDATA)) != flags) return EINVAL; - mdb_cursor_init(&mc, txn, dbi); - if (txn->mt_dbs[dbi].md_flags & MDB_DUPSORT) { - mc.mc_xcursor = &mx; - mdb_xcursor_init0(&mc); - } else { - mc.mc_xcursor = NULL; - } + mdb_cursor_init(&mc, txn, dbi, &mx); return mdb_cursor_put(&mc, key, data, flags); } @@ -4673,7 +4662,7 @@ int mdb_open(MDB_txn *txn, const char *name, unsigned int flags, MDB_dbi *dbi) memset(&dummy, 0, sizeof(dummy)); dummy.md_root = P_INVALID; dummy.md_flags = flags & 0xffff; - mdb_cursor_init(&mc, txn, MAIN_DBI); + mdb_cursor_init(&mc, txn, MAIN_DBI, NULL); rc = mdb_cursor_put(&mc, &key, &data, F_SUBDATA); dirty = 1; }