From aa10c98f3f8effc7c200e08a56b20a6816051e7e Mon Sep 17 00:00:00 2001 From: Hallvard Furuseth Date: Wed, 3 Dec 2014 12:37:55 +0100 Subject: [PATCH 01/34] For ITS#7789: Ensure mapsize >= pages in use. Check new mapsizes against mm_last_pg. Move mdb_env_init_meta0() so it can set mm_last_pg earlier. --- libraries/liblmdb/mdb.c | 35 +++++++++++++++++------------------ 1 file changed, 17 insertions(+), 18 deletions(-) diff --git a/libraries/liblmdb/mdb.c b/libraries/liblmdb/mdb.c index 828e43205d..d5387bc3bc 100644 --- a/libraries/liblmdb/mdb.c +++ b/libraries/liblmdb/mdb.c @@ -3513,6 +3513,7 @@ mdb_env_read_header(MDB_env *env, MDB_meta *meta) return 0; } +/** Fill in most of the zeroed #MDB_meta for an empty database environment */ static void ESECT mdb_env_init_meta0(MDB_env *env, MDB_meta *meta) { @@ -3529,7 +3530,7 @@ mdb_env_init_meta0(MDB_env *env, MDB_meta *meta) /** Write the environment parameters of a freshly created DB environment. * @param[in] env the environment handle - * @param[out] meta address of where to store the meta information + * @param[in] meta the #MDB_meta to write * @return 0 on success, non-zero on failure. */ static int ESECT @@ -3557,8 +3558,6 @@ mdb_env_init_meta(MDB_env *env, MDB_meta *meta) psize = env->me_psize; - mdb_env_init_meta0(env, meta); - p = calloc(2, psize); if (!p) return ENOMEM; @@ -3836,16 +3835,16 @@ mdb_env_set_mapsize(MDB_env *env, size_t size) */ if (env->me_map) { int rc; + MDB_meta *meta; void *old; if (env->me_txn) return EINVAL; + meta = env->me_metas[mdb_env_pick_meta(env)]; if (!size) - size = env->me_metas[mdb_env_pick_meta(env)]->mm_mapsize; - else if (size < env->me_mapsize) { - /* If the configured size is smaller, make sure it's - * still big enough. Silently round up to minimum if not. 
- */ - size_t minsize = (env->me_metas[mdb_env_pick_meta(env)]->mm_last_pg + 1) * env->me_psize; + size = meta->mm_mapsize; + { + /* Silently round up to minimum if the size is too small */ + size_t minsize = (meta->mm_last_pg + 1) * env->me_psize; if (size < minsize) size = minsize; } @@ -3980,8 +3979,6 @@ mdb_env_open2(MDB_env *env) } #endif - memset(&meta, 0, sizeof(meta)); - if ((i = mdb_env_read_header(env, &meta)) != 0) { if (i != ENOENT) return i; @@ -3990,24 +3987,26 @@ mdb_env_open2(MDB_env *env) env->me_psize = env->me_os_psize; if (env->me_psize > MAX_PAGESIZE) env->me_psize = MAX_PAGESIZE; + memset(&meta, 0, sizeof(meta)); + mdb_env_init_meta0(env, &meta); + meta.mm_mapsize = DEFAULT_MAPSIZE; } else { env->me_psize = meta.mm_psize; } /* Was a mapsize configured? */ if (!env->me_mapsize) { - /* If this is a new environment, take the default, - * else use the size recorded in the existing env. - */ - env->me_mapsize = newenv ? DEFAULT_MAPSIZE : meta.mm_mapsize; - } else if (env->me_mapsize < meta.mm_mapsize) { - /* If the configured size is smaller, make sure it's - * still big enough. Silently round up to minimum if not. + env->me_mapsize = meta.mm_mapsize; + } + { + /* Make sure mapsize >= committed data size. Even when using + * mm_mapsize, which could be broken in old files (ITS#7789). */ size_t minsize = (meta.mm_last_pg + 1) * meta.mm_psize; if (env->me_mapsize < minsize) env->me_mapsize = minsize; } + meta.mm_mapsize = env->me_mapsize; rc = mdb_env_map(env, (flags & MDB_FIXEDMAP) ? 
meta.mm_address : NULL); if (rc) From f3d3f60e3dd051f2a4fcb0fe4deb000e988f3eab Mon Sep 17 00:00:00 2001 From: Hallvard Furuseth Date: Mon, 8 Dec 2014 09:56:36 +0100 Subject: [PATCH 02/34] Try to avoid an invalid datafile after failed init --- libraries/liblmdb/mdb.c | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/libraries/liblmdb/mdb.c b/libraries/liblmdb/mdb.c index d5387bc3bc..da09f8dbfe 100644 --- a/libraries/liblmdb/mdb.c +++ b/libraries/liblmdb/mdb.c @@ -4008,6 +4008,20 @@ mdb_env_open2(MDB_env *env) } meta.mm_mapsize = env->me_mapsize; + if (newenv && !(flags & MDB_FIXEDMAP)) { + /* mdb_env_map() may grow the datafile. Write the metapages + * first, so the file will be valid if initialization fails. + * Except with FIXEDMAP, since we do not yet know mm_address. + * We could fill in mm_address later, but then a different + * program might end up doing that - one with a memory layout + * and map address which does not suit the main program. + */ + rc = mdb_env_init_meta(env, &meta); + if (rc) + return rc; + newenv = 0; + } + rc = mdb_env_map(env, (flags & MDB_FIXEDMAP) ? meta.mm_address : NULL); if (rc) return rc; From 076b773d6bab49e1b467051cc09f2522444a22f3 Mon Sep 17 00:00:00 2001 From: David Barbour Date: Mon, 12 Jan 2015 23:00:30 +0100 Subject: [PATCH 03/34] ITS#7994 Access to current transaction ID. commit b660491d3fa359325cb3615a276bba78f32e2075 (mdb_txn_id()) plus c36c167cc49a59d4f40ae5fc259c013de601164e (fix prev commit) from mdb.master. --- libraries/liblmdb/lmdb.h | 11 +++++++++++ libraries/liblmdb/mdb.c | 7 +++++++ 2 files changed, 18 insertions(+) diff --git a/libraries/liblmdb/lmdb.h b/libraries/liblmdb/lmdb.h index c5e5c9bf12..ee302a2365 100644 --- a/libraries/liblmdb/lmdb.h +++ b/libraries/liblmdb/lmdb.h @@ -953,6 +953,17 @@ int mdb_txn_begin(MDB_env *env, MDB_txn *parent, unsigned int flags, MDB_txn ** */ MDB_env *mdb_txn_env(MDB_txn *txn); + /** @brief Return the transaction's ID. 
+ * + * This returns the identifier associated with this transaction. For a + * read-only transaction, this corresponds to the snapshot being read; + * concurrent readers will frequently have the same transaction ID. + * + * @param[in] txn A transaction handle returned by #mdb_txn_begin() + * @return A transaction ID, valid if input is an active transaction. + */ +size_t mdb_txn_id(MDB_txn *txn); + /** @brief Commit all the operations of a transaction into the database. * * The transaction handle is freed. It and its cursors must not be used diff --git a/libraries/liblmdb/mdb.c b/libraries/liblmdb/mdb.c index da09f8dbfe..2ae60436b5 100644 --- a/libraries/liblmdb/mdb.c +++ b/libraries/liblmdb/mdb.c @@ -2766,6 +2766,13 @@ mdb_txn_env(MDB_txn *txn) return txn->mt_env; } +size_t +mdb_txn_id(MDB_txn *txn) +{ + if(!txn) return 0; + return txn->mt_txnid; +} + /** Export or close DBI handles opened in this txn. */ static void mdb_dbis_update(MDB_txn *txn, int keep) From c65ca298b946251bf9fa70de4b0d1a449bbad94e Mon Sep 17 00:00:00 2001 From: "leo@yuriev.ru" Date: Sat, 17 Jan 2015 06:50:12 +0100 Subject: [PATCH 04/34] ITS#7971 mdb_txn_renew0(): Fix new readers slots. commit 9a8eb95674c7b500cfe5f44d03493ff76c9fc0c1 (mdb_txn_renew0) plus b2ab9910dd5f13ca2d92eeb4c39a90b922df8dfe (reader allocation) from mdb.master. 
--- libraries/liblmdb/mdb.c | 32 ++++++++++++++++++++------------ 1 file changed, 20 insertions(+), 12 deletions(-) diff --git a/libraries/liblmdb/mdb.c b/libraries/liblmdb/mdb.c index 2ae60436b5..67e0e19165 100644 --- a/libraries/liblmdb/mdb.c +++ b/libraries/liblmdb/mdb.c @@ -1121,7 +1121,8 @@ struct MDB_env { unsigned int me_psize; /**< DB page size, inited from me_os_psize */ unsigned int me_os_psize; /**< OS page size, from #GET_PAGESIZE */ unsigned int me_maxreaders; /**< size of the reader table */ - unsigned int me_numreaders; /**< max numreaders set by this env */ + /** Max #MDB_txninfo.%mti_numreaders of interest to #mdb_env_close() */ + volatile int me_close_readers; MDB_dbi me_numdbs; /**< number of DBs opened */ MDB_dbi me_maxdbs; /**< size of the DB table */ MDB_PID_T me_pid; /**< process ID of this env */ @@ -2545,15 +2546,22 @@ mdb_txn_renew0(MDB_txn *txn) UNLOCK_MUTEX_R(env); return MDB_READERS_FULL; } - ti->mti_readers[i].mr_pid = pid; - ti->mti_readers[i].mr_tid = tid; + r = &ti->mti_readers[i]; + /* Claim the reader slot, carefully since other code + * uses the reader table un-mutexed: First reset the + * slot, next publish it in mti_numreaders. After + * that, it is safe for mdb_env_close() to touch it. + * When it will be closed, we can finally claim it. + */ + r->mr_pid = 0; + r->mr_txnid = (txnid_t)-1; + r->mr_tid = tid; if (i == nr) ti->mti_numreaders = ++nr; - /* Save numreaders for un-mutexed mdb_env_close() */ - env->me_numreaders = nr; + env->me_close_readers = nr; + r->mr_pid = pid; UNLOCK_MUTEX_R(env); - r = &ti->mti_readers[i]; new_notls = (env->me_flags & MDB_NOTLS); if (!new_notls && (rc=pthread_setspecific(env->me_txkey, r))) { r->mr_pid = 0; @@ -4747,8 +4755,12 @@ mdb_env_close0(MDB_env *env, int excl) MDB_PID_T pid = env->me_pid; /* Clearing readers is done in this function because * me_txkey with its destructor must be disabled first. 
+ * + * We skip the reader mutex, so we touch only + * data owned by this process (me_close_readers and + * our readers), and clear each reader atomically. */ - for (i = env->me_numreaders; --i >= 0; ) + for (i = env->me_close_readers; --i >= 0; ) if (env->me_txns->mti_readers[i].mr_pid == pid) env->me_txns->mti_readers[i].mr_pid = 0; #ifdef _WIN32 @@ -9181,11 +9193,7 @@ mdb_env_info(MDB_env *env, MDB_envinfo *arg) arg->me_mapaddr = env->me_metas[toggle]->mm_address; arg->me_mapsize = env->me_mapsize; arg->me_maxreaders = env->me_maxreaders; - - /* me_numreaders may be zero if this process never used any readers. Use - * the shared numreader count if it exists. - */ - arg->me_numreaders = env->me_txns ? env->me_txns->mti_numreaders : env->me_numreaders; + arg->me_numreaders = env->me_txns ? env->me_txns->mti_numreaders : 0; arg->me_last_pgno = env->me_metas[toggle]->mm_last_pg; arg->me_last_txnid = env->me_metas[toggle]->mm_txnid; From c0170bc0ec987e82f57298ad88ebbe2e9a300130 Mon Sep 17 00:00:00 2001 From: Hallvard Furuseth Date: Wed, 8 Apr 2015 21:51:50 +0200 Subject: [PATCH 05/34] Copy some env flags to txn. Factor out flags. Taken from mdb_txn_begin(,,MDB_NOMETASYNC, MDB_NOSYNC,,) without adding those two flags yet, to align with mdb.master: Part of 54516639acab87aad156230f8a799e9128d266fe (Renumber...) + 8adee9464f65d1702b81a7c604f1a48baa7a0ad5 (Fix per-txn...). 
--- libraries/liblmdb/mdb.c | 35 +++++++++++++++++++++-------------- 1 file changed, 21 insertions(+), 14 deletions(-) diff --git a/libraries/liblmdb/mdb.c b/libraries/liblmdb/mdb.c index 67e0e19165..2444d23779 100644 --- a/libraries/liblmdb/mdb.c +++ b/libraries/liblmdb/mdb.c @@ -1016,7 +1016,11 @@ struct MDB_txn { * @ingroup internal * @{ */ -#define MDB_TXN_RDONLY 0x01 /**< read-only transaction */ + /** #mdb_txn_begin() flags */ +#define MDB_TXN_BEGIN_FLAGS MDB_RDONLY +#define MDB_TXN_RDONLY MDB_RDONLY /**< read-only transaction */ + /* internal txn flags */ +#define MDB_TXN_WRITEMAP MDB_WRITEMAP /**< copy of #MDB_env flag in writers */ #define MDB_TXN_ERROR 0x02 /**< txn is unusable after an error */ #define MDB_TXN_DIRTY 0x04 /**< must write, even if dirty list is empty */ #define MDB_TXN_SPILLS 0x08 /**< txn or a parent has spilled pages */ @@ -1942,7 +1946,7 @@ mdb_page_dirty(MDB_txn *txn, MDB_page *mp) MDB_ID2 mid; int rc, (*insert)(MDB_ID2L, MDB_ID2 *); - if (txn->mt_env->me_flags & MDB_WRITEMAP) { + if (txn->mt_flags & MDB_TXN_WRITEMAP) { insert = mdb_mid2l_append; } else { insert = mdb_mid2l_insert; @@ -2662,18 +2666,20 @@ mdb_txn_begin(MDB_env *env, MDB_txn *parent, unsigned int flags, MDB_txn **ret) MDB_ntxn *ntxn; int rc, size, tsize = sizeof(MDB_txn); + flags &= MDB_TXN_BEGIN_FLAGS; + flags |= env->me_flags & MDB_WRITEMAP; + if (env->me_flags & MDB_FATAL_ERROR) { DPUTS("environment had fatal error, must shutdown!"); return MDB_PANIC; } - if ((env->me_flags & MDB_RDONLY) && !(flags & MDB_RDONLY)) + if (env->me_flags & MDB_RDONLY & ~flags) /* write txn in RDONLY env */ return EACCES; if (parent) { /* Nested transactions: Max 1 child, write txns only, no writemap */ + flags |= parent->mt_flags; if (parent->mt_child || - (flags & MDB_RDONLY) || - (parent->mt_flags & (MDB_TXN_RDONLY|MDB_TXN_ERROR)) || - (env->me_flags & MDB_WRITEMAP)) + (flags & (MDB_RDONLY|MDB_WRITEMAP|MDB_TXN_ERROR))) { return (parent->mt_flags & MDB_TXN_RDONLY) ? 
EINVAL : MDB_BAD_TXN; } @@ -2696,7 +2702,6 @@ mdb_txn_begin(MDB_env *env, MDB_txn *parent, unsigned int flags, MDB_txn **ret) } txn->mt_dbs = (MDB_db *) ((char *)txn + tsize); if (flags & MDB_RDONLY) { - txn->mt_flags |= MDB_TXN_RDONLY; txn->mt_dbflags = (unsigned char *)(txn->mt_dbs + env->me_maxdbs); txn->mt_dbiseqs = env->me_dbiseqs; } else { @@ -2709,6 +2714,7 @@ mdb_txn_begin(MDB_env *env, MDB_txn *parent, unsigned int flags, MDB_txn **ret) txn->mt_dbflags = (unsigned char *)(txn->mt_dbiseqs + env->me_maxdbs); } } + txn->mt_flags = flags; txn->mt_env = env; ok: @@ -2730,7 +2736,6 @@ ok: parent->mt_child = txn; txn->mt_parent = parent; txn->mt_numdbs = parent->mt_numdbs; - txn->mt_flags = parent->mt_flags; txn->mt_dbxs = parent->mt_dbxs; memcpy(txn->mt_dbs, parent->mt_dbs, txn->mt_numdbs * sizeof(MDB_db)); /* Copy parent's mt_dbflags, but clear DB_NEW */ @@ -2758,9 +2763,10 @@ ok: if (txn != env->me_txn0) free(txn); } else { + txn->mt_flags |= flags; /* for txn==me_txn0, no effect otherwise */ *ret = txn; DPRINTF(("begin txn %"Z"u%c %p on mdbenv %p, root page %"Z"u", - txn->mt_txnid, (txn->mt_flags & MDB_TXN_RDONLY) ? 'r' : 'w', + txn->mt_txnid, (flags & MDB_RDONLY) ? 
'r' : 'w', (void *) txn, (void *) env, txn->mt_dbs[MAIN_DBI].md_root)); } @@ -3606,6 +3612,7 @@ mdb_env_write_meta(MDB_txn *txn) { MDB_env *env; MDB_meta meta, metab, *mp; + unsigned flags; size_t mapsize; off_t off; int rc, len, toggle; @@ -3622,19 +3629,20 @@ mdb_env_write_meta(MDB_txn *txn) toggle, txn->mt_dbs[MAIN_DBI].md_root)); env = txn->mt_env; + flags = env->me_flags; mp = env->me_metas[toggle]; mapsize = env->me_metas[toggle ^ 1]->mm_mapsize; /* Persist any increases of mapsize config */ if (mapsize < env->me_mapsize) mapsize = env->me_mapsize; - if (env->me_flags & MDB_WRITEMAP) { + if (flags & MDB_WRITEMAP) { mp->mm_mapsize = mapsize; mp->mm_dbs[0] = txn->mt_dbs[0]; mp->mm_dbs[1] = txn->mt_dbs[1]; mp->mm_last_pg = txn->mt_next_pgno - 1; mp->mm_txnid = txn->mt_txnid; - if (!(env->me_flags & (MDB_NOMETASYNC|MDB_NOSYNC))) { + if (!(flags & (MDB_NOMETASYNC|MDB_NOSYNC))) { unsigned meta_size = env->me_psize; rc = (env->me_flags & MDB_MAPASYNC) ? MS_ASYNC : MS_SYNC; ptr = env->me_map; @@ -3670,8 +3678,7 @@ mdb_env_write_meta(MDB_txn *txn) off += PAGEHDRSZ; /* Write to the SYNC fd */ - mfd = env->me_flags & (MDB_NOSYNC|MDB_NOMETASYNC) ? - env->me_fd : env->me_mfd; + mfd = (flags & (MDB_NOSYNC|MDB_NOMETASYNC)) ? env->me_fd : env->me_mfd; #ifdef _WIN32 { memset(&ov, 0, sizeof(ov)); @@ -5081,7 +5088,7 @@ mdb_page_get(MDB_txn *txn, pgno_t pgno, MDB_page **ret, int *lvl) MDB_page *p = NULL; int level; - if (!((txn->mt_flags & MDB_TXN_RDONLY) | (env->me_flags & MDB_WRITEMAP))) { + if (! (txn->mt_flags & (MDB_TXN_RDONLY|MDB_TXN_WRITEMAP))) { MDB_txn *tx2 = txn; level = 1; do { From c616689d9ada98a21fab6e7143b2f014c92d021d Mon Sep 17 00:00:00 2001 From: Hallvard Furuseth Date: Wed, 8 Apr 2015 21:52:05 +0200 Subject: [PATCH 06/34] Cleanup MDB_env.me_txn0. More fallout from 4d02c741b120786df1b87ee9ed49c1d3f9bc7522. 
--- libraries/liblmdb/mdb.c | 32 ++++++++++++++++---------------- 1 file changed, 16 insertions(+), 16 deletions(-) diff --git a/libraries/liblmdb/mdb.c b/libraries/liblmdb/mdb.c index 2444d23779..1e0579c8a7 100644 --- a/libraries/liblmdb/mdb.c +++ b/libraries/liblmdb/mdb.c @@ -2581,6 +2581,7 @@ mdb_txn_renew0(MDB_txn *txn) } txn->mt_dbxs = env->me_dbxs; /* mostly static anyway */ } else { + /* Not yet touching txn == env->me_txn0, it may be active */ if (ti) { LOCK_MUTEX_W(env); @@ -2664,7 +2665,7 @@ mdb_txn_begin(MDB_env *env, MDB_txn *parent, unsigned int flags, MDB_txn **ret) { MDB_txn *txn; MDB_ntxn *ntxn; - int rc, size, tsize = sizeof(MDB_txn); + int rc, size, tsize; flags &= MDB_TXN_BEGIN_FLAGS; flags |= env->me_flags & MDB_WRITEMAP; @@ -2675,6 +2676,8 @@ mdb_txn_begin(MDB_env *env, MDB_txn *parent, unsigned int flags, MDB_txn **ret) } if (env->me_flags & MDB_RDONLY & ~flags) /* write txn in RDONLY env */ return EACCES; + + size = tsize = sizeof(MDB_txn); if (parent) { /* Nested transactions: Max 1 child, write txns only, no writemap */ flags |= parent->mt_flags; @@ -2683,16 +2686,15 @@ mdb_txn_begin(MDB_env *env, MDB_txn *parent, unsigned int flags, MDB_txn **ret) { return (parent->mt_flags & MDB_TXN_RDONLY) ? EINVAL : MDB_BAD_TXN; } - tsize = sizeof(MDB_ntxn); - } - size = tsize; - if (!(flags & MDB_RDONLY)) { - if (!parent) { - txn = env->me_txn0; /* just reuse preallocated write txn */ - goto ok; - } - /* child txns use own copy of cursors */ + /* Child txns save MDB_pgstate and use own copy of cursors */ + size = tsize = sizeof(MDB_ntxn); size += env->me_maxdbs * sizeof(MDB_cursor *); + } else if (!(flags & MDB_RDONLY)) { + /* Reuse preallocated write txn. However, do not touch it until + * mdb_txn_renew0() succeeds, since it currently may be active. 
+ */ + txn = env->me_txn0; + goto renew; } size += env->me_maxdbs * (sizeof(MDB_db)+1); @@ -2717,7 +2719,6 @@ mdb_txn_begin(MDB_env *env, MDB_txn *parent, unsigned int flags, MDB_txn **ret) txn->mt_flags = flags; txn->mt_env = env; -ok: if (parent) { unsigned int i; txn->mt_u.dirty_list = malloc(sizeof(MDB_ID2)*MDB_IDL_UM_SIZE); @@ -2757,6 +2758,7 @@ ok: if (rc) mdb_txn_reset0(txn, "beginchild-fail"); } else { +renew: rc = mdb_txn_renew0(txn); } if (rc) { @@ -4684,15 +4686,13 @@ mdb_env_open(MDB_env *env, const char *path, unsigned int flags, mdb_mode_t mode if (rc) goto leave; } - if (!((flags & MDB_RDONLY) || - (env->me_pbuf = calloc(1, env->me_psize)))) - rc = ENOMEM; if (!(flags & MDB_RDONLY)) { MDB_txn *txn; int tsize = sizeof(MDB_txn), size = tsize + env->me_maxdbs * (sizeof(MDB_db)+sizeof(MDB_cursor *)+sizeof(unsigned int)+1); - txn = calloc(1, size); - if (txn) { + if ((env->me_pbuf = calloc(1, env->me_psize)) && + (txn = calloc(1, size))) + { txn->mt_dbs = (MDB_db *)((char *)txn + tsize); txn->mt_cursors = (MDB_cursor **)(txn->mt_dbs + env->me_maxdbs); txn->mt_dbiseqs = (unsigned int *)(txn->mt_cursors + env->me_maxdbs); From 3f6fa7aecec134b1c8d16186bc178c541a49d381 Mon Sep 17 00:00:00 2001 From: Hallvard Furuseth Date: Wed, 20 May 2015 04:04:38 +0200 Subject: [PATCH 07/34] Set/clear mp_pad, md_pad (MDB_DUPFIXED data size). mdb_xcursor_init1(): md_pad is only used when MDB_DUPFIXED. mdb_page_split(): Copy mp_pad too. Used by mdb_page_list(). 
--- libraries/liblmdb/mdb.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/libraries/liblmdb/mdb.c b/libraries/liblmdb/mdb.c index 1e0579c8a7..797c67b32e 100644 --- a/libraries/liblmdb/mdb.c +++ b/libraries/liblmdb/mdb.c @@ -7157,7 +7157,7 @@ mdb_xcursor_init1(MDB_cursor *mc, MDB_node *node) mx->mx_cursor.mc_flags = C_SUB; } else { MDB_page *fp = NODEDATA(node); - mx->mx_db.md_pad = mc->mc_pg[mc->mc_top]->mp_pad; + mx->mx_db.md_pad = 0; mx->mx_db.md_flags = 0; mx->mx_db.md_depth = 1; mx->mx_db.md_branch_pages = 0; @@ -8152,6 +8152,7 @@ mdb_page_split(MDB_cursor *mc, MDB_val *newkey, MDB_val *newdata, pgno_t newpgno /* Create a right sibling. */ if ((rc = mdb_page_new(mc, mp->mp_flags, 1, &rp))) return rc; + rp->mp_pad = mp->mp_pad; DPRINTF(("new right sibling: page %"Z"u", rp->mp_pgno)); if (mc->mc_snum < 2) { From 086bc04cd2852753cc20eb812b9054eb545e3088 Mon Sep 17 00:00:00 2001 From: "leo@yuriev.ru" Date: Fri, 3 Jul 2015 23:01:00 +0200 Subject: [PATCH 08/34] ITS#7969 Use __sync_synchronize() The rest of 9a80a8a8e8feed56fbccd8851b8a789f7fff9c11 plus commit a937740aa0c47dc7a1a5e9be42dcea2dd3c81683. --- libraries/liblmdb/mdb.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/libraries/liblmdb/mdb.c b/libraries/liblmdb/mdb.c index 797c67b32e..2ee0b6a149 100644 --- a/libraries/liblmdb/mdb.c +++ b/libraries/liblmdb/mdb.c @@ -3643,6 +3643,11 @@ mdb_env_write_meta(MDB_txn *txn) mp->mm_dbs[0] = txn->mt_dbs[0]; mp->mm_dbs[1] = txn->mt_dbs[1]; mp->mm_last_pg = txn->mt_next_pgno - 1; +#if (__GNUC__ * 100 + __GNUC_MINOR__ >= 404) && /* TODO: portability */ \ + !(defined(__i386__) || defined(__x86_64__)) + /* LY: issue a memory barrier, if not x86. 
ITS#7969 */ + __sync_synchronize(); +#endif mp->mm_txnid = txn->mt_txnid; if (!(flags & (MDB_NOMETASYNC|MDB_NOSYNC))) { unsigned meta_size = env->me_psize; From f25c4f0db9416248f260e8b24043226aecfa7122 Mon Sep 17 00:00:00 2001 From: Hallvard Furuseth Date: Fri, 3 Jul 2015 23:02:12 +0200 Subject: [PATCH 09/34] Whitespace (align with mdb.master) --- libraries/liblmdb/mdb.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/libraries/liblmdb/mdb.c b/libraries/liblmdb/mdb.c index 2ee0b6a149..440966b9ef 100644 --- a/libraries/liblmdb/mdb.c +++ b/libraries/liblmdb/mdb.c @@ -3960,6 +3960,7 @@ mdb_env_open2(MDB_env *env) else env->me_pidquery = PROCESS_QUERY_INFORMATION; #endif /* _WIN32 */ + #ifdef BROKEN_FDATASYNC /* ext3/ext4 fdatasync is broken on some older Linux kernels. * https://lkml.org/lkml/2012/9/3/83 @@ -4816,7 +4817,6 @@ mdb_env_close0(MDB_env *env, int excl) env->me_flags &= ~(MDB_ENV_ACTIVE|MDB_ENV_TXKEY); } - void ESECT mdb_env_close(MDB_env *env) { From 3a71450436b7a53b90032f8606a37269fd90ffa8 Mon Sep 17 00:00:00 2001 From: Hallvard Furuseth Date: Fri, 3 Jul 2015 23:07:00 +0200 Subject: [PATCH 10/34] Add MDB_USE_POSIX_MUTEX. So far just to help aligning mdb.master and mdb.RE. We'll make it an option later. 
--- libraries/liblmdb/mdb.c | 17 ++++++++++++----- 1 file changed, 12 insertions(+), 5 deletions(-) diff --git a/libraries/liblmdb/mdb.c b/libraries/liblmdb/mdb.c index 440966b9ef..1cd915042d 100644 --- a/libraries/liblmdb/mdb.c +++ b/libraries/liblmdb/mdb.c @@ -121,9 +121,16 @@ extern int cacheflush(char *addr, int nbytes, int cache); #ifdef MDB_USE_POSIX_SEM # define MDB_USE_HASH 1 #include +#else +#define MDB_USE_POSIX_MUTEX 1 #endif #endif +#if defined(_WIN32) + defined(MDB_USE_POSIX_SEM) \ + + defined(MDB_USE_POSIX_MUTEX) != 1 +# error "Ambiguous shared-lock implementation" +#endif + #ifdef USE_VALGRIND #include #define VGMEMP_CREATE(h,r,z) VALGRIND_CREATE_MEMPOOL(h,r,z) @@ -278,7 +285,7 @@ mdb_sem_wait(sem_t *sem) return rc; } -#else +#else /* MDB_USE_POSIX_MUTEX: */ /** Lock the reader mutex. */ #define LOCK_MUTEX_R(env) pthread_mutex_lock(&(env)->me_txns->mti_mutex) @@ -1219,7 +1226,7 @@ static int mdb_page_split(MDB_cursor *mc, MDB_val *newkey, MDB_val *newdata, static int mdb_env_read_header(MDB_env *env, MDB_meta *meta); static int mdb_env_pick_meta(const MDB_env *env); static int mdb_env_write_meta(MDB_txn *txn); -#if !(defined(_WIN32) || defined(MDB_USE_POSIX_SEM)) /* Drop unused excl arg */ +#ifdef MDB_USE_POSIX_MUTEX /* Drop unused excl arg */ # define mdb_env_close0(env, excl) mdb_env_close1(env) #endif static void mdb_env_close0(MDB_env *env, int excl); @@ -4234,8 +4241,8 @@ mdb_env_excl_lock(MDB_env *env, int *excl) if (!rc) { *excl = 1; } else -# ifdef MDB_USE_POSIX_SEM - if (*excl < 0) /* always true when !MDB_USE_POSIX_SEM */ +# ifndef MDB_USE_POSIX_MUTEX + if (*excl < 0) /* always true when MDB_USE_POSIX_MUTEX */ # endif { lock_info.l_type = F_RDLCK; @@ -4507,7 +4514,7 @@ mdb_env_setup_locks(MDB_env *env, char *lpath, int mode, int *excl) env->me_wmutex = sem_open(env->me_txns->mti_wmname, O_CREAT|O_EXCL, mode, 1); if (env->me_wmutex == SEM_FAILED) goto fail_errno; -#else /* MDB_USE_POSIX_SEM */ +#else /* MDB_USE_POSIX_MUTEX: */ 
pthread_mutexattr_t mattr; if ((rc = pthread_mutexattr_init(&mattr)) From 600e2b6ce0b96cd442e0d5a805aa69cf0179f728 Mon Sep 17 00:00:00 2001 From: Hallvard Furuseth Date: Fri, 3 Jul 2015 23:07:30 +0200 Subject: [PATCH 11/34] Support robust mutexes/locks. Add mdb_mutex_t etc. --- libraries/liblmdb/lmdb.h | 15 ++- libraries/liblmdb/mdb.c | 224 ++++++++++++++++++++++++++++----------- 2 files changed, 174 insertions(+), 65 deletions(-) diff --git a/libraries/liblmdb/lmdb.h b/libraries/liblmdb/lmdb.h index ee302a2365..1e934f4767 100644 --- a/libraries/liblmdb/lmdb.h +++ b/libraries/liblmdb/lmdb.h @@ -49,9 +49,13 @@ * stale locks can block further operation. * * Fix: Check for stale readers periodically, using the - * #mdb_reader_check function or the \ref mdb_stat_1 "mdb_stat" tool. Or just - * make all programs using the database close it; the lockfile - * is always reset on first open of the environment. + * #mdb_reader_check function or the \ref mdb_stat_1 "mdb_stat" tool. + * Stale writers will be cleared automatically on some systems: + * - Windows - automatic + * - Linux, systems using POSIX mutexes with Robust option - automatic + * - not on BSD, systems using POSIX semaphores. + * Otherwise just make all programs using the database close it; + * the lockfile is always reset on first open of the environment. * * - On BSD systems or others configured with MDB_USE_POSIX_SEM, * startup can fail due to semaphores owned by another userid. @@ -106,6 +110,9 @@ * for stale readers is performed or the lockfile is reset, * since the process may not remove it from the lockfile. * + * This does not apply to write transactions if the system clears + * stale writers, see above. + * * - If you do that anyway, do a periodic check for stale readers. Or * close the environment once in a while, so the lockfile can get reset. 
* @@ -391,7 +398,7 @@ typedef enum MDB_cursor_op { #define MDB_PAGE_NOTFOUND (-30797) /** Located page was wrong type */ #define MDB_CORRUPTED (-30796) - /** Update of meta page failed, probably I/O error */ + /** Update of meta page failed or environment had fatal error */ #define MDB_PANIC (-30795) /** Environment version mismatch */ #define MDB_VERSION_MISMATCH (-30794) diff --git a/libraries/liblmdb/mdb.c b/libraries/liblmdb/mdb.c index 1cd915042d..1cb99e7a91 100644 --- a/libraries/liblmdb/mdb.c +++ b/libraries/liblmdb/mdb.c @@ -224,6 +224,18 @@ extern int cacheflush(char *addr, int nbytes, int cache); # define mdb_func_ "" #endif +/* Internal error codes, not exposed outside liblmdb */ +#define MDB_NO_ROOT (MDB_LAST_ERRCODE + 10) +#ifdef _WIN32 +#define MDB_OWNERDEAD ((int) WAIT_ABANDONED) +#elif defined(MDB_USE_POSIX_MUTEX) && defined(EOWNERDEAD) +#define MDB_OWNERDEAD EOWNERDEAD /**< #LOCK_MUTEX0() result if dead owner */ +#endif + +#ifdef MDB_OWNERDEAD +#define MDB_ROBUST_SUPPORTED 1 +#endif + #ifdef _WIN32 #define MDB_USE_HASH 1 #define MDB_PIDLOCK 0 @@ -231,6 +243,7 @@ extern int cacheflush(char *addr, int nbytes, int cache); #define pthread_t HANDLE #define pthread_mutex_t HANDLE #define pthread_cond_t HANDLE +typedef HANDLE mdb_mutex_t, mdb_mutexref_t; #define pthread_key_t DWORD #define pthread_self() GetCurrentThreadId() #define pthread_key_create(x,y) \ @@ -244,10 +257,9 @@ extern int cacheflush(char *addr, int nbytes, int cache); #define pthread_cond_wait(cond,mutex) do{SignalObjectAndWait(*mutex, *cond, INFINITE, FALSE); WaitForSingleObject(*mutex, INFINITE);}while(0) #define THREAD_CREATE(thr,start,arg) thr=CreateThread(NULL,0,start,arg,0,NULL) #define THREAD_FINISH(thr) WaitForSingleObject(thr, INFINITE) -#define LOCK_MUTEX_R(env) pthread_mutex_lock(&(env)->me_rmutex) -#define UNLOCK_MUTEX_R(env) pthread_mutex_unlock(&(env)->me_rmutex) -#define LOCK_MUTEX_W(env) pthread_mutex_lock(&(env)->me_wmutex) -#define UNLOCK_MUTEX_W(env) 
pthread_mutex_unlock(&(env)->me_wmutex) +#define LOCK_MUTEX0(mutex) WaitForSingleObject(mutex, INFINITE) +#define UNLOCK_MUTEX(mutex) ReleaseMutex(mutex) +#define mdb_mutex_consistent(mutex) 0 #define getpid() GetCurrentProcessId() #define MDB_FDATASYNC(fd) (!FlushFileBuffers(fd)) #define MDB_MSYNC(addr,len,flags) (!FlushViewOfFile(addr,len)) @@ -272,10 +284,9 @@ extern int cacheflush(char *addr, int nbytes, int cache); #ifdef MDB_USE_POSIX_SEM -#define LOCK_MUTEX_R(env) mdb_sem_wait((env)->me_rmutex) -#define UNLOCK_MUTEX_R(env) sem_post((env)->me_rmutex) -#define LOCK_MUTEX_W(env) mdb_sem_wait((env)->me_wmutex) -#define UNLOCK_MUTEX_W(env) sem_post((env)->me_wmutex) +typedef sem_t *mdb_mutex_t, *mdb_mutexref_t; +#define LOCK_MUTEX0(mutex) mdb_sem_wait(mutex) +#define UNLOCK_MUTEX(mutex) sem_post(mutex) static int mdb_sem_wait(sem_t *sem) @@ -286,21 +297,25 @@ mdb_sem_wait(sem_t *sem) } #else /* MDB_USE_POSIX_MUTEX: */ - /** Lock the reader mutex. + /** Shared mutex/semaphore as it is stored (mdb_mutex_t), and as + * local variables keep it (mdb_mutexref_t). + * + * When #mdb_mutexref_t is a pointer declaration and #mdb_mutex_t is + * not, then it is array[size 1] so it can be assigned to a pointer. + * @{ */ -#define LOCK_MUTEX_R(env) pthread_mutex_lock(&(env)->me_txns->mti_mutex) - /** Unlock the reader mutex. +typedef pthread_mutex_t mdb_mutex_t[1], *mdb_mutexref_t; + /* @} */ + /** Lock the reader or writer mutex. + * Returns 0 or a code to give #mdb_mutex_failed(), as in #LOCK_MUTEX(). */ -#define UNLOCK_MUTEX_R(env) pthread_mutex_unlock(&(env)->me_txns->mti_mutex) - - /** Lock the writer mutex. - * Only a single write transaction is allowed at a time. Other writers - * will block waiting for this mutex. +#define LOCK_MUTEX0(mutex) pthread_mutex_lock(mutex) + /** Unlock the reader or writer mutex. */ -#define LOCK_MUTEX_W(env) pthread_mutex_lock(&(env)->me_txns->mti_wmutex) - /** Unlock the writer mutex. 
+#define UNLOCK_MUTEX(mutex) pthread_mutex_unlock(mutex) + /** Mark mutex-protected data as repaired, after death of previous owner. */ -#define UNLOCK_MUTEX_W(env) pthread_mutex_unlock(&(env)->me_txns->mti_wmutex) +#define mdb_mutex_consistent(mutex) pthread_mutex_consistent(mutex) #endif /* MDB_USE_POSIX_SEM */ /** Get the error code for the last failed system function. @@ -334,6 +349,19 @@ mdb_sem_wait(sem_t *sem) /** @} */ +#ifdef MDB_ROBUST_SUPPORTED + /** Lock mutex, handle any error, set rc = result. + * Return 0 on success, nonzero (not rc) on error. + */ +#define LOCK_MUTEX(rc, env, mutex) \ + (((rc) = LOCK_MUTEX0(mutex)) && \ + ((rc) = mdb_mutex_failed(env, mutex, rc))) +static int mdb_mutex_failed(MDB_env *env, mdb_mutexref_t mutex, int rc); +#else +#define LOCK_MUTEX(rc, env, mutex) ((rc) = LOCK_MUTEX0(mutex)) +#define mdb_mutex_failed(env, mutex, rc) (rc) +#endif + #ifndef _WIN32 /** A flag for opening a file and requesting synchronous data writes. * This is only used when writing a meta page. It's not strictly needed; @@ -650,9 +678,9 @@ typedef struct MDB_txbody { char mtb_rmname[MNAME_LEN]; #else /** Mutex protecting access to this table. - * This is the reader lock that #LOCK_MUTEX_R acquires. + * This is the reader table lock used with LOCK_MUTEX(). */ - pthread_mutex_t mtb_mutex; + mdb_mutex_t mtb_rmutex; #endif /** The ID of the last transaction committed to the database. 
* This is recorded here only for convenience; the value can always @@ -672,7 +700,7 @@ typedef struct MDB_txninfo { MDB_txbody mtb; #define mti_magic mt1.mtb.mtb_magic #define mti_format mt1.mtb.mtb_format -#define mti_mutex mt1.mtb.mtb_mutex +#define mti_rmutex mt1.mtb.mtb_rmutex #define mti_rmname mt1.mtb.mtb_rmname #define mti_txnid mt1.mtb.mtb_txnid #define mti_numreaders mt1.mtb.mtb_numreaders @@ -683,7 +711,7 @@ typedef struct MDB_txninfo { char mt2_wmname[MNAME_LEN]; #define mti_wmname mt2.mt2_wmname #else - pthread_mutex_t mt2_wmutex; + mdb_mutex_t mt2_wmutex; #define mti_wmutex mt2.mt2_wmutex #endif char pad[(MNAME_LEN+CACHELINE-1) & ~(CACHELINE-1)]; @@ -1170,11 +1198,13 @@ struct MDB_env { int me_live_reader; /**< have liveness lock in reader table */ #ifdef _WIN32 int me_pidquery; /**< Used in OpenProcess */ - HANDLE me_rmutex; /* Windows mutexes don't reside in shared mem */ - HANDLE me_wmutex; -#elif defined(MDB_USE_POSIX_SEM) - sem_t *me_rmutex; /* Shared mutexes are not supported */ - sem_t *me_wmutex; +#endif +#ifdef MDB_USE_POSIX_MUTEX /* Posix mutexes reside in shared mem */ +# define me_rmutex me_txns->mti_rmutex /**< Shared reader lock */ +# define me_wmutex me_txns->mti_wmutex /**< Shared writer lock */ +#else + mdb_mutex_t me_rmutex; + mdb_mutex_t me_wmutex; #endif void *me_userctx; /**< User-settable context */ MDB_assert_func *me_assert_func; /**< Callback for assertion failures */ @@ -1264,6 +1294,7 @@ static void mdb_xcursor_init2(MDB_cursor *mc, MDB_xcursor *src_mx, int force); static int mdb_drop0(MDB_cursor *mc, int subs); static void mdb_default_cmp(MDB_txn *txn, MDB_dbi dbi); +static int mdb_reader_check0(MDB_env *env, int rlocked, int *dead); /** @cond */ static MDB_cmp_func mdb_cmp_memn, mdb_cmp_memnr, mdb_cmp_int, mdb_cmp_cint, mdb_cmp_long; @@ -1298,7 +1329,7 @@ static char *const mdb_errstr[] = { "MDB_NOTFOUND: No matching key/data pair found", "MDB_PAGE_NOTFOUND: Requested page not found", "MDB_CORRUPTED: Located page was wrong 
type", - "MDB_PANIC: Update of meta page failed", + "MDB_PANIC: Update of meta page failed or environment had fatal error", "MDB_VERSION_MISMATCH: Database environment version mismatch", "MDB_INVALID: File is not an LMDB file", "MDB_MAP_FULL: Environment mapsize limit reached", @@ -2540,6 +2571,7 @@ mdb_txn_renew0(MDB_txn *txn) } else { MDB_PID_T pid = env->me_pid; MDB_THR_T tid = pthread_self(); + mdb_mutexref_t rmutex = env->me_rmutex; if (!env->me_live_reader) { rc = mdb_reader_pid(env, Pidset, pid); @@ -2548,13 +2580,14 @@ mdb_txn_renew0(MDB_txn *txn) env->me_live_reader = 1; } - LOCK_MUTEX_R(env); + if (LOCK_MUTEX(rc, env, rmutex)) + return rc; nr = ti->mti_numreaders; for (i=0; i<nr; i++) if (ti->mti_readers[i].mr_pid == 0) break; if (i == env->me_maxreaders) { - UNLOCK_MUTEX_R(env); + UNLOCK_MUTEX(rmutex); return MDB_READERS_FULL; } r = &ti->mti_readers[i]; @@ -2571,7 +2604,7 @@ mdb_txn_renew0(MDB_txn *txn) ti->mti_numreaders = ++nr; env->me_close_readers = nr; r->mr_pid = pid; - UNLOCK_MUTEX_R(env); + UNLOCK_MUTEX(rmutex); new_notls = (env->me_flags & MDB_NOTLS); if (!new_notls && (rc=pthread_setspecific(env->me_txkey, r))) { @@ -2590,8 +2623,8 @@ mdb_txn_renew0(MDB_txn *txn) } else { /* Not yet touching txn == env->me_txn0, it may be active */ if (ti) { - LOCK_MUTEX_W(env); - + if (LOCK_MUTEX(rc, env, env->me_wmutex)) + return rc; txn->mt_txnid = ti->mti_txnid; meta = env->me_metas[txn->mt_txnid & 1]; } else { @@ -2868,7 +2901,7 @@ mdb_txn_reset0(MDB_txn *txn, const char *act) env->me_txn = NULL; /* The writer mutex was locked in mdb_txn_begin. 
*/ if (env->me_txns) - UNLOCK_MUTEX_W(env); + UNLOCK_MUTEX(env->me_wmutex); } else { txn->mt_parent->mt_child = NULL; env->me_pgstate = ((MDB_ntxn *)txn)->mnt_pgstate; @@ -3468,7 +3501,7 @@ done: mdb_dbis_update(txn, 1); if (env->me_txns) - UNLOCK_MUTEX_W(env); + UNLOCK_MUTEX(env->me_wmutex); if (txn != env->me_txn0) free(txn); @@ -4519,8 +4552,11 @@ mdb_env_setup_locks(MDB_env *env, char *lpath, int mode, int *excl) if ((rc = pthread_mutexattr_init(&mattr)) || (rc = pthread_mutexattr_setpshared(&mattr, PTHREAD_PROCESS_SHARED)) - || (rc = pthread_mutex_init(&env->me_txns->mti_mutex, &mattr)) - || (rc = pthread_mutex_init(&env->me_txns->mti_wmutex, &mattr))) +#ifdef MDB_ROBUST_SUPPORTED + || (rc = pthread_mutexattr_setrobust(&mattr, PTHREAD_MUTEX_ROBUST)) +#endif + || (rc = pthread_mutex_init(env->me_txns->mti_rmutex, &mattr)) + || (rc = pthread_mutex_init(env->me_txns->mti_wmutex, &mattr))) goto fail; pthread_mutexattr_destroy(&mattr); #endif /* _WIN32 || MDB_USE_POSIX_SEM */ @@ -4577,8 +4613,8 @@ fail: * environment and re-opening it with the new flags. 
*/ #define CHANGEABLE (MDB_NOSYNC|MDB_NOMETASYNC|MDB_MAPASYNC|MDB_NOMEMINIT) -#define CHANGELESS (MDB_FIXEDMAP|MDB_NOSUBDIR|MDB_RDONLY|MDB_WRITEMAP| \ - MDB_NOTLS|MDB_NOLOCK|MDB_NORDAHEAD) +#define CHANGELESS (MDB_FIXEDMAP|MDB_NOSUBDIR|MDB_RDONLY| \ + MDB_WRITEMAP|MDB_NOTLS|MDB_NOLOCK|MDB_NORDAHEAD) #if VALID_FLAGS & PERSISTENT_FLAGS & (CHANGEABLE|CHANGELESS) # error "Persistent DB flags & env flags overlap, but both go in mm_flags" @@ -6165,7 +6201,6 @@ int mdb_cursor_put(MDB_cursor *mc, MDB_val *key, MDB_val *data, unsigned int flags) { - enum { MDB_NO_ROOT = MDB_LAST_ERRCODE+10 }; /* internal code */ MDB_env *env; MDB_node *leaf = NULL; MDB_page *fp, *mp, *sub_root = NULL; @@ -8928,6 +8963,7 @@ static int ESECT mdb_env_copyfd0(MDB_env *env, HANDLE fd) { MDB_txn *txn = NULL; + mdb_mutexref_t wmutex = NULL; int rc; size_t wsize; char *ptr; @@ -8952,11 +8988,13 @@ mdb_env_copyfd0(MDB_env *env, HANDLE fd) mdb_txn_reset0(txn, "reset-stage1"); /* Temporarily block writers until we snapshot the meta pages */ - LOCK_MUTEX_W(env); + wmutex = env->me_wmutex; + if (LOCK_MUTEX(rc, env, wmutex)) + goto leave; rc = mdb_txn_renew0(txn); if (rc) { - UNLOCK_MUTEX_W(env); + UNLOCK_MUTEX(wmutex); goto leave; } } @@ -8980,8 +9018,8 @@ mdb_env_copyfd0(MDB_env *env, HANDLE fd) break; } } - if (env->me_txns) - UNLOCK_MUTEX_W(env); + if (wmutex) + UNLOCK_MUTEX(wmutex); if (rc) goto leave; @@ -9654,17 +9692,22 @@ mdb_pid_insert(MDB_PID_T *ids, MDB_PID_T pid) int ESECT mdb_reader_check(MDB_env *env, int *dead) { - unsigned int i, j, rdrs; - MDB_reader *mr; - MDB_PID_T *pids, pid; - int count = 0; - if (!env) return EINVAL; if (dead) *dead = 0; - if (!env->me_txns) - return MDB_SUCCESS; + return env->me_txns ? mdb_reader_check0(env, 0, dead) : MDB_SUCCESS; +} + +/** As #mdb_reader_check(). rlocked = <caller locked the reader mutex>. */ +static int mdb_reader_check0(MDB_env *env, int rlocked, int *dead) +{ + mdb_mutexref_t rmutex = rlocked ? 
NULL : env->me_rmutex; + unsigned int i, j, rdrs; + MDB_reader *mr; + MDB_PID_T *pids, pid; + int rc = MDB_SUCCESS, count = 0; + rdrs = env->me_txns->mti_numreaders; pids = malloc((rdrs+1) * sizeof(MDB_PID_T)); if (!pids) @@ -9672,22 +9715,32 @@ mdb_reader_check(MDB_env *env, int *dead) pids[0] = 0; mr = env->me_txns->mti_readers; for (i=0; ime_pid) { - pid = mr[i].mr_pid; + pid = mr[i].mr_pid; + if (pid && pid != env->me_pid) { if (mdb_pid_insert(pids, pid) == 0) { if (!mdb_reader_pid(env, Pidcheck, pid)) { - LOCK_MUTEX_R(env); - /* Recheck, a new process may have reused pid */ - if (!mdb_reader_pid(env, Pidcheck, pid)) { - for (j=i; jme_rmutex); + if (!rlocked) { + /* Keep mti_txnid updated, otherwise next writer can + * overwrite data which latest meta page refers to. + */ + toggle = mdb_env_pick_meta(env); + env->me_txns->mti_txnid = env->me_metas[toggle]->mm_txnid; + /* env is hosed if the dead thread was ours */ + if (env->me_txn) { + env->me_flags |= MDB_FATAL_ERROR; + env->me_txn = NULL; + rc = MDB_PANIC; + } + } + DPRINTF(("%cmutex owner died, %s", (rlocked ? 'r' : 'w'), + (rc ? 
"this process' env is hosed" : "recovering"))); + rc2 = mdb_reader_check0(env, rlocked, NULL); + if (rc2 == 0) + rc2 = mdb_mutex_consistent(mutex); + if (rc || (rc = rc2)) { + DPRINTF(("LOCK_MUTEX recovery failed, %s", mdb_strerror(rc))); + UNLOCK_MUTEX(mutex); + } + } else { +#ifdef _WIN32 + rc = ErrCode(); +#endif + DPRINTF(("LOCK_MUTEX failed, %s", mdb_strerror(rc))); + } + + return rc; +} +#endif /* MDB_ROBUST_SUPPORTED */ /** @} */ From a36f7a7fc2128b3a6c9ac5b029c111904d28f49e Mon Sep 17 00:00:00 2001 From: Hallvard Furuseth Date: Sat, 4 Jul 2015 13:42:41 +0200 Subject: [PATCH 12/34] mdb_env_get_flags(): Hide internal flags --- libraries/liblmdb/mdb.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/libraries/liblmdb/mdb.c b/libraries/liblmdb/mdb.c index 1cb99e7a91..410981e989 100644 --- a/libraries/liblmdb/mdb.c +++ b/libraries/liblmdb/mdb.c @@ -9157,7 +9157,7 @@ mdb_env_get_flags(MDB_env *env, unsigned int *arg) if (!env || !arg) return EINVAL; - *arg = env->me_flags; + *arg = env->me_flags & (CHANGEABLE|CHANGELESS); return MDB_SUCCESS; } From 70788bfe45b32c32fb1bc23fdfe84b325d7de836 Mon Sep 17 00:00:00 2001 From: Hallvard Furuseth Date: Sat, 4 Jul 2015 13:48:01 +0200 Subject: [PATCH 13/34] Use mdb_cmp_long() for FREE_DBI --- libraries/liblmdb/mdb.c | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/libraries/liblmdb/mdb.c b/libraries/liblmdb/mdb.c index 410981e989..7ba1a529f7 100644 --- a/libraries/liblmdb/mdb.c +++ b/libraries/liblmdb/mdb.c @@ -4670,6 +4670,7 @@ mdb_env_open(MDB_env *env, const char *path, unsigned int flags, mdb_mode_t mode rc = ENOMEM; goto leave; } + env->me_dbxs[FREE_DBI].md_cmp = mdb_cmp_long; /* aligned MDB_INTEGERKEY */ /* For RDONLY, get lockfile after we know datafile exists */ if (!(flags & (MDB_RDONLY|MDB_NOLOCK))) { @@ -9291,10 +9292,6 @@ int mdb_dbi_open(MDB_txn *txn, const char *name, unsigned int flags, MDB_dbi *db unsigned int unused = 0, seq; size_t len; - if 
(txn->mt_dbxs[FREE_DBI].md_cmp == NULL) { - mdb_default_cmp(txn, FREE_DBI); - } - if ((flags & VALID_FLAGS) != flags) return EINVAL; if (txn->mt_flags & MDB_TXN_ERROR) From cc2a50aca226c18b2aaee4e92579f6e675761fc8 Mon Sep 17 00:00:00 2001 From: Hallvard Furuseth Date: Sat, 4 Jul 2015 13:48:02 +0200 Subject: [PATCH 14/34] mdb_drop0(): Omit scanning DUPSORT sub-DB leaves --- libraries/liblmdb/mdb.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/libraries/liblmdb/mdb.c b/libraries/liblmdb/mdb.c index 7ba1a529f7..d08e3a7c10 100644 --- a/libraries/liblmdb/mdb.c +++ b/libraries/liblmdb/mdb.c @@ -9445,8 +9445,10 @@ mdb_drop0(MDB_cursor *mc, int subs) MDB_cursor mx; unsigned int i; - /* LEAF2 pages have no nodes, cannot have sub-DBs */ - if (IS_LEAF2(mc->mc_pg[mc->mc_top])) + /* DUPSORT sub-DBs have no ovpages/DBs. Omit scanning leaves. + * This also avoids any P_LEAF2 pages, which have no nodes. + */ + if (mc->mc_flags & C_SUB) mdb_cursor_pop(mc); mdb_cursor_copy(mc, &mx); From ee06adb31183c726b7eba3ce1c6468b605418c36 Mon Sep 17 00:00:00 2001 From: Hallvard Furuseth Date: Sat, 4 Jul 2015 13:48:03 +0200 Subject: [PATCH 15/34] mdb_txn_begin() cleanup --- libraries/liblmdb/mdb.c | 33 ++++++++++++--------------------- 1 file changed, 12 insertions(+), 21 deletions(-) diff --git a/libraries/liblmdb/mdb.c b/libraries/liblmdb/mdb.c index d08e3a7c10..73490db591 100644 --- a/libraries/liblmdb/mdb.c +++ b/libraries/liblmdb/mdb.c @@ -2717,7 +2717,6 @@ mdb_txn_begin(MDB_env *env, MDB_txn *parent, unsigned int flags, MDB_txn **ret) if (env->me_flags & MDB_RDONLY & ~flags) /* write txn in RDONLY env */ return EACCES; - size = tsize = sizeof(MDB_txn); if (parent) { /* Nested transactions: Max 1 child, write txns only, no writemap */ flags |= parent->mt_flags; @@ -2727,40 +2726,31 @@ mdb_txn_begin(MDB_env *env, MDB_txn *parent, unsigned int flags, MDB_txn **ret) return (parent->mt_flags & MDB_TXN_RDONLY) ? 
EINVAL : MDB_BAD_TXN; } /* Child txns save MDB_pgstate and use own copy of cursors */ - size = tsize = sizeof(MDB_ntxn); - size += env->me_maxdbs * sizeof(MDB_cursor *); - } else if (!(flags & MDB_RDONLY)) { + size = env->me_maxdbs * (sizeof(MDB_db)+sizeof(MDB_cursor *)+1); + size += tsize = sizeof(MDB_ntxn); + } else if (flags & MDB_RDONLY) { + size = env->me_maxdbs * (sizeof(MDB_db)+1); + size += tsize = sizeof(MDB_txn); + } else { /* Reuse preallocated write txn. However, do not touch it until * mdb_txn_renew0() succeeds, since it currently may be active. */ txn = env->me_txn0; goto renew; } - size += env->me_maxdbs * (sizeof(MDB_db)+1); - if ((txn = calloc(1, size)) == NULL) { DPRINTF(("calloc: %s", strerror(errno))); return ENOMEM; } txn->mt_dbs = (MDB_db *) ((char *)txn + tsize); - if (flags & MDB_RDONLY) { - txn->mt_dbflags = (unsigned char *)(txn->mt_dbs + env->me_maxdbs); - txn->mt_dbiseqs = env->me_dbiseqs; - } else { - txn->mt_cursors = (MDB_cursor **)(txn->mt_dbs + env->me_maxdbs); - if (parent) { - txn->mt_dbiseqs = parent->mt_dbiseqs; - txn->mt_dbflags = (unsigned char *)(txn->mt_cursors + env->me_maxdbs); - } else { - txn->mt_dbiseqs = (unsigned int *)(txn->mt_cursors + env->me_maxdbs); - txn->mt_dbflags = (unsigned char *)(txn->mt_dbiseqs + env->me_maxdbs); - } - } + txn->mt_dbflags = (unsigned char *)txn + size - env->me_maxdbs; txn->mt_flags = flags; txn->mt_env = env; if (parent) { unsigned int i; + txn->mt_cursors = (MDB_cursor **)(txn->mt_dbs + env->me_maxdbs); + txn->mt_dbiseqs = parent->mt_dbiseqs; txn->mt_u.dirty_list = malloc(sizeof(MDB_ID2)*MDB_IDL_UM_SIZE); if (!txn->mt_u.dirty_list || !(txn->mt_free_pgs = mdb_midl_alloc(MDB_IDL_UM_MAX))) @@ -2797,7 +2787,8 @@ mdb_txn_begin(MDB_env *env, MDB_txn *parent, unsigned int flags, MDB_txn **ret) rc = mdb_cursor_shadow(parent, txn); if (rc) mdb_txn_reset0(txn, "beginchild-fail"); - } else { + } else { /* MDB_RDONLY */ + txn->mt_dbiseqs = env->me_dbiseqs; renew: rc = mdb_txn_renew0(txn); } @@ 
-2805,7 +2796,7 @@ renew: if (txn != env->me_txn0) free(txn); } else { - txn->mt_flags |= flags; /* for txn==me_txn0, no effect otherwise */ + txn->mt_flags |= flags; /* could not change txn=me_txn0 earlier */ *ret = txn; DPRINTF(("begin txn %"Z"u%c %p on mdbenv %p, root page %"Z"u", txn->mt_txnid, (flags & MDB_RDONLY) ? 'r' : 'w', From 9d6c973f765809cd616972c15c104311075a0998 Mon Sep 17 00:00:00 2001 From: Hallvard Furuseth Date: Sat, 4 Jul 2015 13:48:04 +0200 Subject: [PATCH 16/34] Simpler mdb_txn_commit(). mt_env is always set. Commit(mt_child) resets mt_child, so parent need not. --- libraries/liblmdb/mdb.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/libraries/liblmdb/mdb.c b/libraries/liblmdb/mdb.c index 73490db591..80a6eb977f 100644 --- a/libraries/liblmdb/mdb.c +++ b/libraries/liblmdb/mdb.c @@ -3287,12 +3287,11 @@ mdb_txn_commit(MDB_txn *txn) unsigned int i; MDB_env *env; - if (txn == NULL || txn->mt_env == NULL) + if (txn == NULL) return EINVAL; if (txn->mt_child) { rc = mdb_txn_commit(txn->mt_child); - txn->mt_child = NULL; if (rc) goto fail; } From e5f41ce56bc1cb0c62378e91469e5674915cb192 Mon Sep 17 00:00:00 2001 From: Hallvard Furuseth Date: Sat, 4 Jul 2015 13:48:05 +0200 Subject: [PATCH 17/34] Simpler mdb_node_shrink() --- libraries/liblmdb/mdb.c | 37 +++++++++++++++---------------------- 1 file changed, 15 insertions(+), 22 deletions(-) diff --git a/libraries/liblmdb/mdb.c b/libraries/liblmdb/mdb.c index 80a6eb977f..774ba9021b 100644 --- a/libraries/liblmdb/mdb.c +++ b/libraries/liblmdb/mdb.c @@ -7104,45 +7104,38 @@ mdb_node_shrink(MDB_page *mp, indx_t indx) MDB_node *node; MDB_page *sp, *xp; char *base; - int nsize, delta; - indx_t i, numkeys, ptr; + indx_t delta, nsize, len, ptr; + int i; node = NODEPTR(mp, indx); sp = (MDB_page *)NODEDATA(node); delta = SIZELEFT(sp); - xp = (MDB_page *)((char *)sp + delta); + nsize = NODEDSZ(node) - delta; - /* shift subpage upward */ + /* Prepare to shift upward, set len = length(subpage 
part to shift) */ if (IS_LEAF2(sp)) { - nsize = NUMKEYS(sp) * sp->mp_pad; + len = nsize; if (nsize & 1) return; /* do not make the node uneven-sized */ - memmove(METADATA(xp), METADATA(sp), nsize); } else { - int i; - numkeys = NUMKEYS(sp); - for (i=numkeys-1; i>=0; i--) + xp = (MDB_page *)((char *)sp + delta); /* destination subpage */ + for (i = NUMKEYS(sp); --i >= 0; ) xp->mp_ptrs[i] = sp->mp_ptrs[i] - delta; + len = PAGEHDRSZ; } - xp->mp_upper = sp->mp_lower; - xp->mp_lower = sp->mp_lower; - xp->mp_flags = sp->mp_flags; - xp->mp_pad = sp->mp_pad; - COPY_PGNO(xp->mp_pgno, mp->mp_pgno); - - nsize = NODEDSZ(node) - delta; + sp->mp_upper = sp->mp_lower; + COPY_PGNO(sp->mp_pgno, mp->mp_pgno); SETDSZ(node, nsize); - /* shift lower nodes upward */ + /* Shift upward */ + base = (char *)mp + mp->mp_upper + PAGEBASE; + memmove(base + delta, base, (char *)sp + len - base); + ptr = mp->mp_ptrs[indx]; - numkeys = NUMKEYS(mp); - for (i = 0; i < numkeys; i++) { + for (i = NUMKEYS(mp); --i >= 0; ) { if (mp->mp_ptrs[i] <= ptr) mp->mp_ptrs[i] += delta; } - - base = (char *)mp + mp->mp_upper + PAGEBASE; - memmove(base + delta, base, ptr - mp->mp_upper + NODESIZE + NODEKSZ(node)); mp->mp_upper += delta; } From 06df0a4655ec12ac17f4a0264427a62820859d7a Mon Sep 17 00:00:00 2001 From: Hallvard Furuseth Date: Sat, 4 Jul 2015 13:48:06 +0200 Subject: [PATCH 18/34] Simpler mdb_drop(). MDB_DBI_CHANGED(,MAIN_DBI) is never true. 
--- libraries/liblmdb/mdb.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/libraries/liblmdb/mdb.c b/libraries/liblmdb/mdb.c index 774ba9021b..46555013c6 100644 --- a/libraries/liblmdb/mdb.c +++ b/libraries/liblmdb/mdb.c @@ -9511,7 +9511,7 @@ int mdb_drop(MDB_txn *txn, MDB_dbi dbi, int del) if (F_ISSET(txn->mt_flags, MDB_TXN_RDONLY)) return EACCES; - if (dbi > MAIN_DBI && TXN_DBI_CHANGED(txn, dbi)) + if (TXN_DBI_CHANGED(txn, dbi)) return MDB_BAD_DBI; rc = mdb_cursor_open(txn, dbi, &mc); From b6f017968332b981363e27d8308656ff53e2645c Mon Sep 17 00:00:00 2001 From: Hallvard Furuseth Date: Sat, 4 Jul 2015 13:48:07 +0200 Subject: [PATCH 19/34] Simpler flag/DBI checks and MDB_DEBUG --- libraries/liblmdb/mdb.c | 15 ++++++--------- 1 file changed, 6 insertions(+), 9 deletions(-) diff --git a/libraries/liblmdb/mdb.c b/libraries/liblmdb/mdb.c index 46555013c6..613df457ed 100644 --- a/libraries/liblmdb/mdb.c +++ b/libraries/liblmdb/mdb.c @@ -5082,15 +5082,12 @@ static void mdb_cursor_pop(MDB_cursor *mc) { if (mc->mc_snum) { -#if MDB_DEBUG - MDB_page *top = mc->mc_pg[mc->mc_top]; -#endif + DPRINTF(("popping page %"Z"u off db %d cursor %p", + mc->mc_pg[mc->mc_top]->mp_pgno, DDBI(mc), (void *) mc)); + mc->mc_snum--; if (mc->mc_snum) mc->mc_top--; - - DPRINTF(("popped page %"Z"u off db %d cursor %p", top->mp_pgno, - DDBI(mc), (void *) mc)); } } @@ -8556,7 +8553,7 @@ mdb_put(MDB_txn *txn, MDB_dbi dbi, if (!key || !data || dbi == FREE_DBI || !TXN_DBI_EXIST(txn, dbi)) return EINVAL; - if ((flags & (MDB_NOOVERWRITE|MDB_NODUPDATA|MDB_RESERVE|MDB_APPEND|MDB_APPENDDUP)) != flags) + if (flags & ~(MDB_NOOVERWRITE|MDB_NODUPDATA|MDB_RESERVE|MDB_APPEND|MDB_APPENDDUP)) return EINVAL; mdb_cursor_init(&mc, txn, dbi, &mx); @@ -9126,7 +9123,7 @@ mdb_env_copy(MDB_env *env, const char *path) int ESECT mdb_env_set_flags(MDB_env *env, unsigned int flag, int onoff) { - if ((flag & CHANGEABLE) != flag) + if (flag & ~CHANGEABLE) return EINVAL; if (onoff) env->me_flags |= flag; @@ 
-9275,7 +9272,7 @@ int mdb_dbi_open(MDB_txn *txn, const char *name, unsigned int flags, MDB_dbi *db unsigned int unused = 0, seq; size_t len; - if ((flags & VALID_FLAGS) != flags) + if (flags & ~VALID_FLAGS) return EINVAL; if (txn->mt_flags & MDB_TXN_ERROR) return MDB_BAD_TXN; From 110fba2ef1fcc5d08a78ed5ee72134828ce70b1a Mon Sep 17 00:00:00 2001 From: Hallvard Furuseth Date: Sat, 4 Jul 2015 13:49:12 +0200 Subject: [PATCH 20/34] Add DB_USRVALID, to avoid 'dbi == FREE_DBI' tests --- libraries/liblmdb/mdb.c | 38 ++++++++++++++++++++------------------ 1 file changed, 20 insertions(+), 18 deletions(-) diff --git a/libraries/liblmdb/mdb.c b/libraries/liblmdb/mdb.c index 613df457ed..2a3fb1a0df 100644 --- a/libraries/liblmdb/mdb.c +++ b/libraries/liblmdb/mdb.c @@ -1037,6 +1037,7 @@ struct MDB_txn { #define DB_STALE 0x02 /**< Named-DB record is older than txnID */ #define DB_NEW 0x04 /**< Named-DB handle opened in this txn */ #define DB_VALID 0x08 /**< DB handle is valid, see also #MDB_VALID */ +#define DB_USRVALID 0x10 /**< As #DB_VALID, but not set for #FREE_DBI */ /** @} */ /** In write txns, array of cursors for each DB */ MDB_cursor **mt_cursors; @@ -1227,8 +1228,8 @@ typedef struct MDB_ntxn { #define MAX_WRITE (0x80000000U >> (sizeof(ssize_t) == 4)) /** Check \b txn and \b dbi arguments to a function */ -#define TXN_DBI_EXIST(txn, dbi) \ - ((txn) && (dbi) < (txn)->mt_numdbs && ((txn)->mt_dbflags[dbi] & DB_VALID)) +#define TXN_DBI_EXIST(txn, dbi, validity) \ + ((txn) && (dbi)<(txn)->mt_numdbs && ((txn)->mt_dbflags[dbi] & (validity))) /** Check for misused \b dbi handles */ #define TXN_DBI_CHANGED(txn, dbi) \ @@ -2662,9 +2663,10 @@ mdb_txn_renew0(MDB_txn *txn) for (i=2; i<txn->mt_numdbs; i++) { x = env->me_dbflags[i]; txn->mt_dbs[i].md_flags = x & PERSISTENT_FLAGS; - txn->mt_dbflags[i] = (x & MDB_VALID) ? DB_VALID|DB_STALE : 0; + txn->mt_dbflags[i] = (x & MDB_VALID) ? 
DB_VALID|DB_USRVALID|DB_STALE : 0; } - txn->mt_dbflags[0] = txn->mt_dbflags[1] = DB_VALID; + txn->mt_dbflags[MAIN_DBI] = DB_VALID|DB_USRVALID; + txn->mt_dbflags[FREE_DBI] = DB_VALID; if (env->me_maxpg < txn->mt_next_pgno) { mdb_txn_reset0(txn, "renew0-mapfail"); @@ -5466,7 +5468,7 @@ mdb_get(MDB_txn *txn, MDB_dbi dbi, DPRINTF(("===> get db %u key [%s]", dbi, DKEY(key))); - if (!key || !data || dbi == FREE_DBI || !TXN_DBI_EXIST(txn, dbi)) + if (!key || !data || !TXN_DBI_EXIST(txn, dbi, DB_USRVALID)) return EINVAL; if (txn->mt_flags & MDB_TXN_ERROR) @@ -7207,7 +7209,7 @@ mdb_xcursor_init1(MDB_cursor *mc, MDB_node *node) } DPRINTF(("Sub-db -%u root page %"Z"u", mx->mx_cursor.mc_dbi, mx->mx_db.md_root)); - mx->mx_dbflag = DB_VALID|DB_DIRTY; /* DB_DIRTY guides mdb_cursor_touch */ + mx->mx_dbflag = DB_VALID|DB_USRVALID|DB_DIRTY; /* DB_DIRTY guides mdb_cursor_touch */ #if UINT_MAX < SIZE_MAX if (mx->mx_dbx.md_cmp == mdb_cmp_int && mx->mx_db.md_pad == sizeof(size_t)) mx->mx_dbx.md_cmp = mdb_cmp_clong; @@ -7280,7 +7282,7 @@ mdb_cursor_open(MDB_txn *txn, MDB_dbi dbi, MDB_cursor **ret) MDB_cursor *mc; size_t size = sizeof(MDB_cursor); - if (!ret || !TXN_DBI_EXIST(txn, dbi)) + if (!ret || !TXN_DBI_EXIST(txn, dbi, DB_VALID)) return EINVAL; if (txn->mt_flags & MDB_TXN_ERROR) @@ -7312,7 +7314,7 @@ mdb_cursor_open(MDB_txn *txn, MDB_dbi dbi, MDB_cursor **ret) int mdb_cursor_renew(MDB_txn *txn, MDB_cursor *mc) { - if (!mc || !TXN_DBI_EXIST(txn, mc->mc_dbi)) + if (!mc || !TXN_DBI_EXIST(txn, mc->mc_dbi, DB_VALID)) return EINVAL; if ((mc->mc_flags & C_UNTRACK) || txn->mt_cursors) @@ -8084,7 +8086,7 @@ int mdb_del(MDB_txn *txn, MDB_dbi dbi, MDB_val *key, MDB_val *data) { - if (!key || dbi == FREE_DBI || !TXN_DBI_EXIST(txn, dbi)) + if (!key || !TXN_DBI_EXIST(txn, dbi, DB_USRVALID)) return EINVAL; if (txn->mt_flags & (MDB_TXN_RDONLY|MDB_TXN_ERROR)) @@ -8550,7 +8552,7 @@ mdb_put(MDB_txn *txn, MDB_dbi dbi, MDB_cursor mc; MDB_xcursor mx; - if (!key || !data || dbi == FREE_DBI || 
!TXN_DBI_EXIST(txn, dbi)) + if (!key || !data || !TXN_DBI_EXIST(txn, dbi, DB_USRVALID)) return EINVAL; if (flags & ~(MDB_NOOVERWRITE|MDB_NODUPDATA|MDB_RESERVE|MDB_APPEND|MDB_APPENDDUP)) @@ -9320,7 +9322,7 @@ int mdb_dbi_open(MDB_txn *txn, const char *name, unsigned int flags, MDB_dbi *db return (flags & MDB_CREATE) ? MDB_INCOMPATIBLE : MDB_NOTFOUND; /* Find the DB info */ - dbflag = DB_NEW|DB_VALID; + dbflag = DB_NEW|DB_VALID|DB_USRVALID; exact = 0; key.mv_size = len; key.mv_data = (void *)name; @@ -9368,7 +9370,7 @@ int mdb_dbi_open(MDB_txn *txn, const char *name, unsigned int flags, MDB_dbi *db int mdb_stat(MDB_txn *txn, MDB_dbi dbi, MDB_stat *arg) { - if (!arg || !TXN_DBI_EXIST(txn, dbi)) + if (!arg || !TXN_DBI_EXIST(txn, dbi, DB_VALID)) return EINVAL; if (txn->mt_flags & MDB_TXN_ERROR) @@ -9402,7 +9404,7 @@ void mdb_dbi_close(MDB_env *env, MDB_dbi dbi) int mdb_dbi_flags(MDB_txn *txn, MDB_dbi dbi, unsigned int *flags) { /* We could return the flags for the FREE_DBI too but what's the point? 
*/ - if (dbi == FREE_DBI || !TXN_DBI_EXIST(txn, dbi)) + if (!TXN_DBI_EXIST(txn, dbi, DB_USRVALID)) return EINVAL; *flags = txn->mt_dbs[dbi].md_flags & PERSISTENT_FLAGS; return MDB_SUCCESS; @@ -9502,7 +9504,7 @@ int mdb_drop(MDB_txn *txn, MDB_dbi dbi, int del) MDB_cursor *mc, *m2; int rc; - if ((unsigned)del > 1 || dbi == FREE_DBI || !TXN_DBI_EXIST(txn, dbi)) + if ((unsigned)del > 1 || !TXN_DBI_EXIST(txn, dbi, DB_USRVALID)) return EINVAL; if (F_ISSET(txn->mt_flags, MDB_TXN_RDONLY)) @@ -9550,7 +9552,7 @@ leave: int mdb_set_compare(MDB_txn *txn, MDB_dbi dbi, MDB_cmp_func *cmp) { - if (dbi == FREE_DBI || !TXN_DBI_EXIST(txn, dbi)) + if (!TXN_DBI_EXIST(txn, dbi, DB_USRVALID)) return EINVAL; txn->mt_dbxs[dbi].md_cmp = cmp; @@ -9559,7 +9561,7 @@ int mdb_set_compare(MDB_txn *txn, MDB_dbi dbi, MDB_cmp_func *cmp) int mdb_set_dupsort(MDB_txn *txn, MDB_dbi dbi, MDB_cmp_func *cmp) { - if (dbi == FREE_DBI || !TXN_DBI_EXIST(txn, dbi)) + if (!TXN_DBI_EXIST(txn, dbi, DB_USRVALID)) return EINVAL; txn->mt_dbxs[dbi].md_dcmp = cmp; @@ -9568,7 +9570,7 @@ int mdb_set_dupsort(MDB_txn *txn, MDB_dbi dbi, MDB_cmp_func *cmp) int mdb_set_relfunc(MDB_txn *txn, MDB_dbi dbi, MDB_rel_func *rel) { - if (dbi == FREE_DBI || !TXN_DBI_EXIST(txn, dbi)) + if (!TXN_DBI_EXIST(txn, dbi, DB_USRVALID)) return EINVAL; txn->mt_dbxs[dbi].md_rel = rel; @@ -9577,7 +9579,7 @@ int mdb_set_relfunc(MDB_txn *txn, MDB_dbi dbi, MDB_rel_func *rel) int mdb_set_relctx(MDB_txn *txn, MDB_dbi dbi, void *ctx) { - if (dbi == FREE_DBI || !TXN_DBI_EXIST(txn, dbi)) + if (!TXN_DBI_EXIST(txn, dbi, DB_USRVALID)) return EINVAL; txn->mt_dbxs[dbi].md_relctx = ctx; From 631803813837a5588b58d300479fa7567c51913c Mon Sep 17 00:00:00 2001 From: Hallvard Furuseth Date: Sat, 4 Jul 2015 13:50:21 +0200 Subject: [PATCH 21/34] More ESECT declarations --- libraries/liblmdb/mdb.c | 17 ++++++++++------- 1 file changed, 10 insertions(+), 7 deletions(-) diff --git a/libraries/liblmdb/mdb.c b/libraries/liblmdb/mdb.c index 2a3fb1a0df..5f0cf63ec0 100644 --- 
a/libraries/liblmdb/mdb.c +++ b/libraries/liblmdb/mdb.c @@ -1315,7 +1315,7 @@ static int mdb_sec_inited; #endif /** Return the library version info. */ -char * +char * ESECT mdb_version(int *major, int *minor, int *patch) { if (major) *major = MDB_VERSION_MAJOR; @@ -1407,7 +1407,7 @@ mdb_strerror(int err) # define mdb_assert0(env, expr, expr_txt) ((expr) ? (void)0 : \ mdb_assert_fail(env, expr_txt, mdb_func_, __FILE__, __LINE__)) -static void +static void ESECT mdb_assert_fail(MDB_env *env, const char *expr_txt, const char *func, const char *file, int line) { @@ -4350,7 +4350,7 @@ mdb_hash_val(MDB_val *val, mdb_hash_t hval) */ static const char mdb_a85[]= "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz!#$%&()*+-;<=>?@^_`{|}~"; -static void +static void ESECT mdb_pack85(unsigned long l, char *out) { int i; @@ -4361,7 +4361,7 @@ mdb_pack85(unsigned long l, char *out) } } -static void +static void ESECT mdb_hash_enc(MDB_val *val, char *encbuf) { mdb_hash_t h = mdb_hash_val(val, MDB_HASH_INIT); @@ -9368,7 +9368,8 @@ int mdb_dbi_open(MDB_txn *txn, const char *name, unsigned int flags, MDB_dbi *db return rc; } -int mdb_stat(MDB_txn *txn, MDB_dbi dbi, MDB_stat *arg) +int ESECT +mdb_stat(MDB_txn *txn, MDB_dbi dbi, MDB_stat *arg) { if (!arg || !TXN_DBI_EXIST(txn, dbi, DB_VALID)) return EINVAL; @@ -9681,7 +9682,8 @@ mdb_reader_check(MDB_env *env, int *dead) } /** As #mdb_reader_check(). rlocked = <caller locked the reader mutex>. */ -static int mdb_reader_check0(MDB_env *env, int rlocked, int *dead) +static int ESECT +mdb_reader_check0(MDB_env *env, int rlocked, int *dead) { mdb_mutexref_t rmutex = rlocked ? NULL : env->me_rmutex; unsigned int i, j, rdrs; @@ -9740,7 +9742,8 @@ static int mdb_reader_check0(MDB_env *env, int rlocked, int *dead) * @param[in] rc LOCK_MUTEX0() error (nonzero) * @return 0 on success with the mutex locked, or an error code on failure. 
*/ -static int mdb_mutex_failed(MDB_env *env, mdb_mutexref_t mutex, int rc) +static int ESECT +mdb_mutex_failed(MDB_env *env, mdb_mutexref_t mutex, int rc) { int toggle, rlocked, rc2; From 490243424cbe6ff5d1594d2d3c95df23c1db86a8 Mon Sep 17 00:00:00 2001 From: Hallvard Furuseth Date: Sat, 11 Jul 2015 21:09:36 +0200 Subject: [PATCH 22/34] Simpler mdb_node_add() --- libraries/liblmdb/mdb.c | 21 ++++++++++----------- 1 file changed, 10 insertions(+), 11 deletions(-) diff --git a/libraries/liblmdb/mdb.c b/libraries/liblmdb/mdb.c index 5f0cf63ec0..2d8d0567ad 100644 --- a/libraries/liblmdb/mdb.c +++ b/libraries/liblmdb/mdb.c @@ -6924,6 +6924,7 @@ mdb_node_add(MDB_cursor *mc, indx_t indx, MDB_node *node; MDB_page *mp = mc->mc_pg[mc->mc_top]; MDB_page *ofp = NULL; /* overflow page */ + void *ndata; DKBUF; mdb_cassert(mc, mp->mp_upper >= mp->mp_lower); @@ -6954,7 +6955,7 @@ mdb_node_add(MDB_cursor *mc, indx_t indx, if (key != NULL) node_size += key->mv_size; if (IS_LEAF(mp)) { - mdb_cassert(mc, data); + mdb_cassert(mc, key && data); if (F_ISSET(flags, F_BIGDATA)) { /* Data already on overflow page. 
*/ node_size += sizeof(pgno_t); @@ -7005,23 +7006,21 @@ update: memcpy(NODEKEY(node), key->mv_data, key->mv_size); if (IS_LEAF(mp)) { - mdb_cassert(mc, key); + ndata = NODEDATA(node); if (ofp == NULL) { if (F_ISSET(flags, F_BIGDATA)) - memcpy(node->mn_data + key->mv_size, data->mv_data, - sizeof(pgno_t)); + memcpy(ndata, data->mv_data, sizeof(pgno_t)); else if (F_ISSET(flags, MDB_RESERVE)) - data->mv_data = node->mn_data + key->mv_size; + data->mv_data = ndata; else - memcpy(node->mn_data + key->mv_size, data->mv_data, - data->mv_size); + memcpy(ndata, data->mv_data, data->mv_size); } else { - memcpy(node->mn_data + key->mv_size, &ofp->mp_pgno, - sizeof(pgno_t)); + memcpy(ndata, &ofp->mp_pgno, sizeof(pgno_t)); + ndata = METADATA(ofp); if (F_ISSET(flags, MDB_RESERVE)) - data->mv_data = METADATA(ofp); + data->mv_data = ndata; else - memcpy(METADATA(ofp), data->mv_data, data->mv_size); + memcpy(ndata, data->mv_data, data->mv_size); } } From 0cfc0a9736d88a9acdaf8440fdcc58510bab1168 Mon Sep 17 00:00:00 2001 From: Hallvard Furuseth Date: Sat, 11 Jul 2015 21:10:33 +0200 Subject: [PATCH 23/34] Factor me_metas[toggle] out to mdb_env_pick_meta() --- libraries/liblmdb/mdb.c | 53 ++++++++++++++++++++++------------------- 1 file changed, 28 insertions(+), 25 deletions(-) diff --git a/libraries/liblmdb/mdb.c b/libraries/liblmdb/mdb.c index 2d8d0567ad..820417d3d8 100644 --- a/libraries/liblmdb/mdb.c +++ b/libraries/liblmdb/mdb.c @@ -1255,7 +1255,7 @@ static int mdb_page_split(MDB_cursor *mc, MDB_val *newkey, MDB_val *newdata, pgno_t newpgno, unsigned int nflags); static int mdb_env_read_header(MDB_env *env, MDB_meta *meta); -static int mdb_env_pick_meta(const MDB_env *env); +static MDB_meta *mdb_env_pick_meta(const MDB_env *env); static int mdb_env_write_meta(MDB_txn *txn); #ifdef MDB_USE_POSIX_MUTEX /* Drop unused excl arg */ # define mdb_env_close0(env, excl) mdb_env_close1(env) @@ -2560,7 +2560,7 @@ mdb_txn_renew0(MDB_txn *txn) if ((flags &= MDB_TXN_RDONLY) != 0) { if (!ti) { 
- meta = env->me_metas[ mdb_env_pick_meta(env) ]; + meta = mdb_env_pick_meta(env); txn->mt_txnid = meta->mm_txnid; txn->mt_u.reader = NULL; } else { @@ -2629,7 +2629,7 @@ mdb_txn_renew0(MDB_txn *txn) txn->mt_txnid = ti->mti_txnid; meta = env->me_metas[txn->mt_txnid & 1]; } else { - meta = env->me_metas[ mdb_env_pick_meta(env) ]; + meta = mdb_env_pick_meta(env); txn->mt_txnid = meta->mm_txnid; } txn->mt_txnid++; @@ -3771,12 +3771,13 @@ done: /** Check both meta pages to see which one is newer. * @param[in] env the environment handle - * @return meta toggle (0 or 1). + * @return newest #MDB_meta. */ -static int +static MDB_meta * mdb_env_pick_meta(const MDB_env *env) { - return (env->me_metas[0]->mm_txnid < env->me_metas[1]->mm_txnid); + MDB_meta *const *metas = env->me_metas; + return metas[ metas[0]->mm_txnid < metas[1]->mm_txnid ]; } int ESECT @@ -3900,7 +3901,7 @@ mdb_env_set_mapsize(MDB_env *env, size_t size) void *old; if (env->me_txn) return EINVAL; - meta = env->me_metas[mdb_env_pick_meta(env)]; + meta = mdb_env_pick_meta(env); if (!size) size = meta->mm_mapsize; { @@ -4107,12 +4108,12 @@ mdb_env_open2(MDB_env *env) #if MDB_DEBUG { - int toggle = mdb_env_pick_meta(env); - MDB_db *db = &env->me_metas[toggle]->mm_dbs[MAIN_DBI]; + MDB_meta *meta = mdb_env_pick_meta(env); + MDB_db *db = &meta->mm_dbs[MAIN_DBI]; DPRINTF(("opened database version %u, pagesize %u", - env->me_metas[0]->mm_version, env->me_psize)); - DPRINTF(("using meta page %d", toggle)); + meta->mm_version, env->me_psize)); + DPRINTF(("using meta page %d", (int) (meta->mm_txnid & 1))); DPRINTF(("depth: %u", db->md_depth)); DPRINTF(("entries: %"Z"u", db->md_entries)); DPRINTF(("branch pages: %"Z"u", db->md_branch_pages)); @@ -4199,9 +4200,10 @@ PIMAGE_TLS_CALLBACK mdb_tls_cbp = mdb_tls_callback; static int ESECT mdb_env_share_locks(MDB_env *env, int *excl) { - int rc = 0, toggle = mdb_env_pick_meta(env); + int rc = 0; + MDB_meta *meta = mdb_env_pick_meta(env); - env->me_txns->mti_txnid = 
env->me_metas[toggle]->mm_txnid; + env->me_txns->mti_txnid = meta->mm_txnid; #ifdef _WIN32 { @@ -9211,32 +9213,32 @@ mdb_stat0(MDB_env *env, MDB_db *db, MDB_stat *arg) int ESECT mdb_env_stat(MDB_env *env, MDB_stat *arg) { - int toggle; + MDB_meta *meta; if (env == NULL || arg == NULL) return EINVAL; - toggle = mdb_env_pick_meta(env); + meta = mdb_env_pick_meta(env); - return mdb_stat0(env, &env->me_metas[toggle]->mm_dbs[MAIN_DBI], arg); + return mdb_stat0(env, &meta->mm_dbs[MAIN_DBI], arg); } int ESECT mdb_env_info(MDB_env *env, MDB_envinfo *arg) { - int toggle; + MDB_meta *meta; if (env == NULL || arg == NULL) return EINVAL; - toggle = mdb_env_pick_meta(env); - arg->me_mapaddr = env->me_metas[toggle]->mm_address; + meta = mdb_env_pick_meta(env); + arg->me_mapaddr = meta->mm_address; + arg->me_last_pgno = meta->mm_last_pg; + arg->me_last_txnid = meta->mm_txnid; + arg->me_mapsize = env->me_mapsize; arg->me_maxreaders = env->me_maxreaders; arg->me_numreaders = env->me_txns ? env->me_txns->mti_numreaders : 0; - - arg->me_last_pgno = env->me_metas[toggle]->mm_last_pg; - arg->me_last_txnid = env->me_metas[toggle]->mm_txnid; return MDB_SUCCESS; } @@ -9744,7 +9746,8 @@ mdb_reader_check0(MDB_env *env, int rlocked, int *dead) static int ESECT mdb_mutex_failed(MDB_env *env, mdb_mutexref_t mutex, int rc) { - int toggle, rlocked, rc2; + int rlocked, rc2; + MDB_meta *meta; if (rc == MDB_OWNERDEAD) { /* We own the mutex. Clean up after dead previous owner. */ @@ -9754,8 +9757,8 @@ mdb_mutex_failed(MDB_env *env, mdb_mutexref_t mutex, int rc) /* Keep mti_txnid updated, otherwise next writer can * overwrite data which latest meta page refers to. 
*/ - toggle = mdb_env_pick_meta(env); - env->me_txns->mti_txnid = env->me_metas[toggle]->mm_txnid; + meta = mdb_env_pick_meta(env); + env->me_txns->mti_txnid = meta->mm_txnid; /* env is hosed if the dead thread was ours */ if (env->me_txn) { env->me_flags |= MDB_FATAL_ERROR; From 9266843fa6ee00a38b410ae038446ed66b750e77 Mon Sep 17 00:00:00 2001 From: Hallvard Furuseth Date: Sun, 19 Jul 2015 21:30:12 +0200 Subject: [PATCH 24/34] Move code into mdb_txn_end(). Was mdb_txn_reset0. Side effects: * Clean txn up a bit even before freeing it. * Tweak DEBUG output at txn end. Add DEBUG after commit(writer). --- libraries/liblmdb/mdb.c | 97 ++++++++++++++++++++--------------------- 1 file changed, 48 insertions(+), 49 deletions(-) diff --git a/libraries/liblmdb/mdb.c b/libraries/liblmdb/mdb.c index 820417d3d8..1668cb7758 100644 --- a/libraries/liblmdb/mdb.c +++ b/libraries/liblmdb/mdb.c @@ -1239,6 +1239,19 @@ static int mdb_page_alloc(MDB_cursor *mc, int num, MDB_page **mp); static int mdb_page_new(MDB_cursor *mc, uint32_t flags, int num, MDB_page **mp); static int mdb_page_touch(MDB_cursor *mc); +#define MDB_END_NAMES {"committed", "empty-commit", "abort", "reset", \ + "reset-tmp", "fail-begin", "fail-beginchild"} +enum { + /* mdb_txn_end operation number, for logging */ + MDB_END_COMMITTED, MDB_END_EMPTY_COMMIT, MDB_END_ABORT, MDB_END_RESET, + MDB_END_RESET_TMP, MDB_END_FAIL_BEGIN, MDB_END_FAIL_BEGINCHILD +}; +#define MDB_END_OPMASK 0x0F /**< mask for #mdb_txn_end() operation number */ +#define MDB_END_UPDATE 0x10 /**< update env state (DBIs) */ +#define MDB_END_FREE 0x20 /**< free txn unless it is #MDB_env.%me_txn0 */ +#define MDB_END_SLOT MDB_NOTLS /**< release any reader slot if #MDB_NOTLS */ +static void mdb_txn_end(MDB_txn *txn, unsigned mode); + static int mdb_page_get(MDB_txn *txn, pgno_t pgno, MDB_page **mp, int *lvl); static int mdb_page_search_root(MDB_cursor *mc, MDB_val *key, int modify); @@ -2484,12 +2497,6 @@ mdb_cursors_close(MDB_txn *txn, unsigned merge) } 
} -#if !(MDB_DEBUG) -#define mdb_txn_reset0(txn, act) mdb_txn_reset0(txn) -#endif -static void -mdb_txn_reset0(MDB_txn *txn, const char *act); - #if !(MDB_PIDLOCK) /* Currently the same as defined(_WIN32) */ enum Pidlock_op { Pidset, Pidcheck @@ -2669,11 +2676,7 @@ mdb_txn_renew0(MDB_txn *txn) txn->mt_dbflags[FREE_DBI] = DB_VALID; if (env->me_maxpg < txn->mt_next_pgno) { - mdb_txn_reset0(txn, "renew0-mapfail"); - if (new_notls) { - txn->mt_u.reader->mr_pid = 0; - txn->mt_u.reader = NULL; - } + mdb_txn_end(txn, new_notls /*0 or MDB_END_SLOT*/ | MDB_END_FAIL_BEGIN); return MDB_MAP_RESIZED; } @@ -2788,7 +2791,7 @@ mdb_txn_begin(MDB_env *env, MDB_txn *parent, unsigned int flags, MDB_txn **ret) if (!rc) rc = mdb_cursor_shadow(parent, txn); if (rc) - mdb_txn_reset0(txn, "beginchild-fail"); + mdb_txn_end(txn, MDB_END_FAIL_BEGINCHILD); } else { /* MDB_RDONLY */ txn->mt_dbiseqs = env->me_dbiseqs; renew: @@ -2851,35 +2854,44 @@ mdb_dbis_update(MDB_txn *txn, int keep) env->me_numdbs = n; } -/** Common code for #mdb_txn_reset() and #mdb_txn_abort(). +/** End a transaction, except successful commit of a nested transaction. * May be called twice for readonly txns: First reset it, then abort. - * @param[in] txn the transaction handle to reset - * @param[in] act why the transaction is being reset + * @param[in] txn the transaction handle to end + * @param[in] mode why and how to end the transaction */ static void -mdb_txn_reset0(MDB_txn *txn, const char *act) +mdb_txn_end(MDB_txn *txn, unsigned mode) { MDB_env *env = txn->mt_env; +#if MDB_DEBUG + static const char *const names[] = MDB_END_NAMES; +#endif - /* Close any DBI handles opened in this txn */ - mdb_dbis_update(txn, 0); + /* Export or close DBI handles opened in this txn */ + mdb_dbis_update(txn, mode & MDB_END_UPDATE); DPRINTF(("%s txn %"Z"u%c %p on mdbenv %p, root page %"Z"u", - act, txn->mt_txnid, (txn->mt_flags & MDB_TXN_RDONLY) ? 
'r' : 'w', + names[mode & MDB_END_OPMASK], + txn->mt_txnid, (txn->mt_flags & MDB_TXN_RDONLY) ? 'r' : 'w', (void *) txn, (void *)env, txn->mt_dbs[MAIN_DBI].md_root)); if (F_ISSET(txn->mt_flags, MDB_TXN_RDONLY)) { if (txn->mt_u.reader) { txn->mt_u.reader->mr_txnid = (txnid_t)-1; - if (!(env->me_flags & MDB_NOTLS)) + if (!(env->me_flags & MDB_NOTLS)) { txn->mt_u.reader = NULL; /* txn does not own reader */ + } else if (mode & MDB_END_SLOT) { + txn->mt_u.reader->mr_pid = 0; + txn->mt_u.reader = NULL; + } /* else txn owns the slot until it does MDB_END_SLOT */ } txn->mt_numdbs = 0; /* close nothing if called again */ txn->mt_dbxs = NULL; /* mark txn as reset */ } else { pgno_t *pghead = env->me_pghead; - mdb_cursors_close(txn, 0); + if (!(mode & MDB_END_UPDATE)) /* !(already closed cursors) */ + mdb_cursors_close(txn, 0); if (!(env->me_flags & MDB_WRITEMAP)) { mdb_dlist_free(txn); } @@ -2892,6 +2904,8 @@ mdb_txn_reset0(MDB_txn *txn, const char *act) env->me_pglast = 0; env->me_txn = NULL; + mode = 0; /* txn == env->me_txn0, do not free() it */ + /* The writer mutex was locked in mdb_txn_begin. */ if (env->me_txns) UNLOCK_MUTEX(env->me_wmutex); @@ -2905,6 +2919,9 @@ mdb_txn_reset0(MDB_txn *txn, const char *act) mdb_midl_free(pghead); } + + if (mode & MDB_END_FREE) + free(txn); } void @@ -2917,7 +2934,7 @@ mdb_txn_reset(MDB_txn *txn) if (!(txn->mt_flags & MDB_TXN_RDONLY)) return; - mdb_txn_reset0(txn, "reset"); + mdb_txn_end(txn, MDB_END_RESET); } void @@ -2929,13 +2946,7 @@ mdb_txn_abort(MDB_txn *txn) if (txn->mt_child) mdb_txn_abort(txn->mt_child); - mdb_txn_reset0(txn, "abort"); - /* Free reader slot tied to this txn (if MDB_NOTLS && writable FS) */ - if ((txn->mt_flags & MDB_TXN_RDONLY) && txn->mt_u.reader) - txn->mt_u.reader->mr_pid = 0; - - if (txn != txn->mt_env->me_txn0) - free(txn); + mdb_txn_end(txn, MDB_END_ABORT|MDB_END_SLOT|MDB_END_FREE); } /** Save the freelist as of this transaction to the freeDB. 
@@ -3286,12 +3297,15 @@ int mdb_txn_commit(MDB_txn *txn) { int rc; - unsigned int i; + unsigned int i, end_mode; MDB_env *env; if (txn == NULL) return EINVAL; + /* mdb_txn_end() mode for a commit which writes nothing */ + end_mode = MDB_END_EMPTY_COMMIT|MDB_END_UPDATE|MDB_END_SLOT|MDB_END_FREE; + if (txn->mt_child) { rc = mdb_txn_commit(txn->mt_child); if (rc) @@ -3301,10 +3315,7 @@ mdb_txn_commit(MDB_txn *txn) env = txn->mt_env; if (F_ISSET(txn->mt_flags, MDB_TXN_RDONLY)) { - mdb_dbis_update(txn, 1); - txn->mt_numdbs = 2; /* so txn_abort() doesn't close any new handles */ - mdb_txn_abort(txn); - return MDB_SUCCESS; + goto done; } if (F_ISSET(txn->mt_flags, MDB_TXN_ERROR)) { @@ -3472,7 +3483,6 @@ mdb_txn_commit(MDB_txn *txn) mdb_midl_free(env->me_pghead); env->me_pghead = NULL; mdb_midl_shrink(&txn->mt_free_pgs); - env->me_free_pgs = txn->mt_free_pgs; #if (MDB_DEBUG) > 2 mdb_audit(txn); @@ -3482,21 +3492,10 @@ mdb_txn_commit(MDB_txn *txn) (rc = mdb_env_sync(env, 0)) || (rc = mdb_env_write_meta(txn))) goto fail; - - /* Free P_LOOSE pages left behind in dirty_list */ - if (!(env->me_flags & MDB_WRITEMAP)) - mdb_dlist_free(txn); + end_mode = MDB_END_COMMITTED|MDB_END_UPDATE; done: - env->me_pglast = 0; - env->me_txn = NULL; - mdb_dbis_update(txn, 1); - - if (env->me_txns) - UNLOCK_MUTEX(env->me_wmutex); - if (txn != env->me_txn0) - free(txn); - + mdb_txn_end(txn, end_mode); return MDB_SUCCESS; fail: @@ -8969,7 +8968,7 @@ mdb_env_copyfd0(MDB_env *env, HANDLE fd) if (env->me_txns) { /* We must start the actual read txn after blocking writers */ - mdb_txn_reset0(txn, "reset-stage1"); + mdb_txn_end(txn, MDB_END_RESET_TMP); /* Temporarily block writers until we snapshot the meta pages */ wmutex = env->me_wmutex; From a3ae2e7db1de68989a56d86aadcce68eb86e1e8b Mon Sep 17 00:00:00 2001 From: Hallvard Furuseth Date: Sun, 19 Jul 2015 21:31:25 +0200 Subject: [PATCH 25/34] Catch most uses of finished/parent txns. * Add MDB_TXN_FINISHED, MDB_TXN_HAS_CHILD, MDB_TXN_BLOCKED. 
* Clear mt_numdbs in writers, for TXN_DBI_EXIST() to catch. We already do in readers. --- libraries/liblmdb/lmdb.h | 2 +- libraries/liblmdb/mdb.c | 57 ++++++++++++++++++++++++---------------- 2 files changed, 35 insertions(+), 24 deletions(-) diff --git a/libraries/liblmdb/lmdb.h b/libraries/liblmdb/lmdb.h index 1e934f4767..19fb1fe07a 100644 --- a/libraries/liblmdb/lmdb.h +++ b/libraries/liblmdb/lmdb.h @@ -431,7 +431,7 @@ typedef enum MDB_cursor_op { #define MDB_INCOMPATIBLE (-30784) /** Invalid reuse of reader locktable slot */ #define MDB_BAD_RSLOT (-30783) - /** Transaction cannot recover - it must be aborted */ + /** Transaction must abort, has a child, or is invalid */ #define MDB_BAD_TXN (-30782) /** Unsupported size of key/DB name/data, or wrong DUPFIXED size */ #define MDB_BAD_VALSIZE (-30781) diff --git a/libraries/liblmdb/mdb.c b/libraries/liblmdb/mdb.c index 1668cb7758..f38d631dc8 100644 --- a/libraries/liblmdb/mdb.c +++ b/libraries/liblmdb/mdb.c @@ -995,7 +995,8 @@ typedef struct MDB_dbx { */ struct MDB_txn { MDB_txn *mt_parent; /**< parent of a nested txn */ - MDB_txn *mt_child; /**< nested txn under this txn */ + /** Nested txn under this txn, set together with flag #MDB_TXN_HAS_CHILD */ + MDB_txn *mt_child; pgno_t mt_next_pgno; /**< next unallocated page */ /** The ID of this transaction. IDs are integers incrementing from 1. * Only committed write transactions increment the ID. If a transaction @@ -1043,8 +1044,9 @@ struct MDB_txn { MDB_cursor **mt_cursors; /** Array of flags for each DB */ unsigned char *mt_dbflags; - /** Number of DB records in use. This number only ever increments; - * we don't decrement it when individual DB handles are closed. + /** Number of DB records in use, or 0 when the txn is finished. + * This number only ever increments until the txn finishes; we + * don't decrement it when individual DB handles are closed. 
*/ MDB_dbi mt_numdbs; @@ -1057,9 +1059,13 @@ struct MDB_txn { #define MDB_TXN_RDONLY MDB_RDONLY /**< read-only transaction */ /* internal txn flags */ #define MDB_TXN_WRITEMAP MDB_WRITEMAP /**< copy of #MDB_env flag in writers */ +#define MDB_TXN_FINISHED 0x01 /**< txn is finished or never began */ #define MDB_TXN_ERROR 0x02 /**< txn is unusable after an error */ #define MDB_TXN_DIRTY 0x04 /**< must write, even if dirty list is empty */ #define MDB_TXN_SPILLS 0x08 /**< txn or a parent has spilled pages */ +#define MDB_TXN_HAS_CHILD 0x10 /**< txn has an #MDB_txn.%mt_child */ + /** most operations on the txn are currently illegal */ +#define MDB_TXN_BLOCKED (MDB_TXN_FINISHED|MDB_TXN_ERROR|MDB_TXN_HAS_CHILD) /** @} */ unsigned int mt_flags; /**< @ref mdb_txn */ /** #dirty_list room: Array size - \#dirty pages visible to this txn. @@ -1356,7 +1362,7 @@ static char *const mdb_errstr[] = { "MDB_MAP_RESIZED: Database contents grew beyond environment mapsize", "MDB_INCOMPATIBLE: Operation and DB incompatible, or DB flags changed", "MDB_BAD_RSLOT: Invalid reuse of reader locktable slot", - "MDB_BAD_TXN: Transaction cannot recover - it must be aborted", + "MDB_BAD_TXN: Transaction must abort, has a child, or is invalid", "MDB_BAD_VALSIZE: Unsupported size of key/DB name/data, or wrong DUPFIXED size", "MDB_BAD_DBI: The specified DBI handle was closed/changed unexpectedly", }; @@ -2725,9 +2731,7 @@ mdb_txn_begin(MDB_env *env, MDB_txn *parent, unsigned int flags, MDB_txn **ret) if (parent) { /* Nested transactions: Max 1 child, write txns only, no writemap */ flags |= parent->mt_flags; - if (parent->mt_child || - (flags & (MDB_RDONLY|MDB_WRITEMAP|MDB_TXN_ERROR))) - { + if (flags & (MDB_RDONLY|MDB_WRITEMAP|MDB_TXN_BLOCKED)) { return (parent->mt_flags & MDB_TXN_RDONLY) ? 
EINVAL : MDB_BAD_TXN; } /* Child txns save MDB_pgstate and use own copy of cursors */ @@ -2769,6 +2773,7 @@ mdb_txn_begin(MDB_env *env, MDB_txn *parent, unsigned int flags, MDB_txn **ret) txn->mt_u.dirty_list[0].mid = 0; txn->mt_spill_pgs = NULL; txn->mt_next_pgno = parent->mt_next_pgno; + parent->mt_flags |= MDB_TXN_HAS_CHILD; parent->mt_child = txn; txn->mt_parent = parent; txn->mt_numdbs = parent->mt_numdbs; @@ -2885,9 +2890,10 @@ mdb_txn_end(MDB_txn *txn, unsigned mode) txn->mt_u.reader = NULL; } /* else txn owns the slot until it does MDB_END_SLOT */ } - txn->mt_numdbs = 0; /* close nothing if called again */ + txn->mt_numdbs = 0; /* prevent further DBI activity */ + txn->mt_flags |= MDB_TXN_FINISHED; txn->mt_dbxs = NULL; /* mark txn as reset */ - } else { + } else if (!F_ISSET(txn->mt_flags, MDB_TXN_FINISHED)) { pgno_t *pghead = env->me_pghead; if (!(mode & MDB_END_UPDATE)) /* !(already closed cursors) */ @@ -2896,6 +2902,9 @@ mdb_txn_end(MDB_txn *txn, unsigned mode) mdb_dlist_free(txn); } + txn->mt_numdbs = 0; + txn->mt_flags = MDB_TXN_FINISHED; + if (!txn->mt_parent) { mdb_midl_shrink(&txn->mt_free_pgs); env->me_free_pgs = txn->mt_free_pgs; @@ -2911,6 +2920,7 @@ mdb_txn_end(MDB_txn *txn, unsigned mode) UNLOCK_MUTEX(env->me_wmutex); } else { txn->mt_parent->mt_child = NULL; + txn->mt_parent->mt_flags &= ~MDB_TXN_HAS_CHILD; env->me_pgstate = ((MDB_ntxn *)txn)->mnt_pgstate; mdb_midl_free(txn->mt_free_pgs); mdb_midl_free(txn->mt_spill_pgs); @@ -3318,8 +3328,8 @@ mdb_txn_commit(MDB_txn *txn) goto done; } - if (F_ISSET(txn->mt_flags, MDB_TXN_ERROR)) { - DPUTS("error flag is set, can't commit"); + if (txn->mt_flags & (MDB_TXN_FINISHED|MDB_TXN_ERROR)) { + DPUTS("txn has failed/finished, can't commit"); if (txn->mt_parent) txn->mt_parent->mt_flags |= MDB_TXN_ERROR; rc = MDB_BAD_TXN; @@ -4742,6 +4752,7 @@ mdb_env_open(MDB_env *env, const char *path, unsigned int flags, mdb_mode_t mode txn->mt_dbflags = (unsigned char *)(txn->mt_dbiseqs + env->me_maxdbs); txn->mt_env 
= env; txn->mt_dbxs = env->me_dbxs; + txn->mt_flags = MDB_TXN_FINISHED; env->me_txn0 = txn; } else { rc = ENOMEM; @@ -5284,8 +5295,8 @@ mdb_page_search(MDB_cursor *mc, MDB_val *key, int flags) /* Make sure the txn is still viable, then find the root from * the txn's db table and set it as the root of the cursor's stack. */ - if (F_ISSET(mc->mc_txn->mt_flags, MDB_TXN_ERROR)) { - DPUTS("transaction has failed, must abort"); + if (mc->mc_txn->mt_flags & MDB_TXN_BLOCKED) { + DPUTS("transaction may not be used now"); return MDB_BAD_TXN; } else { /* Make sure we're using an up-to-date root */ @@ -5472,7 +5483,7 @@ mdb_get(MDB_txn *txn, MDB_dbi dbi, if (!key || !data || !TXN_DBI_EXIST(txn, dbi, DB_USRVALID)) return EINVAL; - if (txn->mt_flags & MDB_TXN_ERROR) + if (txn->mt_flags & MDB_TXN_BLOCKED) return MDB_BAD_TXN; mdb_cursor_init(&mc, txn, dbi, &mx); @@ -5993,7 +6004,7 @@ mdb_cursor_get(MDB_cursor *mc, MDB_val *key, MDB_val *data, if (mc == NULL) return EINVAL; - if (mc->mc_txn->mt_flags & MDB_TXN_ERROR) + if (mc->mc_txn->mt_flags & MDB_TXN_BLOCKED) return MDB_BAD_TXN; switch (op) { @@ -6223,7 +6234,7 @@ mdb_cursor_put(MDB_cursor *mc, MDB_val *key, MDB_val *data, nospill = flags & MDB_NOSPILL; flags &= ~MDB_NOSPILL; - if (mc->mc_txn->mt_flags & (MDB_TXN_RDONLY|MDB_TXN_ERROR)) + if (mc->mc_txn->mt_flags & (MDB_TXN_RDONLY|MDB_TXN_BLOCKED)) return (mc->mc_txn->mt_flags & MDB_TXN_RDONLY) ? EACCES : MDB_BAD_TXN; if (key->mv_size-1 >= ENV_MAXKEY(env)) @@ -6716,7 +6727,7 @@ mdb_cursor_del(MDB_cursor *mc, unsigned int flags) MDB_page *mp; int rc; - if (mc->mc_txn->mt_flags & (MDB_TXN_RDONLY|MDB_TXN_ERROR)) + if (mc->mc_txn->mt_flags & (MDB_TXN_RDONLY|MDB_TXN_BLOCKED)) return (mc->mc_txn->mt_flags & MDB_TXN_RDONLY) ? 
EACCES : MDB_BAD_TXN; if (!(mc->mc_flags & C_INITIALIZED)) @@ -7285,7 +7296,7 @@ mdb_cursor_open(MDB_txn *txn, MDB_dbi dbi, MDB_cursor **ret) if (!ret || !TXN_DBI_EXIST(txn, dbi, DB_VALID)) return EINVAL; - if (txn->mt_flags & MDB_TXN_ERROR) + if (txn->mt_flags & MDB_TXN_BLOCKED) return MDB_BAD_TXN; /* Allow read access to the freelist */ @@ -7320,7 +7331,7 @@ mdb_cursor_renew(MDB_txn *txn, MDB_cursor *mc) if ((mc->mc_flags & C_UNTRACK) || txn->mt_cursors) return EINVAL; - if (txn->mt_flags & MDB_TXN_ERROR) + if (txn->mt_flags & MDB_TXN_BLOCKED) return MDB_BAD_TXN; mdb_cursor_init(mc, txn, mc->mc_dbi, mc->mc_xcursor); @@ -7339,7 +7350,7 @@ mdb_cursor_count(MDB_cursor *mc, size_t *countp) if (mc->mc_xcursor == NULL) return MDB_INCOMPATIBLE; - if (mc->mc_txn->mt_flags & MDB_TXN_ERROR) + if (mc->mc_txn->mt_flags & MDB_TXN_BLOCKED) return MDB_BAD_TXN; if (!(mc->mc_flags & C_INITIALIZED)) @@ -8089,7 +8100,7 @@ mdb_del(MDB_txn *txn, MDB_dbi dbi, if (!key || !TXN_DBI_EXIST(txn, dbi, DB_USRVALID)) return EINVAL; - if (txn->mt_flags & (MDB_TXN_RDONLY|MDB_TXN_ERROR)) + if (txn->mt_flags & (MDB_TXN_RDONLY|MDB_TXN_BLOCKED)) return (txn->mt_flags & MDB_TXN_RDONLY) ? EACCES : MDB_BAD_TXN; if (!F_ISSET(txn->mt_dbs[dbi].md_flags, MDB_DUPSORT)) { @@ -9276,7 +9287,7 @@ int mdb_dbi_open(MDB_txn *txn, const char *name, unsigned int flags, MDB_dbi *db if (flags & ~VALID_FLAGS) return EINVAL; - if (txn->mt_flags & MDB_TXN_ERROR) + if (txn->mt_flags & MDB_TXN_BLOCKED) return MDB_BAD_TXN; /* main DB? 
*/ @@ -9374,7 +9385,7 @@ mdb_stat(MDB_txn *txn, MDB_dbi dbi, MDB_stat *arg) if (!arg || !TXN_DBI_EXIST(txn, dbi, DB_VALID)) return EINVAL; - if (txn->mt_flags & MDB_TXN_ERROR) + if (txn->mt_flags & MDB_TXN_BLOCKED) return MDB_BAD_TXN; if (txn->mt_dbflags[dbi] & DB_STALE) { From 602c9787614324be6b6f4846d70d0998ef876f3b Mon Sep 17 00:00:00 2001 From: Hallvard Furuseth Date: Sun, 19 Jul 2015 21:31:54 +0200 Subject: [PATCH 26/34] Cleanup: Drop !mt_dbxs hack, use MDB_TXN_FINISHED --- libraries/liblmdb/mdb.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/libraries/liblmdb/mdb.c b/libraries/liblmdb/mdb.c index f38d631dc8..fb12cf4459 100644 --- a/libraries/liblmdb/mdb.c +++ b/libraries/liblmdb/mdb.c @@ -2633,7 +2633,7 @@ mdb_txn_renew0(MDB_txn *txn) txn->mt_u.reader = r; meta = env->me_metas[txn->mt_txnid & 1]; } - txn->mt_dbxs = env->me_dbxs; /* mostly static anyway */ + } else { /* Not yet touching txn == env->me_txn0, it may be active */ if (ti) { @@ -2694,7 +2694,7 @@ mdb_txn_renew(MDB_txn *txn) { int rc; - if (!txn || txn->mt_dbxs) /* A reset txn has mt_dbxs==NULL */ + if (!txn || !F_ISSET(txn->mt_flags, MDB_TXN_RDONLY|MDB_TXN_FINISHED)) return EINVAL; if (txn->mt_env->me_flags & MDB_FATAL_ERROR) { @@ -2751,6 +2751,7 @@ mdb_txn_begin(MDB_env *env, MDB_txn *parent, unsigned int flags, MDB_txn **ret) DPRINTF(("calloc: %s", strerror(errno))); return ENOMEM; } + txn->mt_dbxs = env->me_dbxs; /* static */ txn->mt_dbs = (MDB_db *) ((char *)txn + tsize); txn->mt_dbflags = (unsigned char *)txn + size - env->me_maxdbs; txn->mt_flags = flags; @@ -2777,7 +2778,6 @@ mdb_txn_begin(MDB_env *env, MDB_txn *parent, unsigned int flags, MDB_txn **ret) parent->mt_child = txn; txn->mt_parent = parent; txn->mt_numdbs = parent->mt_numdbs; - txn->mt_dbxs = parent->mt_dbxs; memcpy(txn->mt_dbs, parent->mt_dbs, txn->mt_numdbs * sizeof(MDB_db)); /* Copy parent's mt_dbflags, but clear DB_NEW */ for (i=0; i<txn->mt_numdbs; i++) @@ -2892,7 +2892,7 @@ mdb_txn_end(MDB_txn *txn,
unsigned mode) } txn->mt_numdbs = 0; /* prevent further DBI activity */ txn->mt_flags |= MDB_TXN_FINISHED; - txn->mt_dbxs = NULL; /* mark txn as reset */ + } else if (!F_ISSET(txn->mt_flags, MDB_TXN_FINISHED)) { pgno_t *pghead = env->me_pghead; From 01342bd5d202596f61589b03ec40dc3377fdb0c5 Mon Sep 17 00:00:00 2001 From: Hallvard Furuseth Date: Sun, 19 Jul 2015 21:43:10 +0200 Subject: [PATCH 27/34] ITS#7377 Always notice env error on txn startup. Move the check to the end of txn startup. Catches env breakage which happens while the new txn waits for a lock. --- libraries/liblmdb/mdb.c | 23 +++++++++-------------- 1 file changed, 9 insertions(+), 14 deletions(-) diff --git a/libraries/liblmdb/mdb.c b/libraries/liblmdb/mdb.c index fb12cf4459..4a2dd0a51d 100644 --- a/libraries/liblmdb/mdb.c +++ b/libraries/liblmdb/mdb.c @@ -2681,12 +2681,16 @@ mdb_txn_renew0(MDB_txn *txn) txn->mt_dbflags[MAIN_DBI] = DB_VALID|DB_USRVALID; txn->mt_dbflags[FREE_DBI] = DB_VALID; - if (env->me_maxpg < txn->mt_next_pgno) { - mdb_txn_end(txn, new_notls /*0 or MDB_END_SLOT*/ | MDB_END_FAIL_BEGIN); - return MDB_MAP_RESIZED; + if (env->me_flags & MDB_FATAL_ERROR) { + DPUTS("environment had fatal error, must shutdown!"); + rc = MDB_PANIC; + } else if (env->me_maxpg < txn->mt_next_pgno) { + rc = MDB_MAP_RESIZED; + } else { + return MDB_SUCCESS; } - - return MDB_SUCCESS; + mdb_txn_end(txn, new_notls /*0 or MDB_END_SLOT*/ | MDB_END_FAIL_BEGIN); + return rc; } int @@ -2697,11 +2701,6 @@ mdb_txn_renew(MDB_txn *txn) if (!txn || !F_ISSET(txn->mt_flags, MDB_TXN_RDONLY|MDB_TXN_FINISHED)) return EINVAL; - if (txn->mt_env->me_flags & MDB_FATAL_ERROR) { - DPUTS("environment had fatal error, must shutdown!"); - return MDB_PANIC; - } - rc = mdb_txn_renew0(txn); if (rc == MDB_SUCCESS) { DPRINTF(("renew txn %"Z"u%c %p on mdbenv %p, root page %"Z"u", @@ -2721,10 +2720,6 @@ mdb_txn_begin(MDB_env *env, MDB_txn *parent, unsigned int flags, MDB_txn **ret) flags &= MDB_TXN_BEGIN_FLAGS; flags |= env->me_flags & 
MDB_WRITEMAP; - if (env->me_flags & MDB_FATAL_ERROR) { - DPUTS("environment had fatal error, must shutdown!"); - return MDB_PANIC; - } if (env->me_flags & MDB_RDONLY & ~flags) /* write txn in RDONLY env */ return EACCES; From 8901d9cfbcbe6a8a3278040485a4481af399e3bd Mon Sep 17 00:00:00 2001 From: Hallvard Furuseth Date: Sun, 19 Jul 2015 21:44:02 +0200 Subject: [PATCH 28/34] ITS#7377 Catch mdb_put() to blocked/read-only txns ...early enough that txn state is left unchanged. --- libraries/liblmdb/mdb.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/libraries/liblmdb/mdb.c b/libraries/liblmdb/mdb.c index 4a2dd0a51d..b78178e4c3 100644 --- a/libraries/liblmdb/mdb.c +++ b/libraries/liblmdb/mdb.c @@ -8564,6 +8564,9 @@ mdb_put(MDB_txn *txn, MDB_dbi dbi, if (flags & ~(MDB_NOOVERWRITE|MDB_NODUPDATA|MDB_RESERVE|MDB_APPEND|MDB_APPENDDUP)) return EINVAL; + if (txn->mt_flags & (MDB_TXN_RDONLY|MDB_TXN_BLOCKED)) + return (txn->mt_flags & MDB_TXN_RDONLY) ? EACCES : MDB_BAD_TXN; + mdb_cursor_init(&mc, txn, dbi, &mx); return mdb_cursor_put(&mc, key, data, flags); } From bde5c231bc7fbecfcd3ae1900f1454e89f122225 Mon Sep 17 00:00:00 2001 From: Hallvard Furuseth Date: Sun, 26 Jul 2015 07:16:41 +0200 Subject: [PATCH 29/34] Kill magic numbers for NUM_METAS, databases --- libraries/liblmdb/mdb.c | 101 +++++++++++++++++++++------------------- 1 file changed, 53 insertions(+), 48 deletions(-) diff --git a/libraries/liblmdb/mdb.c b/libraries/liblmdb/mdb.c index b78178e4c3..8f29ec560c 100644 --- a/libraries/liblmdb/mdb.c +++ b/libraries/liblmdb/mdb.c @@ -943,6 +943,11 @@ typedef struct MDB_db { #define FREE_DBI 0 /** Handle for the default DB. */ #define MAIN_DBI 1 + /** Number of DBs in metapage (free and main) - also hardcoded elsewhere */ +#define CORE_DBS 2 + + /** Number of meta pages - also hardcoded elsewhere */ +#define NUM_METAS 2 /** Meta page content. * A meta page is the start point for accessing a database snapshot. 
@@ -956,11 +961,11 @@ typedef struct MDB_meta { uint32_t mm_version; void *mm_address; /**< address for fixed mapping */ size_t mm_mapsize; /**< size of mmap region */ - MDB_db mm_dbs[2]; /**< first is free space, 2nd is main db */ + MDB_db mm_dbs[CORE_DBS]; /**< first is free space, 2nd is main db */ /** The size of pages used in this DB */ -#define mm_psize mm_dbs[0].md_pad +#define mm_psize mm_dbs[FREE_DBI].md_pad /** Any persistent environment flags. @ref mdb_env */ -#define mm_flags mm_dbs[0].md_flags +#define mm_flags mm_dbs[FREE_DBI].md_flags pgno_t mm_last_pg; /**< last used page in file */ volatile txnid_t mm_txnid; /**< txnid that committed this page */ } MDB_meta; @@ -1175,7 +1180,7 @@ struct MDB_env { char *me_path; /**< path to the DB files */ char *me_map; /**< the memory map of the data file */ MDB_txninfo *me_txns; /**< the memory map of the lock file or NULL */ - MDB_meta *me_metas[2]; /**< pointers to the two meta pages */ + MDB_meta *me_metas[NUM_METAS]; /**< pointers to the two meta pages */ void *me_pbuf; /**< scratch area for DUPSORT put() */ MDB_txn *me_txn; /**< current write transaction */ MDB_txn *me_txn0; /**< prealloc'd write transaction */ @@ -1624,9 +1629,10 @@ static void mdb_audit(MDB_txn *txn) mdb_tassert(txn, rc == MDB_NOTFOUND); } } - if (freecount + count + 2 /* metapages */ != txn->mt_next_pgno) { + if (freecount + count + NUM_METAS != txn->mt_next_pgno) { fprintf(stderr, "audit: %lu freecount: %lu count: %lu total: %lu next_pgno: %lu\n", - txn->mt_txnid, freecount, count+2, freecount+count+2, txn->mt_next_pgno); + txn->mt_txnid, freecount, count+NUM_METAS, + freecount+count+NUM_METAS, txn->mt_next_pgno); } } #endif @@ -1895,7 +1901,7 @@ mdb_page_spill(MDB_cursor *m0, MDB_val *key, MDB_val *data) /* Estimate how much space this op will take */ i = m0->mc_db->md_depth; /* Named DBs also dirty the main DB */ - if (m0->mc_dbi > MAIN_DBI) + if (m0->mc_dbi >= CORE_DBS) i += txn->mt_dbs[MAIN_DBI].md_depth; /* For puts, roughly factor 
in the key+data size */ if (key) @@ -2664,7 +2670,7 @@ mdb_txn_renew0(MDB_txn *txn) } /* Copy the DB info and flags */ - memcpy(txn->mt_dbs, meta->mm_dbs, 2 * sizeof(MDB_db)); + memcpy(txn->mt_dbs, meta->mm_dbs, CORE_DBS * sizeof(MDB_db)); /* Moved to here to avoid a data race in read TXNs */ txn->mt_next_pgno = meta->mm_last_pg+1; @@ -2673,7 +2679,7 @@ mdb_txn_renew0(MDB_txn *txn) /* Setup db info */ txn->mt_numdbs = env->me_numdbs; - for (i=2; i<txn->mt_numdbs; i++) { + for (i=CORE_DBS; i<txn->mt_numdbs; i++) { x = env->me_dbflags[i]; txn->mt_dbs[i].md_flags = x & PERSISTENT_FLAGS; txn->mt_dbflags[i] = (x & MDB_VALID) ? DB_VALID|DB_USRVALID|DB_STALE : 0; @@ -2834,7 +2840,7 @@ mdb_dbis_update(MDB_txn *txn, int keep) MDB_env *env = txn->mt_env; unsigned char *tdbflags = txn->mt_dbflags; - for (i = n; --i >= 2;) { + for (i = n; --i >= CORE_DBS;) { if (tdbflags[i] & DB_NEW) { if (keep) { env->me_dbflags[i] = txn->mt_dbs[i].md_flags | MDB_VALID; @@ -3356,9 +3362,9 @@ mdb_txn_commit(MDB_txn *txn) /* Update parent's DB table.
*/ memcpy(parent->mt_dbs, txn->mt_dbs, txn->mt_numdbs * sizeof(MDB_db)); parent->mt_numdbs = txn->mt_numdbs; - parent->mt_dbflags[0] = txn->mt_dbflags[0]; - parent->mt_dbflags[1] = txn->mt_dbflags[1]; - for (i=2; i<txn->mt_numdbs; i++) { + parent->mt_dbflags[FREE_DBI] = txn->mt_dbflags[FREE_DBI]; + parent->mt_dbflags[MAIN_DBI] = txn->mt_dbflags[MAIN_DBI]; + for (i=CORE_DBS; i<txn->mt_numdbs; i++) { /* preserve parent's DB_NEW status */ x = parent->mt_dbflags[i] & DB_NEW; parent->mt_dbflags[i] = txn->mt_dbflags[i] | x; @@ -3459,14 +3465,14 @@ mdb_txn_commit(MDB_txn *txn) txn->mt_txnid, (void*)txn, (void*)env, txn->mt_dbs[MAIN_DBI].md_root)); /* Update DB root pointers */ - if (txn->mt_numdbs > 2) { + if (txn->mt_numdbs > CORE_DBS) { MDB_cursor mc; MDB_dbi i; MDB_val data; data.mv_size = sizeof(MDB_db); mdb_cursor_init(&mc, txn, MAIN_DBI, NULL); - for (i = 2; i < txn->mt_numdbs; i++) { + for (i = CORE_DBS; i < txn->mt_numdbs; i++) { if (txn->mt_dbflags[i] & DB_DIRTY) { if (TXN_DBI_CHANGED(txn, i)) { rc = MDB_BAD_DBI; @@ -3527,7 +3533,7 @@ mdb_env_read_header(MDB_env *env, MDB_meta *meta) * Read both meta pages so we can use the latest one. */ - for (i=off=0; i<2; i++, off = meta->mm_psize) { + for (i=off=0; i<NUM_METAS; i++, off = meta->mm_psize) { #ifdef _WIN32 DWORD len; OVERLAPPED ov; @@ -3580,11 +3586,11 @@ mdb_env_init_meta0(MDB_env *env, MDB_meta *meta) meta->mm_version = MDB_DATA_VERSION; meta->mm_mapsize = env->me_mapsize; meta->mm_psize = env->me_psize; - meta->mm_last_pg = 1; + meta->mm_last_pg = NUM_METAS-1; meta->mm_flags = env->me_flags & 0xffff; - meta->mm_flags |= MDB_INTEGERKEY; - meta->mm_dbs[0].md_root = P_INVALID; - meta->mm_dbs[1].md_root = P_INVALID; + meta->mm_flags |= MDB_INTEGERKEY; /* this is mm_dbs[FREE_DBI].md_flags */ + meta->mm_dbs[FREE_DBI].md_root = P_INVALID; + meta->mm_dbs[MAIN_DBI].md_root = P_INVALID; } /** Write the environment parameters of a freshly created DB environment.
@@ -3617,7 +3623,7 @@ mdb_env_init_meta(MDB_env *env, MDB_meta *meta) psize = env->me_psize; - p = calloc(2, psize); + p = calloc(NUM_METAS, psize); if (!p) return ENOMEM; @@ -3630,10 +3636,10 @@ mdb_env_init_meta(MDB_env *env, MDB_meta *meta) q->mp_flags = P_META; *(MDB_meta *)METADATA(q) = *meta; - DO_PWRITE(rc, env->me_fd, p, psize * 2, len, 0); + DO_PWRITE(rc, env->me_fd, p, psize * NUM_METAS, len, 0); if (!rc) rc = ErrCode(); - else if ((unsigned) len == psize * 2) + else if ((unsigned) len == psize * NUM_METAS) rc = MDB_SUCCESS; else rc = ENOSPC; @@ -3676,8 +3682,8 @@ mdb_env_write_meta(MDB_txn *txn) if (flags & MDB_WRITEMAP) { mp->mm_mapsize = mapsize; - mp->mm_dbs[0] = txn->mt_dbs[0]; - mp->mm_dbs[1] = txn->mt_dbs[1]; + mp->mm_dbs[FREE_DBI] = txn->mt_dbs[FREE_DBI]; + mp->mm_dbs[MAIN_DBI] = txn->mt_dbs[MAIN_DBI]; mp->mm_last_pg = txn->mt_next_pgno - 1; #if (__GNUC__ * 100 + __GNUC_MINOR__ >= 404) && /* TODO: portability */ \ !(defined(__i386__) || defined(__x86_64__)) @@ -3708,8 +3714,8 @@ mdb_env_write_meta(MDB_txn *txn) metab.mm_last_pg = env->me_metas[toggle]->mm_last_pg; meta.mm_mapsize = mapsize; - meta.mm_dbs[0] = txn->mt_dbs[0]; - meta.mm_dbs[1] = txn->mt_dbs[1]; + meta.mm_dbs[FREE_DBI] = txn->mt_dbs[FREE_DBI]; + meta.mm_dbs[MAIN_DBI] = txn->mt_dbs[MAIN_DBI]; meta.mm_last_pg = txn->mt_next_pgno - 1; meta.mm_txnid = txn->mt_txnid; @@ -3794,7 +3800,7 @@ mdb_env_create(MDB_env **env) return ENOMEM; e->me_maxreaders = DEFAULT_READERS; - e->me_maxdbs = e->me_numdbs = 2; + e->me_maxdbs = e->me_numdbs = CORE_DBS; e->me_fd = INVALID_HANDLE_VALUE; e->me_lfd = INVALID_HANDLE_VALUE; e->me_mfd = INVALID_HANDLE_VALUE; @@ -3932,7 +3938,7 @@ mdb_env_set_maxdbs(MDB_env *env, MDB_dbi dbs) { if (env->me_map) return EINVAL; - env->me_maxdbs = dbs + 2; /* Named databases + main and free DB */ + env->me_maxdbs = dbs + CORE_DBS; return MDB_SUCCESS; } @@ -4774,7 +4780,7 @@ mdb_env_close0(MDB_env *env, int excl) /* Doing this here since me_dbxs may not exist during 
mdb_env_close */ if (env->me_dbxs) { - for (i = env->me_maxdbs; --i > MAIN_DBI; ) + for (i = env->me_maxdbs; --i >= CORE_DBS; ) free(env->me_dbxs[i].md_name.mv_data); free(env->me_dbxs); } @@ -6170,7 +6176,7 @@ mdb_cursor_touch(MDB_cursor *mc) { int rc = MDB_SUCCESS; - if (mc->mc_dbi > MAIN_DBI && !(*mc->mc_dbflag & DB_DIRTY)) { + if (mc->mc_dbi >= CORE_DBS && !(*mc->mc_dbflag & DB_DIRTY)) { MDB_cursor mc2; MDB_xcursor mcx; if (TXN_DBI_CHANGED(mc->mc_txn, mc->mc_dbi)) @@ -7294,8 +7300,7 @@ mdb_cursor_open(MDB_txn *txn, MDB_dbi dbi, MDB_cursor **ret) if (txn->mt_flags & MDB_TXN_BLOCKED) return MDB_BAD_TXN; - /* Allow read access to the freelist */ - if (!dbi && !F_ISSET(txn->mt_flags, MDB_TXN_RDONLY)) + if (dbi == FREE_DBI && !F_ISSET(txn->mt_flags, MDB_TXN_RDONLY)) return EINVAL; if (txn->mt_dbs[dbi].md_flags & MDB_DUPSORT) @@ -8871,7 +8876,7 @@ mdb_env_copyfd1(MDB_env *env, HANDLE fd) my.mc_wlen[1] = 0; my.mc_olen[0] = 0; my.mc_olen[1] = 0; - my.mc_next_pgno = 2; + my.mc_next_pgno = NUM_METAS; my.mc_status = 0; my.mc_new = 1; my.mc_toggle = 0; @@ -8884,7 +8889,7 @@ mdb_env_copyfd1(MDB_env *env, HANDLE fd) return rc; mp = (MDB_page *)my.mc_wbuf[0]; - memset(mp, 0, 2*env->me_psize); + memset(mp, 0, NUM_METAS * env->me_psize); mp->mp_pgno = 0; mp->mp_flags = P_META; mm = (MDB_meta *)METADATA(mp); @@ -8907,27 +8912,27 @@ mdb_env_copyfd1(MDB_env *env, HANDLE fd) mdb_cursor_init(&mc, txn, FREE_DBI, NULL); while ((rc = mdb_cursor_get(&mc, &key, &data, MDB_NEXT)) == 0) freecount += *(MDB_ID *)data.mv_data; - freecount += txn->mt_dbs[0].md_branch_pages + - txn->mt_dbs[0].md_leaf_pages + - txn->mt_dbs[0].md_overflow_pages; + freecount += txn->mt_dbs[FREE_DBI].md_branch_pages + + txn->mt_dbs[FREE_DBI].md_leaf_pages + + txn->mt_dbs[FREE_DBI].md_overflow_pages; /* Set metapage 1 */ mm->mm_last_pg = txn->mt_next_pgno - freecount - 1; - mm->mm_dbs[1] = txn->mt_dbs[1]; - if (mm->mm_last_pg > 1) { - mm->mm_dbs[1].md_root = mm->mm_last_pg; + mm->mm_dbs[MAIN_DBI] = 
txn->mt_dbs[MAIN_DBI]; + if (mm->mm_last_pg > NUM_METAS-1) { + mm->mm_dbs[MAIN_DBI].md_root = mm->mm_last_pg; mm->mm_txnid = 1; } else { - mm->mm_dbs[1].md_root = P_INVALID; + mm->mm_dbs[MAIN_DBI].md_root = P_INVALID; } } - my.mc_wlen[0] = env->me_psize * 2; + my.mc_wlen[0] = env->me_psize * NUM_METAS; my.mc_txn = txn; pthread_mutex_lock(&my.mc_mutex); while(my.mc_new) pthread_cond_wait(&my.mc_cond, &my.mc_mutex); pthread_mutex_unlock(&my.mc_mutex); - rc = mdb_env_cwalk(&my, &txn->mt_dbs[1].md_root, 0); + rc = mdb_env_cwalk(&my, &txn->mt_dbs[MAIN_DBI].md_root, 0); if (rc == MDB_SUCCESS && my.mc_wlen[my.mc_toggle]) rc = mdb_env_cthr_toggle(&my, 1); mdb_env_cthr_toggle(&my, -1); @@ -8991,7 +8996,7 @@ mdb_env_copyfd0(MDB_env *env, HANDLE fd) } } - wsize = env->me_psize * 2; + wsize = env->me_psize * NUM_METAS; ptr = env->me_map; w2 = wsize; while (w2 > 0) { @@ -9309,7 +9314,7 @@ int mdb_dbi_open(MDB_txn *txn, const char *name, unsigned int flags, MDB_dbi *db /* Is the DB already open? */ len = strlen(name); - for (i=2; i<txn->mt_numdbs; i++) { + for (i=CORE_DBS; i<txn->mt_numdbs; i++) { if (!txn->mt_dbxs[i].md_name.mv_size) { /* Remember this free slot */ if (!unused) unused = i; @@ -9398,7 +9403,7 @@ mdb_stat(MDB_txn *txn, MDB_dbi dbi, MDB_stat *arg) void mdb_dbi_close(MDB_env *env, MDB_dbi dbi) { char *ptr; - if (dbi <= MAIN_DBI || dbi >= env->me_maxdbs) + if (dbi < CORE_DBS || dbi >= env->me_maxdbs) return; ptr = env->me_dbxs[dbi].md_name.mv_data; /* If there was no name, this was already closed */ @@ -9535,7 +9540,7 @@ int mdb_drop(MDB_txn *txn, MDB_dbi dbi, int del) goto leave; /* Can't delete the main DB */ - if (del && dbi > MAIN_DBI) { + if (del && dbi >= CORE_DBS) { rc = mdb_del0(txn, MAIN_DBI, &mc->mc_dbx->md_name, NULL, F_SUBDATA); if (!rc) { txn->mt_dbflags[dbi] = DB_STALE; From de5b6893081a5dd64a8bcbbaa8adb8934062fe29 Mon Sep 17 00:00:00 2001 From: Hallvard Furuseth Date: Sun, 26 Jul 2015 07:20:19 +0200 Subject: [PATCH 30/34] Simpler mdb_env_write_meta() ---
libraries/liblmdb/mdb.c | 19 +++++++------------ 1 file changed, 7 insertions(+), 12 deletions(-) diff --git a/libraries/liblmdb/mdb.c b/libraries/liblmdb/mdb.c index 8f29ec560c..746f4165e9 100644 --- a/libraries/liblmdb/mdb.c +++ b/libraries/liblmdb/mdb.c @@ -3694,15 +3694,12 @@ mdb_env_write_meta(MDB_txn *txn) if (!(flags & (MDB_NOMETASYNC|MDB_NOSYNC))) { unsigned meta_size = env->me_psize; rc = (env->me_flags & MDB_MAPASYNC) ? MS_ASYNC : MS_SYNC; - ptr = env->me_map; - if (toggle) { + ptr = (char *)mp - PAGEHDRSZ; #ifndef _WIN32 /* POSIX msync() requires ptr = start of OS page */ - if (meta_size < env->me_os_psize) - meta_size += meta_size; - else + r2 = (ptr - env->me_map) & (env->me_os_psize - 1); + ptr -= r2; + meta_size += r2; #endif - ptr += meta_size; - } if (MDB_MSYNC(ptr, meta_size, rc)) { rc = ErrCode(); goto fail; @@ -3710,8 +3707,8 @@ mdb_env_write_meta(MDB_txn *txn) } goto done; } - metab.mm_txnid = env->me_metas[toggle]->mm_txnid; - metab.mm_last_pg = env->me_metas[toggle]->mm_last_pg; + metab.mm_txnid = mp->mm_txnid; + metab.mm_last_pg = mp->mm_last_pg; meta.mm_mapsize = mapsize; meta.mm_dbs[FREE_DBI] = txn->mt_dbs[FREE_DBI]; @@ -3722,9 +3719,7 @@ mdb_env_write_meta(MDB_txn *txn) off = offsetof(MDB_meta, mm_mapsize); ptr = (char *)&meta + off; len = sizeof(MDB_meta) - off; - if (toggle) - off += env->me_psize; - off += PAGEHDRSZ; + off += (char *)mp - env->me_map; /* Write to the SYNC fd */ mfd = (flags & (MDB_NOSYNC|MDB_NOMETASYNC)) ? 
env->me_fd : env->me_mfd; From 22958b0f9ed025c6df0926002c58318db33ac896 Mon Sep 17 00:00:00 2001 From: Pavel Medvedev Date: Wed, 19 Aug 2015 16:39:08 +0100 Subject: [PATCH 31/34] ITS#8067 add ssize_t typedef for MSVC --- libraries/liblmdb/mdb.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/libraries/liblmdb/mdb.c b/libraries/liblmdb/mdb.c index 746f4165e9..947c47dccc 100644 --- a/libraries/liblmdb/mdb.c +++ b/libraries/liblmdb/mdb.c @@ -96,7 +96,13 @@ extern int cacheflush(char *addr, int nbytes, int cache); #include <stdlib.h> #include <string.h> #include <time.h> + +#ifdef _MSC_VER +#include <BaseTsd.h> +typedef SSIZE_T ssize_t; +#else #include <unistd.h> +#endif #if defined(__sun) || defined(ANDROID) /* Most platforms have posix_memalign, older may only have memalign */ From 56adc467a641e7542f7f7a9987f097b8f56868cb Mon Sep 17 00:00:00 2001 From: Pavel Medvedev Date: Wed, 19 Aug 2015 17:19:48 +0100 Subject: [PATCH 32/34] ITS#8069 char* strings on Windows are ASCII --- libraries/liblmdb/mdb.c | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/libraries/liblmdb/mdb.c b/libraries/liblmdb/mdb.c index 947c47dccc..4bbd9c937b 100644 --- a/libraries/liblmdb/mdb.c +++ b/libraries/liblmdb/mdb.c @@ -1417,7 +1417,7 @@ mdb_strerror(int err) ; } buf[0] = 0; - FormatMessage(FORMAT_MESSAGE_FROM_SYSTEM | + FormatMessageA(FORMAT_MESSAGE_FROM_SYSTEM | FORMAT_MESSAGE_IGNORE_INSERTS, NULL, err, 0, ptr, sizeof(buf), (va_list *)pad); return ptr; @@ -4410,7 +4410,7 @@ mdb_env_setup_locks(MDB_env *env, char *lpath, int mode, int *excl) off_t size, rsize; #ifdef _WIN32 - env->me_lfd = CreateFile(lpath, GENERIC_READ|GENERIC_WRITE, + env->me_lfd = CreateFileA(lpath, GENERIC_READ|GENERIC_WRITE, FILE_SHARE_READ|FILE_SHARE_WRITE, NULL, OPEN_ALWAYS, FILE_ATTRIBUTE_NORMAL, NULL); #else @@ -4514,9 +4514,9 @@ mdb_env_setup_locks(MDB_env *env, char *lpath, int mode, int *excl) mdb_hash_enc(&val, encbuf); sprintf(env->me_txns->mti_rmname, "Global\\MDBr%s", encbuf); sprintf(env->me_txns->mti_wmname,
"Global\\MDBw%s", encbuf); - env->me_rmutex = CreateMutex(&mdb_all_sa, FALSE, env->me_txns->mti_rmname); + env->me_rmutex = CreateMutexA(&mdb_all_sa, FALSE, env->me_txns->mti_rmname); if (!env->me_rmutex) goto fail_errno; - env->me_wmutex = CreateMutex(&mdb_all_sa, FALSE, env->me_txns->mti_wmname); + env->me_wmutex = CreateMutexA(&mdb_all_sa, FALSE, env->me_txns->mti_wmname); if (!env->me_wmutex) goto fail_errno; #elif defined(MDB_USE_POSIX_SEM) struct stat stbuf; @@ -4588,9 +4588,9 @@ mdb_env_setup_locks(MDB_env *env, char *lpath, int mode, int *excl) goto fail; } #ifdef _WIN32 - env->me_rmutex = OpenMutex(SYNCHRONIZE, FALSE, env->me_txns->mti_rmname); + env->me_rmutex = OpenMutexA(SYNCHRONIZE, FALSE, env->me_txns->mti_rmname); if (!env->me_rmutex) goto fail_errno; - env->me_wmutex = OpenMutex(SYNCHRONIZE, FALSE, env->me_txns->mti_wmname); + env->me_wmutex = OpenMutexA(SYNCHRONIZE, FALSE, env->me_txns->mti_wmname); if (!env->me_wmutex) goto fail_errno; #elif defined(MDB_USE_POSIX_SEM) env->me_rmutex = sem_open(env->me_txns->mti_rmname, 0); @@ -4693,7 +4693,7 @@ mdb_env_open(MDB_env *env, const char *path, unsigned int flags, mdb_mode_t mode len = OPEN_ALWAYS; } mode = FILE_ATTRIBUTE_NORMAL; - env->me_fd = CreateFile(dpath, oflags, FILE_SHARE_READ|FILE_SHARE_WRITE, + env->me_fd = CreateFileA(dpath, oflags, FILE_SHARE_READ|FILE_SHARE_WRITE, NULL, len, mode, NULL); #else if (F_ISSET(flags, MDB_RDONLY)) @@ -4723,7 +4723,7 @@ mdb_env_open(MDB_env *env, const char *path, unsigned int flags, mdb_mode_t mode */ #ifdef _WIN32 len = OPEN_EXISTING; - env->me_mfd = CreateFile(dpath, oflags, + env->me_mfd = CreateFileA(dpath, oflags, FILE_SHARE_READ|FILE_SHARE_WRITE, NULL, len, mode | FILE_FLAG_WRITE_THROUGH, NULL); #else @@ -9094,7 +9094,7 @@ mdb_env_copy2(MDB_env *env, const char *path, unsigned int flags) * already in the OS cache. 
*/ #ifdef _WIN32 - newfd = CreateFile(lpath, GENERIC_WRITE, 0, NULL, CREATE_NEW, + newfd = CreateFileA(lpath, GENERIC_WRITE, 0, NULL, CREATE_NEW, FILE_FLAG_NO_BUFFERING|FILE_FLAG_WRITE_THROUGH, NULL); #else newfd = open(lpath, O_WRONLY|O_CREAT|O_EXCL, 0666); From f1e07f9d14bb426fafa8edd4d3c7251fd103df3a Mon Sep 17 00:00:00 2001 From: Hallvard Furuseth Date: Thu, 1 Oct 2015 19:43:58 +0200 Subject: [PATCH 33/34] Fix typo in mdb_tassert(). No effect on current code. --- libraries/liblmdb/mdb.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/libraries/liblmdb/mdb.c b/libraries/liblmdb/mdb.c index 4bbd9c937b..7a21b50ef5 100644 --- a/libraries/liblmdb/mdb.c +++ b/libraries/liblmdb/mdb.c @@ -1429,7 +1429,7 @@ mdb_strerror(int err) /** assert(3) variant in cursor context */ #define mdb_cassert(mc, expr) mdb_assert0((mc)->mc_txn->mt_env, expr, #expr) /** assert(3) variant in transaction context */ -#define mdb_tassert(mc, expr) mdb_assert0((txn)->mt_env, expr, #expr) +#define mdb_tassert(txn, expr) mdb_assert0((txn)->mt_env, expr, #expr) /** assert(3) variant in environment context */ #define mdb_eassert(env, expr) mdb_assert0(env, expr, #expr) From 550df2a5837bd4d75503dcd36e8d0cded1750879 Mon Sep 17 00:00:00 2001 From: Howard Chu Date: Mon, 26 Oct 2015 20:18:19 +0000 Subject: [PATCH 34/34] More misc updates --- libraries/liblmdb/CHANGES | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/libraries/liblmdb/CHANGES b/libraries/liblmdb/CHANGES index f99815db90..6129fa461c 100644 --- a/libraries/liblmdb/CHANGES +++ b/libraries/liblmdb/CHANGES @@ -8,9 +8,17 @@ LMDB 0.9.17 Release Engineering Fix ITS#8264 cursor_del cursor tracking Fix ITS#8263 cursor_put cursor tracking Fix ITS#7771 fakepage cursor tracking + Fix ITS#7789 ensure mapsize >= pages in use + Fix ITS#7971 mdb_txn_renew0() new reader slots + Fix ITS#7969 use __sync_synchronize on non-x86 + Added mdb_txn_id() (ITS#7994) + Added robust mutex support + Miscellaneous cleanup/simplification Build 
Create install dirs if needed (ITS#8256) Fix ThreadProc decl on Win32/MSVC (ITS#8270) + Added ssize_t typedef for MSVC (ITS#8067) + Use ANSI apis on Windows (ITS#8069) LMDB 0.9.16 Release (2015/08/14) Fix cursor EOF bug (ITS#8190)