From c542442c9f4637847422ed2368ca3391d1476628 Mon Sep 17 00:00:00 2001 From: Howard Chu Date: Mon, 17 Sep 2012 02:17:25 -0700 Subject: [PATCH 01/15] Add MDB_SET_KEY cursor op Overwrites the passed in key with the DB's key --- libraries/libmdb/mdb.c | 23 ++++++++++++----------- libraries/libmdb/mdb.h | 1 + 2 files changed, 13 insertions(+), 11 deletions(-) diff --git a/libraries/libmdb/mdb.c b/libraries/libmdb/mdb.c index 47ab07f2ae..ed1ecbb864 100644 --- a/libraries/libmdb/mdb.c +++ b/libraries/libmdb/mdb.c @@ -704,7 +704,7 @@ typedef struct MDB_node { #define LEAF2KEY(p, i, ks) ((char *)(p) + PAGEHDRSZ + ((i)*(ks))) /** Set the \b node's key into \b key, if requested. */ -#define MDB_SET_KEY(node, key) { if ((key) != NULL) { \ +#define MDB_GET_KEY(node, key) { if ((key) != NULL) { \ (key)->mv_size = NODEKSZ(node); (key)->mv_data = NODEKEY(node); } } /** Information about a single database in the environment. */ @@ -3895,7 +3895,7 @@ mdb_cursor_next(MDB_cursor *mc, MDB_val *key, MDB_val *data, MDB_cursor_op op) } } - MDB_SET_KEY(leaf, key); + MDB_GET_KEY(leaf, key); return MDB_SUCCESS; } @@ -3968,7 +3968,7 @@ mdb_cursor_prev(MDB_cursor *mc, MDB_val *key, MDB_val *data, MDB_cursor_op op) } } - MDB_SET_KEY(leaf, key); + MDB_GET_KEY(leaf, key); return MDB_SUCCESS; } @@ -4000,7 +4000,7 @@ mdb_cursor_set(MDB_cursor *mc, MDB_val *key, MDB_val *data, nodekey.mv_data = LEAF2KEY(mp, 0, nodekey.mv_size); } else { leaf = NODEPTR(mp, 0); - MDB_SET_KEY(leaf, &nodekey); + MDB_GET_KEY(leaf, &nodekey); } rc = mc->mc_dbx->md_cmp(key, &nodekey); if (rc == 0) { @@ -4021,7 +4021,7 @@ mdb_cursor_set(MDB_cursor *mc, MDB_val *key, MDB_val *data, nkeys-1, nodekey.mv_size); } else { leaf = NODEPTR(mp, nkeys-1); - MDB_SET_KEY(leaf, &nodekey); + MDB_GET_KEY(leaf, &nodekey); } rc = mc->mc_dbx->md_cmp(key, &nodekey); if (rc == 0) { @@ -4039,7 +4039,7 @@ mdb_cursor_set(MDB_cursor *mc, MDB_val *key, MDB_val *data, mc->mc_ki[mc->mc_top], nodekey.mv_size); } else { leaf = NODEPTR(mp, mc->mc_ki[mc->mc_top]); - MDB_SET_KEY(leaf, &nodekey); + MDB_GET_KEY(leaf, &nodekey); } rc = mc->mc_dbx->md_cmp(key, &nodekey); if (rc == 0) { @@ -4111,7 +4111,7 @@ set1: } if (data) { if (F_ISSET(leaf->mn_flags, F_DUPDATA)) { - if (op == MDB_SET || op == MDB_SET_RANGE) { + if (op == MDB_SET || op == MDB_SET_KEY || op == MDB_SET_RANGE) { rc = mdb_cursor_first(&mc->mc_xcursor->mx_cursor, data, NULL); } else { int ex2, *ex2p; @@ -4144,8 +4144,8 @@ set1: } /* The key already matches in all other cases */ - if (op == MDB_SET_RANGE) - MDB_SET_KEY(leaf, key); + if (op == MDB_SET_RANGE || op == MDB_SET_KEY) + MDB_GET_KEY(leaf, key); DPRINTF("==> cursor placed on key [%s]", DKEY(key)); return rc; @@ -4190,7 +4190,7 @@ mdb_cursor_first(MDB_cursor *mc, MDB_val *key, MDB_val *data) return rc; } } - MDB_SET_KEY(leaf, key); + MDB_GET_KEY(leaf, key); return MDB_SUCCESS; } @@ -4239,7 +4239,7 @@ mdb_cursor_last(MDB_cursor *mc, MDB_val *key, MDB_val *data) } } - MDB_SET_KEY(leaf, key); + MDB_GET_KEY(leaf, key); return MDB_SUCCESS; } @@ -4261,6 +4261,7 @@ mdb_cursor_get(MDB_cursor *mc, MDB_val *key, MDB_val *data, } /* FALLTHRU */ case MDB_SET: + case MDB_SET_KEY: case MDB_SET_RANGE: if (key == NULL || key->mv_size == 0 || key->mv_size > MAXKEYSIZE) { rc = EINVAL; diff --git a/libraries/libmdb/mdb.h b/libraries/libmdb/mdb.h index 7ef54e8b5d..43eb789bd2 100644 --- a/libraries/libmdb/mdb.h +++ b/libraries/libmdb/mdb.h @@ -247,6 +247,7 @@ typedef enum MDB_cursor_op { MDB_PREV_NODUP, /**< Position at last data item of previous key. Only for #MDB_DUPSORT */ MDB_SET, /**< Position at specified key */ + MDB_SET_KEY, /**< Position at specified key, return key + data */ MDB_SET_RANGE /**< Position at first key greater than or equal to specified key. */ } MDB_cursor_op; From 076b2b36a97b0d4cf206e517ef63965976ad5f75 Mon Sep 17 00:00:00 2001 From: Howard Chu Date: Mon, 17 Sep 2012 03:48:54 -0700 Subject: [PATCH 02/15] Shared lib should depend on pthread --- libraries/libmdb/Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/libraries/libmdb/Makefile b/libraries/libmdb/Makefile index 1fde58889c..796be078d4 100644 --- a/libraries/libmdb/Makefile +++ b/libraries/libmdb/Makefile @@ -19,7 +19,7 @@ libmdb.a: mdb.o midl.o ar rs $@ mdb.o midl.o libmdb.so: mdb.o midl.o - gcc -shared -o $@ mdb.o midl.o $(SOLIBS) + gcc -pthread -shared -o $@ mdb.o midl.o $(SOLIBS) mdb_stat: mdb_stat.o libmdb.a mtest: mtest.o libmdb.a From acbff5b1eac9950c04780f8bcb4933cbc38fc9d8 Mon Sep 17 00:00:00 2001 From: Howard Chu Date: Mon, 17 Sep 2012 04:41:13 -0700 Subject: [PATCH 03/15] Add mdb_cursor_renew() Allow cursors on read-only txns to be reused with later txns. --- libraries/libmdb/mdb.c | 13 +++++++++++++ libraries/libmdb/mdb.h | 17 +++++++++++++++++ 2 files changed, 30 insertions(+) diff --git a/libraries/libmdb/mdb.c b/libraries/libmdb/mdb.c index ed1ecbb864..345ebf436c 100644 --- a/libraries/libmdb/mdb.c +++ b/libraries/libmdb/mdb.c @@ -5285,6 +5285,19 @@ mdb_cursor_open(MDB_txn *txn, MDB_dbi dbi, MDB_cursor **ret) return MDB_SUCCESS; } +int +mdb_cursor_renew(MDB_txn *txn, MDB_cursor *mc) +{ + if (txn == NULL || mc == NULL || mc->mc_dbi >= txn->mt_numdbs) + return EINVAL; + + if (txn->mt_cursors) + return EINVAL; + + mdb_cursor_init(mc, txn, mc->mc_dbi, mc->mc_xcursor); + return MDB_SUCCESS; +} + /* Return the count of duplicate data items for the current key */ int mdb_cursor_count(MDB_cursor *mc, size_t *countp) diff --git a/libraries/libmdb/mdb.h b/libraries/libmdb/mdb.h index 43eb789bd2..6e9cd2b323 100644 --- a/libraries/libmdb/mdb.h +++ b/libraries/libmdb/mdb.h @@ -904,6 +904,23 @@ int mdb_cursor_open(MDB_txn *txn, MDB_dbi dbi, MDB_cursor **cursor); */ void mdb_cursor_close(MDB_cursor *cursor); + /** @brief Renew a cursor handle. + * + * Cursors are associated with a specific transaction and database and + * may not span threads. Cursors that are only used in read-only + * transactions may be re-used, to avoid unnecessary malloc/free overhead. + * The cursor may be associated with a new read-only transaction, and + * referencing the same database handle as it was created with. + * @param[in] txn A transaction handle returned by #mdb_txn_begin() + * @param[in] cursor A cursor handle returned by #mdb_cursor_open() + * @return A non-zero error value on failure and 0 on success. Some possible + * errors are: + *
    + *
  • EINVAL - an invalid parameter was specified. + *
+ */ +int mdb_cursor_renew(MDB_txn *txn, MDB_cursor *cursor); + /** @brief Return the cursor's transaction handle. * * @param[in] cursor A cursor handle returned by #mdb_cursor_open() From 8bb10add2465eee34e79abeaa62011e3e234effb Mon Sep 17 00:00:00 2001 From: Howard Chu Date: Mon, 17 Sep 2012 06:35:03 -0700 Subject: [PATCH 04/15] More for ab04c50a32275e216b82edaeeed50208cf49336b Fix typos, error code ranges --- libraries/libmdb/mdb.c | 11 +++++++---- libraries/libmdb/mdb.h | 1 + 2 files changed, 8 insertions(+), 4 deletions(-) diff --git a/libraries/libmdb/mdb.c b/libraries/libmdb/mdb.c index 345ebf436c..8252cd63bc 100644 --- a/libraries/libmdb/mdb.c +++ b/libraries/libmdb/mdb.c @@ -1024,24 +1024,27 @@ static char *const mdb_errstr[] = { "MDB_CORRUPTED: Located page was wrong type", "MDB_PANIC: Update of meta page failed", "MDB_VERSION_MISMATCH: Database environment version mismatch", - "MDB_INVALID: File is not an MDB file" + "MDB_INVALID: File is not an MDB file", "MDB_MAP_FULL: Environment mapsize limit reached", "MDB_DBS_FULL: Environment maxdbs limit reached", "MDB_READERS_FULL: Environment maxreaders limit reached", "MDB_TLS_FULL: Thread-local storage keys full - too many environments open", "MDB_TXN_FULL: Nested transaction has too many dirty pages - transaction too big", "MDB_CURSOR_FULL: Internal error - cursor stack limit reached", - "MDB_PAGE_FULL: Internal error - page has no more space", + "MDB_PAGE_FULL: Internal error - page has no more space" }; char * mdb_strerror(int err) { + int i; if (!err) return ("Successful return: 0"); - if (err >= MDB_KEYEXIST && err <= MDB_VERSION_MISMATCH) - return mdb_errstr[err - MDB_KEYEXIST]; + if (err >= MDB_KEYEXIST && err <= MDB_LAST_ERRCODE) { + i = err - MDB_KEYEXIST; + return mdb_errstr[i]; + } return strerror(err); } diff --git a/libraries/libmdb/mdb.h b/libraries/libmdb/mdb.h index 6e9cd2b323..7fc1bf217e 100644 --- a/libraries/libmdb/mdb.h +++ b/libraries/libmdb/mdb.h @@ -286,6 +286,7 @@ typedef enum MDB_cursor_op { #define MDB_CURSOR_FULL (-30787) /** Page has not enough space - internal error */ #define MDB_PAGE_FULL (-30786) +#define MDB_LAST_ERRCODE MDB_PAGE_FULL /** @} */ /** @brief Statistics for a database in the environment */ From 588a84a5ae0eb9ea343802e24b199af835204bfd Mon Sep 17 00:00:00 2001 From: Hallvard Furuseth Date: Mon, 17 Sep 2012 15:42:14 +0200 Subject: [PATCH 05/15] ITS#7363 Preprocessor namespace cleanup. Rename USE_POSIX_SEM to MDB_USE_POSIX_SEM. Separate MDB_FDATASYNC from MDB_USE_POSIX_SEM. --- libraries/libmdb/mdb.c | 43 ++++++++++++++++++------------------------ 1 file changed, 18 insertions(+), 25 deletions(-) diff --git a/libraries/libmdb/mdb.c b/libraries/libmdb/mdb.c index 8252cd63bc..092aa1c5ed 100644 --- a/libraries/libmdb/mdb.c +++ b/libraries/libmdb/mdb.c @@ -62,12 +62,15 @@ #endif #if defined(__APPLE__) || defined (BSD) -#define USE_POSIX_SEM +# define MDB_USE_POSIX_SEM 1 +# define MDB_FDATASYNC fsync +#elif defined(ANDROID) +# define MDB_FDATASYNC fsync #endif #ifndef _WIN32 #include -#ifdef USE_POSIX_SEM +#ifdef MDB_USE_POSIX_SEM #include #endif #endif @@ -155,16 +158,12 @@ #define close(fd) CloseHandle(fd) #define munmap(ptr,len) UnmapViewOfFile(ptr) #else -#ifdef USE_POSIX_SEM +#ifdef MDB_USE_POSIX_SEM #define LOCK_MUTEX_R(env) sem_wait((env)->me_rmutex) #define UNLOCK_MUTEX_R(env) sem_post((env)->me_rmutex) #define LOCK_MUTEX_W(env) sem_wait((env)->me_wmutex) #define UNLOCK_MUTEX_W(env) sem_post((env)->me_wmutex) -#define MDB_FDATASYNC(fd) fsync(fd) #else -#ifdef ANDROID -#define MDB_FDATASYNC(fd) fsync(fd) -#endif /** Lock the reader mutex. */ #define LOCK_MUTEX_R(env) pthread_mutex_lock(&(env)->me_txns->mti_mutex) @@ -180,7 +179,7 @@ /** Unlock the writer mutex. */ #define UNLOCK_MUTEX_W(env) pthread_mutex_unlock(&(env)->me_txns->mti_wmutex) -#endif /* USE_POSIX_SEM */ +#endif /* MDB_USE_POSIX_SEM */ /** Get the error code for the last failed system function. */ @@ -205,7 +204,7 @@ #define GET_PAGESIZE(x) ((x) = sysconf(_SC_PAGE_SIZE)) #endif -#if defined(_WIN32) || defined(USE_POSIX_SEM) +#if defined(_WIN32) || defined(MDB_USE_POSIX_SEM) #define MNAME_LEN 32 #else #define MNAME_LEN (sizeof(pthread_mutex_t)) @@ -481,7 +480,7 @@ typedef struct MDB_txbody { uint32_t mtb_magic; /** Version number of this lock file. Must be set to #MDB_VERSION. */ uint32_t mtb_version; -#if defined(_WIN32) || defined(USE_POSIX_SEM) +#if defined(_WIN32) || defined(MDB_USE_POSIX_SEM) char mtb_rmname[MNAME_LEN]; #else /** Mutex protecting access to this table. @@ -514,7 +513,7 @@ typedef struct MDB_txninfo { char pad[(sizeof(MDB_txbody)+CACHELINE-1) & ~(CACHELINE-1)]; } mt1; union { -#if defined(_WIN32) || defined(USE_POSIX_SEM) +#if defined(_WIN32) || defined(MDB_USE_POSIX_SEM) char mt2_wmname[MNAME_LEN]; #define mti_wmname mt2.mt2_wmname #else @@ -930,9 +929,8 @@ struct MDB_env { #ifdef _WIN32 HANDLE me_rmutex; /* Windows mutexes don't reside in shared mem */ HANDLE me_wmutex; -#endif -#ifdef USE_POSIX_SEM - sem_t *me_rmutex; /* Apple doesn't support shared mutexes */ +#elif defined(MDB_USE_POSIX_SEM) + sem_t *me_rmutex; /* Shared mutexes are not supported */ sem_t *me_wmutex; #endif }; @@ -2792,7 +2790,7 @@ mdb_env_excl_lock(MDB_env *env, int *excl) return 0; } -#if defined(_WIN32) || defined(USE_POSIX_SEM) +#if defined(_WIN32) || defined(MDB_USE_POSIX_SEM) /* * hash_64 - 64 bit Fowler/Noll/Vo-0 FNV-1a hash code * @@ -3002,8 +3000,7 @@ mdb_env_setup_locks(MDB_env *env, char *lpath, int mode, int *excl) rc = ErrCode(); goto fail; } -#else /* _WIN32 */ -#ifdef USE_POSIX_SEM +#elif defined(MDB_USE_POSIX_SEM) struct stat stbuf; struct { dev_t dev; @@ -3040,7 +3037,7 @@ mdb_env_setup_locks(MDB_env *env, char *lpath, int mode, int *excl) rc = ErrCode(); goto fail; } -#else /* USE_POSIX_SEM */ +#else /* MDB_USE_POSIX_SEM */ pthread_mutexattr_t mattr; pthread_mutexattr_init(&mattr); @@ -3050,8 +3047,7 @@ mdb_env_setup_locks(MDB_env *env, char *lpath, int mode, int *excl) } pthread_mutex_init(&env->me_txns->mti_mutex, &mattr); pthread_mutex_init(&env->me_txns->mti_wmutex, &mattr); -#endif /* USE_POSIX_SEM */ -#endif /* _WIN32 */ +#endif /* _WIN32 || MDB_USE_POSIX_SEM */ env->me_txns->mti_version = MDB_VERSION; env->me_txns->mti_magic = MDB_MAGIC; env->me_txns->mti_txnid = 0; @@ -3084,8 +3080,7 @@ mdb_env_setup_locks(MDB_env *env, char *lpath, int mode, int *excl) rc = ErrCode(); goto fail; } -#endif -#ifdef USE_POSIX_SEM +#elif defined(MDB_USE_POSIX_SEM) env->me_rmutex = sem_open(env->me_txns->mti_rmname, 0); if (env->me_rmutex == SEM_FAILED) { rc = ErrCode(); @@ -3271,8 +3266,7 @@ mdb_env_close(MDB_env *env) /* Windows automatically destroys the mutexes when * the last handle closes. */ -#else -#ifdef USE_POSIX_SEM +#elif defined(MDB_USE_POSIX_SEM) sem_close(env->me_rmutex); sem_close(env->me_wmutex); { int excl = 0; @@ -3283,7 +3277,6 @@ mdb_env_close(MDB_env *env) sem_unlink(env->me_txns->mti_wmname); } } -#endif #endif munmap((void *)env->me_txns, (env->me_maxreaders-1)*sizeof(MDB_reader)+sizeof(MDB_txninfo)); } From 20a216fcc9cbfad838f5bfc970a160a23e9fe2be Mon Sep 17 00:00:00 2001 From: Hallvard Furuseth Date: Mon, 17 Sep 2012 15:42:14 +0200 Subject: [PATCH 06/15] MDB_WRITEMAP needs no DSYNC descriptor (me_mfd) --- libraries/libmdb/mdb.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/libraries/libmdb/mdb.c b/libraries/libmdb/mdb.c index 092aa1c5ed..809f62e22d 100644 --- a/libraries/libmdb/mdb.c +++ b/libraries/libmdb/mdb.c @@ -3163,7 +3163,7 @@ mdb_env_open(MDB_env *env, const char *path, unsigned int flags, mode_t mode) } if ((rc = mdb_env_open2(env, flags)) == MDB_SUCCESS) { - if (flags & (MDB_RDONLY|MDB_NOSYNC|MDB_NOMETASYNC)) { + if (flags & (MDB_RDONLY|MDB_NOSYNC|MDB_NOMETASYNC|MDB_WRITEMAP)) { env->me_mfd = env->me_fd; } else { /* synchronous fd for meta writes */ From 38cc1e96b48ef0d0f00b79543c4c6db879537546 Mon Sep 17 00:00:00 2001 From: Hallvard Furuseth Date: Mon, 17 Sep 2012 15:42:14 +0200 Subject: [PATCH 07/15] Save pid in MDB_env instead of repeating getpid(). An open MDB environment does not survive or catch fork(), so repeating getpid() was pointless. --- libraries/libmdb/mdb.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/libraries/libmdb/mdb.c b/libraries/libmdb/mdb.c index 809f62e22d..37403fbcb0 100644 --- a/libraries/libmdb/mdb.c +++ b/libraries/libmdb/mdb.c @@ -906,6 +906,7 @@ struct MDB_env { unsigned int me_maxreaders; /**< size of the reader table */ MDB_dbi me_numdbs; /**< number of DBs opened */ MDB_dbi me_maxdbs; /**< size of the DB table */ + pid_t me_pid; /**< process ID of this env */ char *me_path; /**< path to the DB files */ char *me_map; /**< the memory map of the data file */ MDB_txninfo *me_txns; /**< the memory map of the lock file */ @@ -1625,7 +1626,7 @@ mdb_txn_renew0(MDB_txn *txn) if (txn->mt_flags & MDB_TXN_RDONLY) { MDB_reader *r = pthread_getspecific(env->me_txkey); if (!r) { - pid_t pid = getpid(); + pid_t pid = env->me_pid; pthread_t tid = pthread_self(); LOCK_MUTEX_R(env); @@ -2506,6 +2507,7 @@ mdb_env_create(MDB_env **env) e->me_fd = INVALID_HANDLE_VALUE; e->me_lfd = INVALID_HANDLE_VALUE; e->me_mfd = INVALID_HANDLE_VALUE; + e->me_pid = getpid(); VGMEMP_CREATE(e,0,0); *env = e; return MDB_SUCCESS; @@ -3255,7 +3257,7 @@ mdb_env_close(MDB_env *env) close(env->me_mfd); close(env->me_fd); if (env->me_txns) { - pid_t pid = getpid(); + pid_t pid = env->me_pid; unsigned int i; for (i=0; ime_txns->mti_numreaders; i++) if (env->me_txns->mti_readers[i].mr_pid == pid) From a35f9b2a5346e84bc920111348a1ea7d1cdfe32a Mon Sep 17 00:00:00 2001 From: Hallvard Furuseth Date: Mon, 17 Sep 2012 15:42:14 +0200 Subject: [PATCH 08/15] Remove mdb data races. Use (txnid_t)-1 as "no ID". Avoid race between numreaders++ and reading numreaders at cleanup. Make the un-mutexed reset of reader table entry, atomic: Reset mr_pid only. Instead check mr_pid != 0 in mdb_page_alloc()'s scan for readers. (txnid_t)-1 as "no ID"-mark avoids a check for mr_txnid != 0. The scan can stop when seeing an old reader. --- libraries/libmdb/mdb.c | 31 ++++++++++++++++--------------- 1 file changed, 16 insertions(+), 15 deletions(-) diff --git a/libraries/libmdb/mdb.c b/libraries/libmdb/mdb.c index 37403fbcb0..6b759d702a 100644 --- a/libraries/libmdb/mdb.c +++ b/libraries/libmdb/mdb.c @@ -433,7 +433,7 @@ typedef uint16_t indx_t; * lock file. */ typedef struct MDB_rxbody { - /** The current Transaction ID when this transaction began. + /** Current Transaction ID when this transaction began, or (txnid_t)-1. * Multiple readers that start at the same time will probably have the * same ID here. Again, it's not important to exclude them from * anything; all we need to know is which version of the DB they @@ -904,6 +904,7 @@ struct MDB_env { uint32_t me_flags; /**< @ref mdb_env */ unsigned int me_psize; /**< size of a page, from #GET_PAGESIZE */ unsigned int me_maxreaders; /**< size of the reader table */ + unsigned int me_numreaders; /**< max numreaders set by this env */ MDB_dbi me_numdbs; /**< number of DBs opened */ MDB_dbi me_maxdbs; /**< size of the DB table */ pid_t me_pid; /**< process ID of this env */ @@ -1233,10 +1234,12 @@ mdb_page_alloc(MDB_cursor *mc, int num, MDB_page **mp) if (!txn->mt_env->me_pghead && txn->mt_dbs[FREE_DBI].md_root != P_INVALID) { /* See if there's anything in the free DB */ + int j; + MDB_reader *r; MDB_cursor m2; MDB_node *leaf; MDB_val data; - txnid_t *kptr, oldest, last; + txnid_t *kptr, last; mdb_cursor_init(&m2, txn, FREE_DBI, NULL); if (!txn->mt_env->me_pgfirst) { @@ -1259,17 +1262,15 @@ again: last = *(txnid_t *)key.mv_data; } - { - unsigned int i; - oldest = txn->mt_txnid - 1; - for (i=0; imt_env->me_txns->mti_numreaders; i++) { - txnid_t mr = txn->mt_env->me_txns->mti_readers[i].mr_txnid; - if (mr && mr < oldest) - oldest = mr; - } + /* Unusable if referred by a meta page or reader... */ + j = 1; + if (last < txn->mt_txnid-1) { + j = txn->mt_env->me_txns->mti_numreaders; + r = txn->mt_env->me_txns->mti_readers + j; + for (j = -j; j && (last last) { + if (!j) { /* It's usable, grab it. */ MDB_oldpages *mop; @@ -1641,6 +1642,8 @@ mdb_txn_renew0(MDB_txn *txn) env->me_txns->mti_readers[i].mr_tid = tid; if (i >= env->me_txns->mti_numreaders) env->me_txns->mti_numreaders = i+1; + /* Save numreaders for un-mutexed mdb_env_close() */ + env->me_numreaders = env->me_txns->mti_numreaders; UNLOCK_MUTEX_R(env); r = &env->me_txns->mti_readers[i]; pthread_setspecific(env->me_txkey, r); @@ -1788,7 +1791,7 @@ mdb_txn_reset0(MDB_txn *txn) MDB_env *env = txn->mt_env; if (F_ISSET(txn->mt_flags, MDB_TXN_RDONLY)) { - txn->mt_u.reader->mr_txnid = 0; + txn->mt_u.reader->mr_txnid = (txnid_t)-1; } else { MDB_oldpages *mop; MDB_page *dp; @@ -2668,9 +2671,7 @@ mdb_env_reader_dest(void *ptr) { MDB_reader *reader = ptr; - reader->mr_txnid = 0; reader->mr_pid = 0; - reader->mr_tid = 0; } #ifdef _WIN32 @@ -3259,7 +3260,7 @@ mdb_env_close(MDB_env *env) if (env->me_txns) { pid_t pid = env->me_pid; unsigned int i; - for (i=0; ime_txns->mti_numreaders; i++) + for (i=0; ime_numreaders; i++) if (env->me_txns->mti_readers[i].mr_pid == pid) env->me_txns->mti_readers[i].mr_pid = 0; #ifdef _WIN32 From 31be24896b455e45eaf4e3739299a19a29979183 Mon Sep 17 00:00:00 2001 From: Hallvard Furuseth Date: Mon, 17 Sep 2012 15:42:14 +0200 Subject: [PATCH 09/15] ITS#7377 Wrap sem_wait & file locks in EINTR loops --- libraries/libmdb/mdb.c | 41 +++++++++++++++++++++++++++++------------ 1 file changed, 29 insertions(+), 12 deletions(-) diff --git a/libraries/libmdb/mdb.c b/libraries/libmdb/mdb.c index 6b759d702a..a593cd9314 100644 --- a/libraries/libmdb/mdb.c +++ b/libraries/libmdb/mdb.c @@ -158,11 +158,22 @@ #define close(fd) CloseHandle(fd) #define munmap(ptr,len) UnmapViewOfFile(ptr) #else + #ifdef MDB_USE_POSIX_SEM -#define LOCK_MUTEX_R(env) sem_wait((env)->me_rmutex) + +#define LOCK_MUTEX_R(env) mdb_sem_wait((env)->me_rmutex) #define UNLOCK_MUTEX_R(env) sem_post((env)->me_rmutex) -#define LOCK_MUTEX_W(env) sem_wait((env)->me_wmutex) +#define LOCK_MUTEX_W(env) mdb_sem_wait((env)->me_wmutex) #define UNLOCK_MUTEX_W(env) sem_post((env)->me_wmutex) + +static int +mdb_sem_wait(sem_t *sem) +{ + int rc; + while ((rc = sem_wait(sem)) && (rc = errno) == EINTR) ; + return rc; +} + #else /** Lock the reader mutex. */ @@ -2730,10 +2741,10 @@ PIMAGE_TLS_CALLBACK mdb_tls_cbp = mdb_tls_callback; #endif /** Downgrade the exclusive lock on the region back to shared */ -static void +static int mdb_env_share_locks(MDB_env *env) { - int toggle = mdb_env_pick_meta(env); + int rc = 0, toggle = mdb_env_pick_meta(env); env->me_txns->mti_txnid = env->me_metas[toggle]->mm_txnid; @@ -2756,14 +2767,18 @@ mdb_env_share_locks(MDB_env *env) lock_info.l_whence = SEEK_SET; lock_info.l_start = 0; lock_info.l_len = 1; - fcntl(env->me_lfd, F_SETLK, &lock_info); + while ((rc = fcntl(env->me_lfd, F_SETLK, &lock_info)) && + (rc = ErrCode()) == EINTR) ; } #endif + + return rc; } static int mdb_env_excl_lock(MDB_env *env, int *excl) { + int rc = 0; #ifdef _WIN32 if (LockFile(env->me_lfd, 0, 0, 1, 0)) { *excl = 1; @@ -2771,7 +2786,7 @@ mdb_env_excl_lock(MDB_env *env, int *excl) OVERLAPPED ov; memset(&ov, 0, sizeof(ov)); if (!LockFileEx(env->me_lfd, 0, 0, 1, 0, &ov)) { - return ErrCode(); + rc = ErrCode(); } } #else @@ -2785,12 +2800,11 @@ mdb_env_excl_lock(MDB_env *env, int *excl) *excl = 1; } else { lock_info.l_type = F_RDLCK; - if (fcntl(env->me_lfd, F_SETLKW, &lock_info)) { - return ErrCode(); - } + while ((rc = fcntl(env->me_lfd, F_SETLKW, &lock_info)) && + (rc = ErrCode()) == EINTR) ; } #endif - return 0; + return rc; } #if defined(_WIN32) || defined(MDB_USE_POSIX_SEM) @@ -3194,8 +3208,11 @@ mdb_env_open(MDB_env *env, const char *path, unsigned int flags, mode_t mode) goto leave; } #endif - if (excl) - mdb_env_share_locks(env); + if (excl) { + rc = mdb_env_share_locks(env); + if (rc) + goto leave; + } env->me_numdbs = 2; env->me_dbxs = calloc(env->me_maxdbs, sizeof(MDB_dbx)); env->me_dbflags = calloc(env->me_maxdbs, sizeof(uint16_t)); From c760e536ec1b9fed01b4d8e8d884533dceec397d Mon Sep 17 00:00:00 2001 From: Hallvard Furuseth Date: Mon, 17 Sep 2012 15:42:15 +0200 Subject: [PATCH 10/15] ITS#7364 Always sem_unlink() in mdb_env_open(). Drop the sem_unlink() error checks, which could prevent the 2nd unlink. Instead use O_EXCL in sem_open(). This makes "open+close the database" the API for trying to clean away the old semaphores, if they were left behind by a previous run. --- libraries/libmdb/mdb.c | 25 +++++++++++-------------- 1 file changed, 11 insertions(+), 14 deletions(-) diff --git a/libraries/libmdb/mdb.c b/libraries/libmdb/mdb.c index a593cd9314..9546d01f47 100644 --- a/libraries/libmdb/mdb.c +++ b/libraries/libmdb/mdb.c @@ -3006,12 +3006,12 @@ mdb_env_setup_locks(MDB_env *env, char *lpath, int mode, int *excl) val.mv_size = sizeof(idbuf); mdb_hash_hex(&val, hexbuf); sprintf(env->me_txns->mti_rmname, "Global\\MDBr%s", hexbuf); + sprintf(env->me_txns->mti_wmname, "Global\\MDBw%s", hexbuf); env->me_rmutex = CreateMutex(&mdb_all_sa, FALSE, env->me_txns->mti_rmname); if (!env->me_rmutex) { rc = ErrCode(); goto fail; } - sprintf(env->me_txns->mti_wmname, "Global\\MDBw%s", hexbuf); env->me_wmutex = CreateMutex(&mdb_all_sa, FALSE, env->me_txns->mti_wmname); if (!env->me_wmutex) { rc = ErrCode(); @@ -3033,23 +3033,20 @@ mdb_env_setup_locks(MDB_env *env, char *lpath, int mode, int *excl) val.mv_size = sizeof(idbuf); mdb_hash_hex(&val, hexbuf); sprintf(env->me_txns->mti_rmname, "/MDBr%s", hexbuf); - if (sem_unlink(env->me_txns->mti_rmname)) { - rc = ErrCode(); - if (rc != ENOENT && rc != EINVAL) - goto fail; - } - env->me_rmutex = sem_open(env->me_txns->mti_rmname, O_CREAT, mode, 1); + sprintf(env->me_txns->mti_wmname, "/MDBw%s", hexbuf); + /* Clean up after a previous run, if needed: Try to + * remove both semaphores before doing anything else. + */ + sem_unlink(env->me_txns->mti_rmname); + sem_unlink(env->me_txns->mti_wmname); + env->me_rmutex = sem_open(env->me_txns->mti_rmname, + O_CREAT|O_EXCL, mode, 1); if (env->me_rmutex == SEM_FAILED) { rc = ErrCode(); goto fail; } - sprintf(env->me_txns->mti_wmname, "/MDBw%s", hexbuf); - if (sem_unlink(env->me_txns->mti_wmname)) { - rc = ErrCode(); - if (rc != ENOENT && rc != EINVAL) - goto fail; - } - env->me_wmutex = sem_open(env->me_txns->mti_wmname, O_CREAT, mode, 1); + env->me_wmutex = sem_open(env->me_txns->mti_wmname, + O_CREAT|O_EXCL, mode, 1); if (env->me_wmutex == SEM_FAILED) { rc = ErrCode(); goto fail; From fe1b3794de3944967bd4ebce46dd0dc0d3449a17 Mon Sep 17 00:00:00 2001 From: Hallvard Furuseth Date: Mon, 17 Sep 2012 15:42:15 +0200 Subject: [PATCH 11/15] ITS#7364 Limit mdb lock upgrade before sem_unlink. Do not try shared lock when closing or after error. Track file lock state to decide. Change meaning of *excl to reflect file lock state. --- libraries/libmdb/mdb.c | 17 ++++++++++++----- 1 file changed, 12 insertions(+), 5 deletions(-) diff --git a/libraries/libmdb/mdb.c b/libraries/libmdb/mdb.c index 9546d01f47..b00e0767f8 100644 --- a/libraries/libmdb/mdb.c +++ b/libraries/libmdb/mdb.c @@ -2775,6 +2775,9 @@ mdb_env_share_locks(MDB_env *env) return rc; } +/** Try to get exlusive lock, otherwise shared. + * Maintain *excl = -1: no/unknown lock, 0: shared, 1: exclusive. + */ static int mdb_env_excl_lock(MDB_env *env, int *excl) { @@ -2798,7 +2801,11 @@ mdb_env_excl_lock(MDB_env *env, int *excl) lock_info.l_len = 1; if (!fcntl(env->me_lfd, F_SETLK, &lock_info)) { *excl = 1; - } else { + } else +# ifdef MDB_USE_POSIX_SEM + if (*excl < 0) /* always true when !MDB_USE_POSIX_SEM */ +# endif + { lock_info.l_type = F_RDLCK; while ((rc = fcntl(env->me_lfd, F_SETLKW, &lock_info)) && (rc = ErrCode()) == EINTR) ; @@ -2896,7 +2903,7 @@ mdb_env_setup_locks(MDB_env *env, char *lpath, int mode, int *excl) int rc; off_t size, rsize; - *excl = 0; + *excl = -1; #ifdef _WIN32 if ((env->me_lfd = CreateFile(lpath, GENERIC_READ|GENERIC_WRITE, @@ -2934,7 +2941,7 @@ mdb_env_setup_locks(MDB_env *env, char *lpath, int mode, int *excl) size = lseek(env->me_lfd, 0, SEEK_END); #endif rsize = (env->me_maxreaders-1) * sizeof(MDB_reader) + sizeof(MDB_txninfo); - if (size < rsize && *excl) { + if (size < rsize && *excl > 0) { #ifdef _WIN32 SetFilePointer(env->me_lfd, rsize, NULL, 0); if (!SetEndOfFile(env->me_lfd)) { @@ -2978,7 +2985,7 @@ mdb_env_setup_locks(MDB_env *env, char *lpath, int mode, int *excl) env->me_txns = m; #endif } - if (*excl) { + if (*excl > 0) { #ifdef _WIN32 BY_HANDLE_FILE_INFORMATION stbuf; struct { @@ -3205,7 +3212,7 @@ mdb_env_open(MDB_env *env, const char *path, unsigned int flags, mode_t mode) goto leave; } #endif - if (excl) { + if (excl > 0) { rc = mdb_env_share_locks(env); if (rc) goto leave; From c0f3d9b9a8864e744a8d1eaffc99bc86026d5351 Mon Sep 17 00:00:00 2001 From: Hallvard Furuseth Date: Mon, 17 Sep 2012 15:42:15 +0200 Subject: [PATCH 12/15] ITS#7377 Catch MDB setup errors and clean up. --- libraries/libmdb/mdb.c | 249 +++++++++++++++++++---------------------- 1 file changed, 115 insertions(+), 134 deletions(-) diff --git a/libraries/libmdb/mdb.c b/libraries/libmdb/mdb.c index b00e0767f8..8aacb88ad9 100644 --- a/libraries/libmdb/mdb.c +++ b/libraries/libmdb/mdb.c @@ -974,6 +974,7 @@ static int mdb_page_split(MDB_cursor *mc, MDB_val *newkey, MDB_val *newdata, static int mdb_env_read_header(MDB_env *env, MDB_meta *meta); static int mdb_env_pick_meta(const MDB_env *env); static int mdb_env_write_meta(MDB_txn *txn); +static void mdb_env_close0(MDB_env *env, int excl); static MDB_node *mdb_node_search(MDB_cursor *mc, MDB_val *key, int *exactp); static int mdb_node_add(MDB_cursor *mc, indx_t indx, @@ -2521,6 +2522,10 @@ mdb_env_create(MDB_env **env) e->me_fd = INVALID_HANDLE_VALUE; e->me_lfd = INVALID_HANDLE_VALUE; e->me_mfd = INVALID_HANDLE_VALUE; +#ifdef MDB_USE_POSIX_SEM + e->me_rmutex = SEM_FAILED; + e->me_wmutex = SEM_FAILED; +#endif e->me_pid = getpid(); VGMEMP_CREATE(e,0,0); *env = e; @@ -2640,7 +2645,6 @@ mdb_env_open2(MDB_env *env, unsigned int flags) meta.mm_address = env->me_map; i = mdb_env_init_meta(env, &meta); if (i != MDB_SUCCESS) { - munmap(env->me_map, env->me_mapsize); return i; } } @@ -2742,7 +2746,7 @@ PIMAGE_TLS_CALLBACK mdb_tls_cbp = mdb_tls_callback; /** Downgrade the exclusive lock on the region back to shared */ static int -mdb_env_share_locks(MDB_env *env) +mdb_env_share_locks(MDB_env *env, int *excl) { int rc = 0, toggle = mdb_env_pick_meta(env); @@ -2757,6 +2761,7 @@ mdb_env_share_locks(MDB_env *env) memset(&ov, 0, sizeof(ov)); LockFileEx(env->me_lfd, 0, 0, 1, 0, &ov); UnlockFile(env->me_lfd, 0, 0, 1, 0); + *excl = 0; } #else { @@ -2769,6 +2774,7 @@ mdb_env_share_locks(MDB_env *env) lock_info.l_len = 1; while ((rc = fcntl(env->me_lfd, F_SETLK, &lock_info)) && (rc = ErrCode()) == EINTR) ; + *excl = rc ? -1 : 0; /* error may mean we lost the lock */ } #endif @@ -2809,6 +2815,8 @@ mdb_env_excl_lock(MDB_env *env, int *excl) lock_info.l_type = F_RDLCK; while ((rc = fcntl(env->me_lfd, F_SETLKW, &lock_info)) && (rc = ErrCode()) == EINTR) ; + if (rc == 0) + *excl = 0; } #endif return rc; @@ -2894,7 +2902,7 @@ mdb_hash_hex(MDB_val *val, char *hexbuf) * @param[in] env The MDB environment. * @param[in] lpath The pathname of the file used for the lock region. * @param[in] mode The Unix permissions for the file, if we create it. - * @param[out] excl Set to true if we got an exclusive lock on the region. + * @param[out] excl Resulting file lock type: -1 none, 0 shared, 1 exclusive * @return 0 on success, non-zero on failure. */ static int @@ -2909,8 +2917,7 @@ mdb_env_setup_locks(MDB_env *env, char *lpath, int mode, int *excl) if ((env->me_lfd = CreateFile(lpath, GENERIC_READ|GENERIC_WRITE, FILE_SHARE_READ|FILE_SHARE_WRITE, NULL, OPEN_ALWAYS, FILE_ATTRIBUTE_NORMAL, NULL)) == INVALID_HANDLE_VALUE) { - rc = ErrCode(); - return rc; + goto fail_errno; } /* Try to get exclusive lock. If we succeed, then * nobody is using the lock region and we should initialize it. @@ -2923,14 +2930,14 @@ mdb_env_setup_locks(MDB_env *env, char *lpath, int mode, int *excl) { int fdflags; if ((env->me_lfd = open(lpath, O_RDWR|O_CREAT, mode)) == -1) - return ErrCode(); + goto fail_errno; /* Lose record locks when exec*() */ if ((fdflags = fcntl(env->me_lfd, F_GETFD) | FD_CLOEXEC) >= 0) fcntl(env->me_lfd, F_SETFD, fdflags); } #else /* O_CLOEXEC on Linux: Open file and set FD_CLOEXEC atomically */ if ((env->me_lfd = open(lpath, O_RDWR|O_CREAT|O_CLOEXEC, mode)) == -1) - return ErrCode(); + goto fail_errno; #endif /* Try to get exclusive lock. If we succeed, then @@ -2944,15 +2951,9 @@ mdb_env_setup_locks(MDB_env *env, char *lpath, int mode, int *excl) if (size < rsize && *excl > 0) { #ifdef _WIN32 SetFilePointer(env->me_lfd, rsize, NULL, 0); - if (!SetEndOfFile(env->me_lfd)) { - rc = ErrCode(); - goto fail; - } + if (!SetEndOfFile(env->me_lfd)) goto fail_errno; #else - if (ftruncate(env->me_lfd, rsize) != 0) { - rc = ErrCode(); - goto fail; - } + if (ftruncate(env->me_lfd, rsize) != 0) goto fail_errno; #endif } else { rsize = size; @@ -2964,24 +2965,14 @@ mdb_env_setup_locks(MDB_env *env, char *lpath, int mode, int *excl) HANDLE mh; mh = CreateFileMapping(env->me_lfd, NULL, PAGE_READWRITE, 0, 0, NULL); - if (!mh) { - rc = ErrCode(); - goto fail; - } + if (!mh) goto fail_errno; env->me_txns = MapViewOfFileEx(mh, FILE_MAP_WRITE, 0, 0, rsize, NULL); CloseHandle(mh); - if (!env->me_txns) { - rc = ErrCode(); - goto fail; - } + if (!env->me_txns) goto fail_errno; #else void *m = mmap(NULL, rsize, PROT_READ|PROT_WRITE, MAP_SHARED, env->me_lfd, 0); - if (m == MAP_FAILED) { - env->me_txns = NULL; - rc = ErrCode(); - goto fail; - } + if (m == MAP_FAILED) goto fail_errno; env->me_txns = m; #endif } @@ -3015,15 +3006,9 @@ mdb_env_setup_locks(MDB_env *env, char *lpath, int mode, int *excl) sprintf(env->me_txns->mti_rmname, "Global\\MDBr%s", hexbuf); sprintf(env->me_txns->mti_wmname, "Global\\MDBw%s", hexbuf); env->me_rmutex = CreateMutex(&mdb_all_sa, FALSE, env->me_txns->mti_rmname); - if (!env->me_rmutex) { - rc = ErrCode(); - goto fail; - } + if (!env->me_rmutex) goto fail_errno; env->me_wmutex = CreateMutex(&mdb_all_sa, FALSE, env->me_txns->mti_wmname); - if (!env->me_wmutex) { - rc = ErrCode(); - goto fail; - } + if (!env->me_wmutex) goto fail_errno; #elif defined(MDB_USE_POSIX_SEM) struct stat stbuf; struct { @@ -3033,7 +3018,7 @@ mdb_env_setup_locks(MDB_env *env, char *lpath, int mode, int *excl) MDB_val val; char hexbuf[17]; - fstat(env->me_lfd, &stbuf); + if (fstat(env->me_lfd, &stbuf)) goto fail_errno; idbuf.dev = stbuf.st_dev; idbuf.ino = stbuf.st_ino; val.mv_data = &idbuf; @@ -3048,27 +3033,21 @@ mdb_env_setup_locks(MDB_env *env, char *lpath, int mode, int *excl) sem_unlink(env->me_txns->mti_wmname); env->me_rmutex = sem_open(env->me_txns->mti_rmname, O_CREAT|O_EXCL, mode, 1); - if (env->me_rmutex == SEM_FAILED) { - rc = ErrCode(); - goto fail; - } + if (env->me_rmutex == SEM_FAILED) goto fail_errno; env->me_wmutex = sem_open(env->me_txns->mti_wmname, O_CREAT|O_EXCL, mode, 1); - if (env->me_wmutex == SEM_FAILED) { - rc = ErrCode(); - goto fail; - } + if (env->me_wmutex == SEM_FAILED) goto fail_errno; #else /* MDB_USE_POSIX_SEM */ pthread_mutexattr_t mattr; - pthread_mutexattr_init(&mattr); - rc = pthread_mutexattr_setpshared(&mattr, PTHREAD_PROCESS_SHARED); - if (rc) { + if ((rc = pthread_mutexattr_init(&mattr)) + || (rc = pthread_mutexattr_setpshared(&mattr, PTHREAD_PROCESS_SHARED)) + || (rc = pthread_mutex_init(&env->me_txns->mti_mutex, &mattr)) + || (rc = pthread_mutex_init(&env->me_txns->mti_wmutex, &mattr))) goto fail; - } - pthread_mutex_init(&env->me_txns->mti_mutex, &mattr); - pthread_mutex_init(&env->me_txns->mti_wmutex, &mattr); + pthread_mutexattr_destroy(&mattr); #endif /* _WIN32 || MDB_USE_POSIX_SEM */ + env->me_txns->mti_version = MDB_VERSION; env->me_txns->mti_magic = MDB_MAGIC; env->me_txns->mti_txnid = 0; @@ -3092,35 +3071,22 @@ mdb_env_setup_locks(MDB_env *env, char *lpath, int mode, int *excl) } #ifdef _WIN32 env->me_rmutex = OpenMutex(SYNCHRONIZE, FALSE, env->me_txns->mti_rmname); - if (!env->me_rmutex) { - rc = ErrCode(); - goto fail; - } + if (!env->me_rmutex) goto fail_errno; env->me_wmutex = OpenMutex(SYNCHRONIZE, FALSE, env->me_txns->mti_wmname); - if (!env->me_wmutex) { - rc = ErrCode(); - goto fail; - } + if (!env->me_wmutex) goto fail_errno; #elif defined(MDB_USE_POSIX_SEM) env->me_rmutex = sem_open(env->me_txns->mti_rmname, 0); - if (env->me_rmutex == SEM_FAILED) { - rc = ErrCode(); - goto fail; - } + if (env->me_rmutex == SEM_FAILED) goto fail_errno; env->me_wmutex = sem_open(env->me_txns->mti_wmname, 0); - if (env->me_wmutex == SEM_FAILED) { - rc = ErrCode(); - goto fail; - } + if (env->me_wmutex == SEM_FAILED) goto fail_errno; #endif } return MDB_SUCCESS; +fail_errno: + rc = ErrCode(); fail: - close(env->me_lfd); - env->me_lfd = INVALID_HANDLE_VALUE; return rc; - } /** The name of the lock file in the DB environment */ @@ -3200,9 +3166,9 @@ mdb_env_open(MDB_env *env, const char *path, unsigned int flags, mode_t mode) goto leave; } } - env->me_path = strdup(path); DPRINTF("opened dbenv %p", (void *) env); pthread_key_create(&env->me_txkey, mdb_env_reader_dest); + env->me_numdbs = 2; /* this notes that me_txkey was set */ #ifdef _WIN32 /* Windows TLS callbacks need help finding their TLS info. */ if (mdb_tls_nkeys < MAX_TLS_KEYS) @@ -3213,32 +3179,97 @@ mdb_env_open(MDB_env *env, const char *path, unsigned int flags, mode_t mode) } #endif if (excl > 0) { - rc = mdb_env_share_locks(env); + rc = mdb_env_share_locks(env, &excl); if (rc) goto leave; } - env->me_numdbs = 2; env->me_dbxs = calloc(env->me_maxdbs, sizeof(MDB_dbx)); env->me_dbflags = calloc(env->me_maxdbs, sizeof(uint16_t)); - if (!env->me_dbxs || !env->me_dbflags) + env->me_path = strdup(path); + if (!env->me_dbxs || !env->me_dbflags || !env->me_path) rc = ENOMEM; } leave: if (rc) { - if (env->me_fd != INVALID_HANDLE_VALUE) { - close(env->me_fd); - env->me_fd = INVALID_HANDLE_VALUE; - } - if (env->me_lfd != INVALID_HANDLE_VALUE) { - close(env->me_lfd); - env->me_lfd = INVALID_HANDLE_VALUE; - } + mdb_env_close0(env, excl); } free(lpath); return rc; } +/** Destroy resources from mdb_env_open() and clear our readers */ +static void +mdb_env_close0(MDB_env *env, int excl) +{ + int i; + + if (env->me_lfd == INVALID_HANDLE_VALUE) /* 1st field to get inited */ + return; + + free(env->me_dbflags); + free(env->me_dbxs); + free(env->me_path); + + if (env->me_numdbs) { + pthread_key_delete(env->me_txkey); +#ifdef _WIN32 + /* Delete our key from the global list */ + for (i=0; ime_txkey) { + mdb_tls_keys[i] = mdb_tls_keys[mdb_tls_nkeys-1]; + mdb_tls_nkeys--; + break; + } +#endif + } + + if (env->me_map) { + munmap(env->me_map, env->me_mapsize); + } + if (env->me_mfd != env->me_fd && env->me_mfd != INVALID_HANDLE_VALUE) + close(env->me_mfd); + if (env->me_fd != INVALID_HANDLE_VALUE) + close(env->me_fd); + if (env->me_txns) { + pid_t pid = env->me_pid; + /* Clearing readers is done in this function because + * me_txkey with its destructor must be disabled first. + */ + for (i = env->me_numreaders; --i >= 0; ) + if (env->me_txns->mti_readers[i].mr_pid == pid) + env->me_txns->mti_readers[i].mr_pid = 0; +#ifdef _WIN32 + if (env->me_rmutex) { + CloseHandle(env->me_rmutex); + if (env->me_wmutex) CloseHandle(env->me_wmutex); + } + /* Windows automatically destroys the mutexes when + * the last handle closes. + */ +#elif defined(MDB_USE_POSIX_SEM) + if (env->me_rmutex != SEM_FAILED) { + sem_close(env->me_rmutex); + if (env->me_wmutex != SEM_FAILED) + sem_close(env->me_wmutex); + /* If we have the filelock: If we are the + * only remaining user, clean up semaphores. + */ + if (excl == 0) + mdb_env_excl_lock(env, &excl); + if (excl > 0) { + sem_unlink(env->me_txns->mti_rmname); + sem_unlink(env->me_txns->mti_wmname); + } + } +#endif + munmap((void *)env->me_txns, (env->me_maxreaders-1)*sizeof(MDB_reader)+sizeof(MDB_txninfo)); + } + close(env->me_lfd); + + env->me_lfd = INVALID_HANDLE_VALUE; /* Mark env as reset */ +} + void mdb_env_close(MDB_env *env) { @@ -3248,63 +3279,13 @@ mdb_env_close(MDB_env *env) return; VGMEMP_DESTROY(env); - while (env->me_dpages) { - dp = env->me_dpages; + while ((dp = env->me_dpages) != NULL) { VGMEMP_DEFINED(&dp->mp_next, sizeof(dp->mp_next)); env->me_dpages = dp->mp_next; free(dp); } - free(env->me_dbflags); - free(env->me_dbxs); - free(env->me_path); - - pthread_key_delete(env->me_txkey); -#ifdef _WIN32 - /* Delete our key from the global list */ - { int i; - for (i=0; ime_txkey) { - mdb_tls_keys[i] = mdb_tls_keys[mdb_tls_nkeys-1]; - mdb_tls_nkeys--; - break; - } - } -#endif - - if (env->me_map) { - munmap(env->me_map, env->me_mapsize); - } - if (env->me_mfd != env->me_fd) - close(env->me_mfd); - close(env->me_fd); - if (env->me_txns) { - pid_t pid = env->me_pid; - unsigned int i; - for (i=0; ime_numreaders; i++) - if (env->me_txns->mti_readers[i].mr_pid == pid) - env->me_txns->mti_readers[i].mr_pid = 0; -#ifdef _WIN32 - CloseHandle(env->me_rmutex); - CloseHandle(env->me_wmutex); - /* Windows automatically destroys the mutexes when - * the last handle closes. - */ -#elif defined(MDB_USE_POSIX_SEM) - sem_close(env->me_rmutex); - sem_close(env->me_wmutex); - { int excl = 0; - if (!mdb_env_excl_lock(env, &excl) && excl) { - /* we are the only remaining user of the environment. - clean up semaphores. */ - sem_unlink(env->me_txns->mti_rmname); - sem_unlink(env->me_txns->mti_wmname); - } - } -#endif - munmap((void *)env->me_txns, (env->me_maxreaders-1)*sizeof(MDB_reader)+sizeof(MDB_txninfo)); - } - close(env->me_lfd); + mdb_env_close0(env, 0); mdb_midl_free(env->me_free_pgs); free(env); } From c67ea9c060d845829a5ef903f2437cac2e044255 Mon Sep 17 00:00:00 2001 From: Hallvard Furuseth Date: Mon, 17 Sep 2012 15:42:15 +0200 Subject: [PATCH 13/15] ITS#7377 Catch MDB user errors. --- libraries/libmdb/mdb.c | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) diff --git a/libraries/libmdb/mdb.c b/libraries/libmdb/mdb.c index 8aacb88ad9..533ec7b4a8 100644 --- a/libraries/libmdb/mdb.c +++ b/libraries/libmdb/mdb.c @@ -1698,7 +1698,7 @@ mdb_txn_renew(MDB_txn *txn) { int rc; - if (!txn) + if (! (txn && txn->mt_flags & MDB_TXN_RDONLY)) return EINVAL; if (txn->mt_env->me_flags & MDB_FATAL_ERROR) { @@ -1728,13 +1728,13 @@ mdb_txn_begin(MDB_env *env, MDB_txn *parent, unsigned int flags, MDB_txn **ret) if ((env->me_flags & MDB_RDONLY) && !(flags & MDB_RDONLY)) return EACCES; if (parent) { - /* parent already has an active child txn */ - if (parent->mt_child) { + /* Nested transactions: Max 1 child, write txns only, no writemap */ + if (parent->mt_child || + (flags & MDB_RDONLY) || (parent->mt_flags & MDB_TXN_RDONLY) || + (env->me_flags & MDB_WRITEMAP)) + { return EINVAL; } - /* nested TXNs not supported here */ - if (env->me_flags & MDB_WRITEMAP) - return EINVAL; } size = sizeof(MDB_txn) + env->me_maxdbs * (sizeof(MDB_db)+1); if (!(flags & MDB_RDONLY)) @@ -3102,6 +3102,9 @@ mdb_env_open(MDB_env *env, const char *path, unsigned int flags, mode_t mode) int oflags, rc, len, excl; char *lpath, *dpath; + if (env->me_fd != INVALID_HANDLE_VALUE) + return EINVAL; + len = strlen(path); if (flags & MDB_NOSUBDIR) { rc = len + sizeof(LOCKSUFF) + len + 1; From 5ef56b437e2163e14b7e761bcd2e2700fe007c7d Mon Sep 17 00:00:00 2001 From: Howard Chu Date: Mon, 17 Sep 2012 07:01:28 -0700 Subject: [PATCH 14/15] More for ab04c50a32275e216b82edaeeed50208cf49336b Use explicit MDB_RDONLY flag --- libraries/libmdb/mdb_stat.c | 2 +- libraries/libmdb/mdb_stata.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/libraries/libmdb/mdb_stat.c b/libraries/libmdb/mdb_stat.c index 6007510c6b..97bd8d4467 100644 --- a/libraries/libmdb/mdb_stat.c +++ b/libraries/libmdb/mdb_stat.c @@ -38,7 +38,7 @@ int main(int argc,char * argv[]) printf("mdb_env_open failed, error %d\n", rc); goto env_close; } - rc = mdb_txn_begin(env, NULL, 1, &txn); + rc = mdb_txn_begin(env, NULL, MDB_RDONLY, &txn); if (rc) { printf("mdb_txn_begin failed, error %d\n", rc); goto env_close; diff --git a/libraries/libmdb/mdb_stata.c b/libraries/libmdb/mdb_stata.c index f8b2fe59a8..7cfebb4914 100644 --- a/libraries/libmdb/mdb_stata.c +++ b/libraries/libmdb/mdb_stata.c @@ -36,7 +36,7 @@ int main(int argc,char * argv[]) printf("mdb_env_open failed, error %d\n", rc); goto env_close; } - rc = mdb_txn_begin(env, NULL, 1, &txn); + rc = mdb_txn_begin(env, NULL, MDB_RDONLY, &txn); if (rc) { printf("mdb_txn_begin failed, error %d\n", rc); goto env_close; From 0a359fb62998d54c4c3b44f0d1b243e52640fd72 Mon Sep 17 00:00:00 2001 From: Howard Chu Date: Mon, 17 Sep 2012 07:02:41 -0700 Subject: [PATCH 15/15] More for 48ef27b6f5c804eca6a9d27f8dd2b4ded376f8af page_split with newindex > split_indx --- libraries/libmdb/mdb.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/libraries/libmdb/mdb.c b/libraries/libmdb/mdb.c index 533ec7b4a8..2c54cbd2e5 100644 --- a/libraries/libmdb/mdb.c +++ b/libraries/libmdb/mdb.c @@ -6128,9 +6128,10 @@ mdb_page_split(MDB_cursor *mc, MDB_val *newkey, MDB_val *newdata, pgno_t newpgno psize += NODEDSZ(node); psize += psize & 1; if (psize > pmax) { - if (i >= newindx) + if (i >= newindx) { split_indx = newindx; - else + newpos = 0; + } else split_indx = i+1; break; }