From 2b960613e03dbc5b371378abfd051c8e75821171 Mon Sep 17 00:00:00 2001 From: Hallvard Furuseth Date: Wed, 3 Oct 2012 18:04:07 +0200 Subject: [PATCH 01/20] ITS#7377 Document caveats/troubleshooting. --- libraries/libmdb/mdb.h | 55 ++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 55 insertions(+) diff --git a/libraries/libmdb/mdb.h b/libraries/libmdb/mdb.h index 925bf17d37..1272630787 100644 --- a/libraries/libmdb/mdb.h +++ b/libraries/libmdb/mdb.h @@ -38,6 +38,61 @@ * corrupt the database. Of course if your application code is known to * be bug-free (...) then this is not an issue. * + * Troubleshooting the lock file, plus semaphores on BSD systems: + * + * - A broken lockfile can cause sync issues. + * Stale reader transactions left behind by an aborted program + * cause further writes to grow the database quickly, and + * stale locks can block further operation. + * + * Fix: Terminate all programs using the database, or make + * them close it. Next database user will reset the lockfile. + * + * - On BSD systems or others configured with MDB_USE_POSIX_SEM, + * startup can fail due to semaphores owned by another userid. + * + * Fix: Open and close the database as the user which owns the + * semaphores (likely last user) or as root, while no other + * process is using the database. + * + * Restrictions/caveats (in addition to those listed for some functions): + * + * - Only the database owner should normally use the database on + * BSD systems or when otherwise configured with MDB_USE_POSIX_SEM. + * Multiple users can cause startup to fail later, as noted above. + * + * - A thread can only use one transaction at a time, plus any child + * transactions. Each transaction belongs to one thread. See below. + * + * - Use an MDB_env* in the process which opened it, without fork()ing. + * + * - Do not have an MDB database open twice in the same process at + * the same time. Not even from a plain open() call - close()ing it + * breaks flock() advisory locking. 
+ * + * - Avoid long-lived transactions. Read transactions prevent + * reuse of pages freed by newer write transactions, thus the + * database can grow quickly. Write transactions prevent + * other write transactions, since writes are serialized. + * + * ...when several processes can use a database concurrently: + * + * - Avoid suspending a process with active transactions. These + * would then be "long-lived" as above. + * + * - Avoid aborting a process with an active transaction. + * The transaction becomes "long-lived" as above until the lockfile + * is reset, since the process may not remove it from the lockfile. + * + * - If you do that anyway, close the environment once in a while, + * so the lockfile can get reset. + * + * - Do not use MDB databases on remote filesystems. This breaks + * flock() on some OSes, even between two processes on the same host. + * + * - Opening a database can fail if another process is opening or + * closing it at exactly the same time. + * * @author Howard Chu, Symas Corporation. * * @copyright Copyright 2011-2012 Howard Chu, Symas Corp. All rights reserved. 
From ab3fea51dc890feaca7075643efe9410949e5363 Mon Sep 17 00:00:00 2001 From: Hallvard Furuseth Date: Wed, 3 Oct 2012 18:06:29 +0200 Subject: [PATCH 02/20] ITS#7377 Catch ftruncate() error --- libraries/libmdb/mdb.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/libraries/libmdb/mdb.c b/libraries/libmdb/mdb.c index 42e33824b1..e92b5e7b1b 100644 --- a/libraries/libmdb/mdb.c +++ b/libraries/libmdb/mdb.c @@ -2629,7 +2629,8 @@ mdb_env_open2(MDB_env *env, unsigned int flags) prot = PROT_READ; if (flags & MDB_WRITEMAP) { prot |= PROT_WRITE; - ftruncate(env->me_fd, env->me_mapsize); + if (ftruncate(env->me_fd, env->me_mapsize) < 0) + return ErrCode(); } env->me_map = mmap(meta.mm_address, env->me_mapsize, prot, i, env->me_fd, 0); From 38fb8e6eb1c2db048fd2473bfcdd5eb16ec54517 Mon Sep 17 00:00:00 2001 From: Hallvard Furuseth Date: Wed, 3 Oct 2012 18:06:29 +0200 Subject: [PATCH 03/20] ITS#7377 Wrap fcntl F_SETLK in EINTR loop too. This can happen even on local filesystems. --- libraries/libmdb/mdb.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/libraries/libmdb/mdb.c b/libraries/libmdb/mdb.c index e92b5e7b1b..9e3e2ae9d7 100644 --- a/libraries/libmdb/mdb.c +++ b/libraries/libmdb/mdb.c @@ -2806,7 +2806,9 @@ mdb_env_excl_lock(MDB_env *env, int *excl) lock_info.l_whence = SEEK_SET; lock_info.l_start = 0; lock_info.l_len = 1; - if (!fcntl(env->me_lfd, F_SETLK, &lock_info)) { + while ((rc = fcntl(env->me_lfd, F_SETLK, &lock_info)) && + (rc = ErrCode()) == EINTR) ; + if (!rc) { *excl = 1; } else # ifdef MDB_USE_POSIX_SEM From a3f33dd20e71b9c94e94a0e1125fcee8d35fc70f Mon Sep 17 00:00:00 2001 From: Hallvard Furuseth Date: Wed, 3 Oct 2012 18:08:56 +0200 Subject: [PATCH 04/20] Fix mdb_open() off-by-one error in maxdbs check --- libraries/libmdb/mdb.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/libraries/libmdb/mdb.c b/libraries/libmdb/mdb.c index 9e3e2ae9d7..262919a24c 100644 --- a/libraries/libmdb/mdb.c +++ 
b/libraries/libmdb/mdb.c @@ -6548,7 +6548,7 @@ int mdb_open(MDB_txn *txn, const char *name, unsigned int flags, MDB_dbi *dbi) } /* If no free slot and max hit, fail */ - if (!unused && txn->mt_numdbs >= txn->mt_env->me_maxdbs - 1) + if (!unused && txn->mt_numdbs >= txn->mt_env->me_maxdbs) return MDB_DBS_FULL; /* Find the DB info */ From 756ce8e10ca52ff4516b56d42308116fac3c185d Mon Sep 17 00:00:00 2001 From: Hallvard Furuseth Date: Wed, 3 Oct 2012 21:13:21 +0200 Subject: [PATCH 05/20] Tweak MDB restrictions --- libraries/libmdb/mdb.h | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/libraries/libmdb/mdb.h b/libraries/libmdb/mdb.h index 1272630787..8ded7374c0 100644 --- a/libraries/libmdb/mdb.h +++ b/libraries/libmdb/mdb.h @@ -87,8 +87,10 @@ * - If you do that anyway, close the environment once in a while, * so the lockfile can get reset. * - * - Do not use MDB databases on remote filesystems. This breaks - * flock() on some OSes, even between two processes on the same host. + * - Do not use MDB databases on remote filesystems, even between + * processes on the same host. This breaks flock() on some OSes, + * possibly memory map sync, and certainly sync between programs + * on different hosts. * * - Opening a database can fail if another process is opening or * closing it at exactly the same time. 
From ebb6859ea5f4ca44ca528e49b17800f9992703e7 Mon Sep 17 00:00:00 2001 From: Howard Chu Date: Thu, 11 Oct 2012 12:21:40 -0700 Subject: [PATCH 06/20] Add mdb_copy for backing up a DB environment --- libraries/libmdb/Makefile | 3 +- libraries/libmdb/mdb.c | 96 +++++++++++++++++++++++++++++++++++++ libraries/libmdb/mdb.h | 12 +++++ libraries/libmdb/mdb_copy.c | 43 +++++++++++++++++ 4 files changed, 153 insertions(+), 1 deletion(-) create mode 100644 libraries/libmdb/mdb_copy.c diff --git a/libraries/libmdb/Makefile b/libraries/libmdb/Makefile index 796be078d4..67a2007bb8 100644 --- a/libraries/libmdb/Makefile +++ b/libraries/libmdb/Makefile @@ -5,7 +5,7 @@ CFLAGS = -pthread $(OPT) $(W) $(XCFLAGS) LDLIBS = SOLIBS = -PROGS = mdb_stat mtest mtest2 mtest3 mtest4 mtest5 +PROGS = mdb_stat mdb_copy mtest mtest2 mtest3 mtest4 mtest5 all: libmdb.a libmdb.so $(PROGS) clean: @@ -22,6 +22,7 @@ libmdb.so: mdb.o midl.o gcc -pthread -shared -o $@ mdb.o midl.o $(SOLIBS) mdb_stat: mdb_stat.o libmdb.a +mdb_copy: mdb_copy.o libmdb.a mtest: mtest.o libmdb.a mtest2: mtest2.o libmdb.a mtest3: mtest3.o libmdb.a diff --git a/libraries/libmdb/mdb.c b/libraries/libmdb/mdb.c index 262919a24c..7d9ea5cd4a 100644 --- a/libraries/libmdb/mdb.c +++ b/libraries/libmdb/mdb.c @@ -32,6 +32,7 @@ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 
*/ +#define _GNU_SOURCE #include #include #include @@ -3280,6 +3281,101 @@ mdb_env_close0(MDB_env *env, int excl) env->me_lfd = INVALID_HANDLE_VALUE; /* Mark env as reset */ } +int +mdb_env_copy(MDB_env *env, const char *path) +{ + MDB_txn *txn = NULL; + int rc, len, oflags; + size_t wsize; + char *lpath, *ptr; + HANDLE newfd = INVALID_HANDLE_VALUE; + + if (env->me_flags & MDB_NOSUBDIR) { + lpath = path; + } else { + len = strlen(path); + len += sizeof(DATANAME); + lpath = malloc(len); + if (!lpath) + return ENOMEM; + sprintf(lpath, "%s" DATANAME, path); + } + + /* The destination path must exist, but the destination file must not. + * We don't want the OS to cache the writes, since the source data is + * already in the OS cache. + */ +#ifdef _WIN32 + newfd = CreateFile(lpath, GENERIC_WRITE, 0, NULL, CREATE_NEW, + FILE_FLAG_NO_BUFFERING|FILE_FLAG_WRITE_THROUGH, NULL); +#else + newfd = open(lpath, O_WRONLY|O_CREAT|O_EXCL +#ifdef O_DIRECT + |O_DIRECT +#endif + , 0666); +#endif + if (!(env->me_flags & MDB_NOSUBDIR)) + free(lpath); + if (newfd == INVALID_HANDLE_VALUE) { + rc = ErrCode(); + goto leave; + } + +#ifdef F_NOCACHE /* __APPLE__ */ + rc = fcntl(newfd, F_NOCACHE, 1); + if (rc) { + rc = ErrCode(); + goto leave; + } +#endif + + /* Temporarily block writers until we snapshot the meta pages */ + LOCK_MUTEX_W(env); + + rc = mdb_txn_begin(env, NULL, MDB_RDONLY, &txn); + if (rc) { + UNLOCK_MUTEX_W(env); + goto leave; + } + + wsize = env->me_psize * 2; +#ifdef _WIN32 + { + DWORD len; + rc = WriteFile(newfd, env->me_map, wsize, &len, NULL); + rc = (len == wsize) ? MDB_SUCCESS : ErrCode(); + } +#else + rc = write(newfd, env->me_map, wsize); + rc = (rc == (int)wsize) ? MDB_SUCCESS : ErrCode(); +#endif + UNLOCK_MUTEX_W(env); + + if (rc) + goto leave; + + ptr = env->me_map + wsize; + wsize = txn->mt_next_pgno * env->me_psize - wsize; +#ifdef _WIN32 + { + DWORD len; + rc = WriteFile(newfd, ptr, wsize, &len, NULL); + rc = (len == wsize) ? 
MDB_SUCCESS : ErrCode(); + } +#else + rc = write(newfd, ptr, wsize); + rc = (rc == (int)wsize) ? MDB_SUCCESS : ErrCode(); +#endif + mdb_txn_abort(txn); + +leave: + if (newfd != INVALID_HANDLE_VALUE) + close(newfd); + + return rc; +} + void mdb_env_close(MDB_env *env) { diff --git a/libraries/libmdb/mdb.h b/libraries/libmdb/mdb.h index 8ded7374c0..1735fbd375 100644 --- a/libraries/libmdb/mdb.h +++ b/libraries/libmdb/mdb.h @@ -450,6 +450,18 @@ int mdb_env_create(MDB_env **env); */ int mdb_env_open(MDB_env *env, const char *path, unsigned int flags, mode_t mode); + /** @brief Copy an MDB environment to the specified path. + * + * This function may be used to make a backup of an existing environment. + * @param[in] env An environment handle returned by #mdb_env_create(). It + * must have already been opened successfully. + * @param[in] path The directory in which the copy will reside. This + * directory must already exist and be writable but must otherwise be + * empty. + * @return A non-zero error value on failure and 0 on success. + */ +int mdb_env_copy(MDB_env *env, const char *path); + /** @brief Return statistics about the MDB environment. * * @param[in] env An environment handle returned by #mdb_env_create() diff --git a/libraries/libmdb/mdb_copy.c b/libraries/libmdb/mdb_copy.c new file mode 100644 index 0000000000..c5eb6b500b --- /dev/null +++ b/libraries/libmdb/mdb_copy.c @@ -0,0 +1,43 @@ +/* mdb_copy.c - memory-mapped database backup tool */ +/* + * Copyright 2012 Howard Chu, Symas Corp. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted only as authorized by the OpenLDAP + * Public License. + * + * A copy of this license is available in the file LICENSE in the + * top-level directory of the distribution or, alternatively, at + * <http://www.OpenLDAP.org/license.html>. 
+ */ +#include +#include +#include +#include "mdb.h" + +int main(int argc,char * argv[]) +{ + int rc; + MDB_env *env; + char *envname = argv[1]; + + if (argc != 3) { + fprintf(stderr, "usage: %s srcpath dstpath\n", argv[0]); + exit(EXIT_FAILURE); + } + + rc = mdb_env_create(&env); + + rc = mdb_env_open(env, envname, MDB_RDONLY, 0); + if (rc) { + printf("mdb_env_open failed, error %d %s\n", rc, mdb_strerror(rc)); + } else { + rc = mdb_env_copy(env, argv[2]); + if (rc) + printf("mdb_env_copy failed, error %d %s\n", rc, mdb_strerror(rc)); + } + mdb_env_close(env); + + return rc ? EXIT_FAILURE : EXIT_SUCCESS; +} From f037dd2d19978256182a0f91f9a8178d37a8193f Mon Sep 17 00:00:00 2001 From: Howard Chu Date: Thu, 11 Oct 2012 16:47:56 -0700 Subject: [PATCH 07/20] Silence warning in prev commit --- libraries/libmdb/mdb.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/libraries/libmdb/mdb.c b/libraries/libmdb/mdb.c index 7d9ea5cd4a..15432178c2 100644 --- a/libraries/libmdb/mdb.c +++ b/libraries/libmdb/mdb.c @@ -3291,7 +3291,7 @@ mdb_env_copy(MDB_env *env, const char *path) HANDLE newfd = INVALID_HANDLE_VALUE; if (env->me_flags & MDB_NOSUBDIR) { - lpath = path; + lpath = (char *)path; } else { len = strlen(path); len += sizeof(DATANAME); From e40713b631506dcab54402c26a826bd2d421dcb0 Mon Sep 17 00:00:00 2001 From: Howard Chu Date: Sat, 13 Oct 2012 04:05:00 -0700 Subject: [PATCH 08/20] Support read access on read-only media --- libraries/libmdb/mdb.c | 77 ++++++++++++++++++++++++++++-------------- 1 file changed, 52 insertions(+), 25 deletions(-) diff --git a/libraries/libmdb/mdb.c b/libraries/libmdb/mdb.c index 15432178c2..9bfe2f8d1f 100644 --- a/libraries/libmdb/mdb.c +++ b/libraries/libmdb/mdb.c @@ -913,6 +913,8 @@ struct MDB_env { HANDLE me_mfd; /**< just for writing the meta pages */ /** Failed to update the meta page. Probably an I/O error. */ #define MDB_FATAL_ERROR 0x80000000U + /** Read-only Filesystem. Allow read access, no locking. 
*/ +#define MDB_ROFS 0x40000000U uint32_t me_flags; /**< @ref mdb_env */ unsigned int me_psize; /**< size of a page, from #GET_PAGESIZE */ unsigned int me_maxreaders; /**< size of the reader table */ @@ -1638,33 +1640,39 @@ mdb_txn_renew0(MDB_txn *txn) txn->mt_dbxs = env->me_dbxs; /* mostly static anyway */ if (txn->mt_flags & MDB_TXN_RDONLY) { - MDB_reader *r = pthread_getspecific(env->me_txkey); - if (!r) { - pid_t pid = env->me_pid; - pthread_t tid = pthread_self(); + if (env->me_flags & MDB_ROFS) { + i = mdb_env_pick_meta(env); + txn->mt_txnid = env->me_metas[i]->mm_txnid; + txn->mt_u.reader = NULL; + } else { + MDB_reader *r = pthread_getspecific(env->me_txkey); + if (!r) { + pid_t pid = env->me_pid; + pthread_t tid = pthread_self(); - LOCK_MUTEX_R(env); - for (i=0; ime_txns->mti_numreaders; i++) - if (env->me_txns->mti_readers[i].mr_pid == 0) - break; - if (i == env->me_maxreaders) { + LOCK_MUTEX_R(env); + for (i=0; ime_txns->mti_numreaders; i++) + if (env->me_txns->mti_readers[i].mr_pid == 0) + break; + if (i == env->me_maxreaders) { + UNLOCK_MUTEX_R(env); + return MDB_READERS_FULL; + } + env->me_txns->mti_readers[i].mr_pid = pid; + env->me_txns->mti_readers[i].mr_tid = tid; + if (i >= env->me_txns->mti_numreaders) + env->me_txns->mti_numreaders = i+1; + /* Save numreaders for un-mutexed mdb_env_close() */ + env->me_numreaders = env->me_txns->mti_numreaders; UNLOCK_MUTEX_R(env); - return MDB_READERS_FULL; + r = &env->me_txns->mti_readers[i]; + pthread_setspecific(env->me_txkey, r); } - env->me_txns->mti_readers[i].mr_pid = pid; - env->me_txns->mti_readers[i].mr_tid = tid; - if (i >= env->me_txns->mti_numreaders) - env->me_txns->mti_numreaders = i+1; - /* Save numreaders for un-mutexed mdb_env_close() */ - env->me_numreaders = env->me_txns->mti_numreaders; - UNLOCK_MUTEX_R(env); - r = &env->me_txns->mti_readers[i]; - pthread_setspecific(env->me_txkey, r); + txn->mt_txnid = r->mr_txnid = env->me_txns->mti_txnid; + txn->mt_u.reader = r; } - txn->mt_txnid = 
r->mr_txnid = env->me_txns->mti_txnid; txn->mt_toggle = txn->mt_txnid & 1; txn->mt_next_pgno = env->me_metas[txn->mt_toggle]->mm_last_pg+1; - txn->mt_u.reader = r; } else { LOCK_MUTEX_W(env); @@ -1804,7 +1812,8 @@ mdb_txn_reset0(MDB_txn *txn) MDB_env *env = txn->mt_env; if (F_ISSET(txn->mt_flags, MDB_TXN_RDONLY)) { - txn->mt_u.reader->mr_txnid = (txnid_t)-1; + if (!(env->me_flags & MDB_ROFS)) + txn->mt_u.reader->mr_txnid = (txnid_t)-1; } else { MDB_oldpages *mop; MDB_page *dp; @@ -2580,7 +2589,7 @@ mdb_env_open2(MDB_env *env, unsigned int flags) MDB_meta meta; MDB_page *p; - env->me_flags = flags; + env->me_flags |= flags; memset(&meta, 0, sizeof(meta)); @@ -2921,6 +2930,11 @@ mdb_env_setup_locks(MDB_env *env, char *lpath, int mode, int *excl) if ((env->me_lfd = CreateFile(lpath, GENERIC_READ|GENERIC_WRITE, FILE_SHARE_READ|FILE_SHARE_WRITE, NULL, OPEN_ALWAYS, FILE_ATTRIBUTE_NORMAL, NULL)) == INVALID_HANDLE_VALUE) { + rc = ErrCode(); + if (rc == ERROR_WRITE_PROTECT) { + env->me_flags |= MDB_ROFS; + return MDB_SUCCESS; + } goto fail_errno; } /* Try to get exclusive lock. If we succeed, then @@ -2933,15 +2947,27 @@ mdb_env_setup_locks(MDB_env *env, char *lpath, int mode, int *excl) #if !(O_CLOEXEC) { int fdflags; - if ((env->me_lfd = open(lpath, O_RDWR|O_CREAT, mode)) == -1) + if ((env->me_lfd = open(lpath, O_RDWR|O_CREAT, mode)) == -1) { + rc = ErrCode(); + if (rc == EROFS) { + env->me_flags |= MDB_ROFS; + return MDB_SUCCESS; + } goto fail_errno; + } /* Lose record locks when exec*() */ if ((fdflags = fcntl(env->me_lfd, F_GETFD) | FD_CLOEXEC) >= 0) fcntl(env->me_lfd, F_SETFD, fdflags); } #else /* O_CLOEXEC on Linux: Open file and set FD_CLOEXEC atomically */ - if ((env->me_lfd = open(lpath, O_RDWR|O_CREAT|O_CLOEXEC, mode)) == -1) + if ((env->me_lfd = open(lpath, O_RDWR|O_CREAT|O_CLOEXEC, mode)) == -1) { + rc = ErrCode(); + if (rc == EROFS) { + env->me_flags |= MDB_ROFS; + return MDB_SUCCESS; + } goto fail_errno; + } #endif /* Try to get exclusive lock. 
If we succeed, then @@ -3128,6 +3154,7 @@ mdb_env_open(MDB_env *env, const char *path, unsigned int flags, mode_t mode) sprintf(dpath, "%s" DATANAME, path); } + env->me_flags = 0; rc = mdb_env_setup_locks(env, lpath, mode, &excl); if (rc) goto leave; From 462d9dfd104397652a905e8f685289bd4ac8a6e1 Mon Sep 17 00:00:00 2001 From: Hallvard Furuseth Date: Mon, 15 Oct 2012 17:03:33 +0200 Subject: [PATCH 09/20] Fix last commit. Add flag MDB_ENV_ACTIVE instead of using the state of me_lfd. Require MDB_RDONLY for MDB_ROFS. Swap reader/writer mutex lock. --- libraries/libmdb/mdb.c | 58 +++++++++++++++++++++++++++--------------- 1 file changed, 37 insertions(+), 21 deletions(-) diff --git a/libraries/libmdb/mdb.c b/libraries/libmdb/mdb.c index 9bfe2f8d1f..b39fbfbfcb 100644 --- a/libraries/libmdb/mdb.c +++ b/libraries/libmdb/mdb.c @@ -915,6 +915,8 @@ struct MDB_env { #define MDB_FATAL_ERROR 0x80000000U /** Read-only Filesystem. Allow read access, no locking. */ #define MDB_ROFS 0x40000000U + /** Some fields are initialized. 
*/ +#define MDB_ENV_ACTIVE 0x20000000U uint32_t me_flags; /**< @ref mdb_env */ unsigned int me_psize; /**< size of a page, from #GET_PAGESIZE */ unsigned int me_maxreaders; /**< size of the reader table */ @@ -2583,14 +2585,13 @@ mdb_env_get_maxreaders(MDB_env *env, unsigned int *readers) /** Further setup required for opening an MDB environment */ static int -mdb_env_open2(MDB_env *env, unsigned int flags) +mdb_env_open2(MDB_env *env) { + unsigned int flags = env->me_flags; int i, newenv = 0, prot; MDB_meta meta; MDB_page *p; - env->me_flags |= flags; - memset(&meta, 0, sizeof(meta)); if ((i = mdb_env_read_header(env, &meta)) != 0) { @@ -2931,7 +2932,7 @@ mdb_env_setup_locks(MDB_env *env, char *lpath, int mode, int *excl) FILE_SHARE_READ|FILE_SHARE_WRITE, NULL, OPEN_ALWAYS, FILE_ATTRIBUTE_NORMAL, NULL)) == INVALID_HANDLE_VALUE) { rc = ErrCode(); - if (rc == ERROR_WRITE_PROTECT) { + if (rc == ERROR_WRITE_PROTECT && (env->me_flags & MDB_RDONLY)) { env->me_flags |= MDB_ROFS; return MDB_SUCCESS; } @@ -2949,7 +2950,7 @@ mdb_env_setup_locks(MDB_env *env, char *lpath, int mode, int *excl) int fdflags; if ((env->me_lfd = open(lpath, O_RDWR|O_CREAT, mode)) == -1) { rc = ErrCode(); - if (rc == EROFS) { + if (rc == EROFS && (env->me_flags & MDB_RDONLY)) { env->me_flags |= MDB_ROFS; return MDB_SUCCESS; } @@ -2962,7 +2963,7 @@ mdb_env_setup_locks(MDB_env *env, char *lpath, int mode, int *excl) #else /* O_CLOEXEC on Linux: Open file and set FD_CLOEXEC atomically */ if ((env->me_lfd = open(lpath, O_RDWR|O_CREAT|O_CLOEXEC, mode)) == -1) { rc = ErrCode(); - if (rc == EROFS) { + if (rc == EROFS && (env->me_flags & MDB_RDONLY)) { env->me_flags |= MDB_ROFS; return MDB_SUCCESS; } @@ -3154,15 +3155,15 @@ mdb_env_open(MDB_env *env, const char *path, unsigned int flags, mode_t mode) sprintf(dpath, "%s" DATANAME, path); } - env->me_flags = 0; + /* silently ignore WRITEMAP if we're only getting read access */ + if (F_ISSET(flags, MDB_RDONLY|MDB_WRITEMAP)) + flags ^= MDB_WRITEMAP; + 
env->me_flags = flags |= MDB_ENV_ACTIVE; + rc = mdb_env_setup_locks(env, lpath, mode, &excl); if (rc) goto leave; - /* silently ignore WRITEMAP if we're only getting read access */ - if (F_ISSET(flags, MDB_RDONLY) && F_ISSET(flags, MDB_WRITEMAP)) - flags ^= MDB_WRITEMAP; - #ifdef _WIN32 if (F_ISSET(flags, MDB_RDONLY)) { oflags = GENERIC_READ; @@ -3187,7 +3188,7 @@ mdb_env_open(MDB_env *env, const char *path, unsigned int flags, mode_t mode) goto leave; } - if ((rc = mdb_env_open2(env, flags)) == MDB_SUCCESS) { + if ((rc = mdb_env_open2(env)) == MDB_SUCCESS) { if (flags & (MDB_RDONLY|MDB_NOSYNC|MDB_NOMETASYNC|MDB_WRITEMAP)) { env->me_mfd = env->me_fd; } else { @@ -3242,7 +3243,7 @@ mdb_env_close0(MDB_env *env, int excl) { int i; - if (env->me_lfd == INVALID_HANDLE_VALUE) /* 1st field to get inited */ + if (!(env->me_flags & MDB_ENV_ACTIVE)) return; free(env->me_dbflags); @@ -3303,9 +3304,11 @@ mdb_env_close0(MDB_env *env, int excl) #endif munmap((void *)env->me_txns, (env->me_maxreaders-1)*sizeof(MDB_reader)+sizeof(MDB_txninfo)); } - close(env->me_lfd); + if (env->me_lfd != INVALID_HANDLE_VALUE) { + close(env->me_lfd); + } - env->me_lfd = INVALID_HANDLE_VALUE; /* Mark env as reset */ + env->me_flags &= ~MDB_ENV_ACTIVE; } int @@ -3357,13 +3360,25 @@ mdb_env_copy(MDB_env *env, const char *path) } #endif - /* Temporarily block writers until we snapshot the meta pages */ - LOCK_MUTEX_W(env); - + /* Do the lock/unlock of the reader mutex before starting the + * write txn. Otherwise other read txns could block writers. 
+ */ rc = mdb_txn_begin(env, NULL, MDB_RDONLY, &txn); - if (rc) { - UNLOCK_MUTEX_W(env); + if (rc) goto leave; + + if (!(env->me_flags & MDB_ROFS)) { + /* We must start the actual read txn after blocking writers */ + mdb_txn_reset0(txn); + + /* Temporarily block writers until we snapshot the meta pages */ + LOCK_MUTEX_W(env); + + rc = mdb_txn_renew0(txn); + if (rc) { + UNLOCK_MUTEX_W(env); + goto leave; + } } wsize = env->me_psize * 2; @@ -3377,7 +3392,8 @@ mdb_env_copy(MDB_env *env, const char *path) rc = write(newfd, env->me_map, wsize); rc = (rc == (int)wsize) ? MDB_SUCCESS : ErrCode(); #endif - UNLOCK_MUTEX_W(env); + if (! (env->me_flags & MDB_ROFS)) + UNLOCK_MUTEX_W(env); if (rc) goto leave; From 092f9a5c66425535f9d538d8fefd0049ad65f452 Mon Sep 17 00:00:00 2001 From: Hallvard Furuseth Date: Mon, 15 Oct 2012 17:03:46 +0200 Subject: [PATCH 10/20] More for mdb_env_copy(). Fix #define _GNU_SOURCE = 1 to match any command line -D_GNU_SOURCE. .gitignore += mdb_copy. --- libraries/libmdb/.gitignore | 1 + libraries/libmdb/mdb.c | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/libraries/libmdb/.gitignore b/libraries/libmdb/.gitignore index 134c457f7d..0d493fe188 100644 --- a/libraries/libmdb/.gitignore +++ b/libraries/libmdb/.gitignore @@ -1,6 +1,7 @@ mtest mtest[23456] testdb +mdb_copy mdb_stat *.[ao] *.so diff --git a/libraries/libmdb/mdb.c b/libraries/libmdb/mdb.c index b39fbfbfcb..07ecf99eba 100644 --- a/libraries/libmdb/mdb.c +++ b/libraries/libmdb/mdb.c @@ -32,7 +32,7 @@ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. */ -#define _GNU_SOURCE +#define _GNU_SOURCE 1 #include #include #include From e9d87866c49acdba4118259fb05de2dafb4d698f Mon Sep 17 00:00:00 2001 From: Hallvard Furuseth Date: Mon, 15 Oct 2012 17:04:11 +0200 Subject: [PATCH 11/20] mdb_env_open(): Keep mdb_env_set_flags() flags. 
--- libraries/libmdb/mdb.c | 1 + libraries/libmdb/mdb.h | 3 ++- 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/libraries/libmdb/mdb.c b/libraries/libmdb/mdb.c index 07ecf99eba..cd2ad9a510 100644 --- a/libraries/libmdb/mdb.c +++ b/libraries/libmdb/mdb.c @@ -3155,6 +3155,7 @@ mdb_env_open(MDB_env *env, const char *path, unsigned int flags, mode_t mode) sprintf(dpath, "%s" DATANAME, path); } + flags |= env->me_flags; /* silently ignore WRITEMAP if we're only getting read access */ if (F_ISSET(flags, MDB_RDONLY|MDB_WRITEMAP)) flags ^= MDB_WRITEMAP; diff --git a/libraries/libmdb/mdb.h b/libraries/libmdb/mdb.h index 1735fbd375..d78868cb08 100644 --- a/libraries/libmdb/mdb.h +++ b/libraries/libmdb/mdb.h @@ -401,6 +401,7 @@ int mdb_env_create(MDB_env **env); * @param[in] flags Special options for this environment. This parameter * must be set to 0 or by bitwise OR'ing together one or more of the * values described here. + * Flags set by mdb_env_set_flags() are also used. *
<ul> *
<li>#MDB_FIXEDMAP * use a fixed address for the mmap region. This flag must be specified @@ -501,7 +502,7 @@ void mdb_env_close(MDB_env *env); /** @brief Set environment flags. * - * This may be used to set some flags that weren't already set during + * This may be used to set some flags in addition to those from * #mdb_env_open(), or to unset these flags. * @param[in] env An environment handle returned by #mdb_env_create() * @param[in] flags The flags to change, bitwise OR'ed together From 88b7bb46394a53675b452b70e5a7a109626bdd3f Mon Sep 17 00:00:00 2001 From: Hallvard Furuseth Date: Mon, 15 Oct 2012 17:04:43 +0200 Subject: [PATCH 12/20] Explicitly unlock the MDB lockfile on Windows. --- libraries/libmdb/mdb.c | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/libraries/libmdb/mdb.c b/libraries/libmdb/mdb.c index cd2ad9a510..383616116e 100644 --- a/libraries/libmdb/mdb.c +++ b/libraries/libmdb/mdb.c @@ -2806,7 +2806,9 @@ mdb_env_excl_lock(MDB_env *env, int *excl) } else { OVERLAPPED ov; memset(&ov, 0, sizeof(ov)); - if (!LockFileEx(env->me_lfd, 0, 0, 1, 0, &ov)) { + if (LockFileEx(env->me_lfd, 0, 0, 1, 0, &ov)) { + *excl = 0; + } else { rc = ErrCode(); } } @@ -3306,6 +3308,14 @@ mdb_env_close0(MDB_env *env, int excl) munmap((void *)env->me_txns, (env->me_maxreaders-1)*sizeof(MDB_reader)+sizeof(MDB_txninfo)); } if (env->me_lfd != INVALID_HANDLE_VALUE) { +#ifdef _WIN32 + if (excl >= 0) { + /* Unlock the lockfile. Windows would have unlocked it + * after closing anyway, but not necessarily at once. + */ + UnlockFile(env->me_lfd, 0, 0, 1, 0); + } +#endif close(env->me_lfd); } From 94965307c831be615f5fbb771c348fd72217adc0 Mon Sep 17 00:00:00 2001 From: Hallvard Furuseth Date: Mon, 15 Oct 2012 17:04:48 +0200 Subject: [PATCH 13/20] ITS#7377 Errorcheck Windows calls and thread keys. 
--- libraries/libmdb/mdb.c | 26 ++++++++++++++++++-------- 1 file changed, 18 insertions(+), 8 deletions(-) diff --git a/libraries/libmdb/mdb.c b/libraries/libmdb/mdb.c index 383616116e..2ac9278113 100644 --- a/libraries/libmdb/mdb.c +++ b/libraries/libmdb/mdb.c @@ -141,10 +141,11 @@ #define pthread_mutex_t HANDLE #define pthread_key_t DWORD #define pthread_self() GetCurrentThreadId() -#define pthread_key_create(x,y) (*(x) = TlsAlloc()) +#define pthread_key_create(x,y) \ + ((*(x) = TlsAlloc()) == TLS_OUT_OF_INDEXES ? ErrCode() : 0) #define pthread_key_delete(x) TlsFree(x) #define pthread_getspecific(x) TlsGetValue(x) -#define pthread_setspecific(x,y) TlsSetValue(x,y) +#define pthread_setspecific(x,y) (TlsSetValue(x,y) ? 0 : ErrCode()) #define pthread_mutex_unlock(x) ReleaseMutex(x) #define pthread_mutex_lock(x) WaitForSingleObject(x, INFINITE) #define LOCK_MUTEX_R(env) pthread_mutex_lock((env)->me_rmutex) @@ -1636,6 +1637,7 @@ mdb_txn_renew0(MDB_txn *txn) { MDB_env *env = txn->mt_env; unsigned int i; + int rc; /* Setup db info */ txn->mt_numdbs = env->me_numdbs; @@ -1668,7 +1670,10 @@ mdb_txn_renew0(MDB_txn *txn) env->me_numreaders = env->me_txns->mti_numreaders; UNLOCK_MUTEX_R(env); r = &env->me_txns->mti_readers[i]; - pthread_setspecific(env->me_txkey, r); + if ((rc = pthread_setspecific(env->me_txkey, r)) != 0) { + env->me_txns->mti_readers[i].mr_pid = 0; + return rc; + } } txn->mt_txnid = r->mr_txnid = env->me_txns->mti_txnid; txn->mt_u.reader = r; @@ -2771,9 +2776,12 @@ mdb_env_share_locks(MDB_env *env, int *excl) * then release the existing exclusive lock. 
*/ memset(&ov, 0, sizeof(ov)); - LockFileEx(env->me_lfd, 0, 0, 1, 0, &ov); - UnlockFile(env->me_lfd, 0, 0, 1, 0); - *excl = 0; + if (!LockFileEx(env->me_lfd, 0, 0, 1, 0, &ov)) { + rc = ErrCode(); + } else { + UnlockFile(env->me_lfd, 0, 0, 1, 0); + *excl = 0; + } } #else { @@ -3029,7 +3037,7 @@ mdb_env_setup_locks(MDB_env *env, char *lpath, int mode, int *excl) mdb_all_sa.lpSecurityDescriptor = &mdb_null_sd; mdb_sec_inited = 1; } - GetFileInformationByHandle(env->me_lfd, &stbuf); + if (!GetFileInformationByHandle(env->me_lfd, &stbuf)) goto fail_errno; idbuf.volume = stbuf.dwVolumeSerialNumber; idbuf.nhigh = stbuf.nFileIndexHigh; idbuf.nlow = stbuf.nFileIndexLow; @@ -3209,7 +3217,9 @@ mdb_env_open(MDB_env *env, const char *path, unsigned int flags, mode_t mode) } } DPRINTF("opened dbenv %p", (void *) env); - pthread_key_create(&env->me_txkey, mdb_env_reader_dest); + rc = pthread_key_create(&env->me_txkey, mdb_env_reader_dest); + if (rc) + goto leave; env->me_numdbs = 2; /* this notes that me_txkey was set */ #ifdef _WIN32 /* Windows TLS callbacks need help finding their TLS info. 
*/ From 65d40eb5d2c7c28df05e2c1d9b21d90e2a82e0b5 Mon Sep 17 00:00:00 2001 From: Howard Chu Date: Tue, 16 Oct 2012 14:53:25 -0700 Subject: [PATCH 14/20] Make sure increases of mapsize are persisted --- libraries/libmdb/mdb.c | 24 ++++++++++++++++-------- 1 file changed, 16 insertions(+), 8 deletions(-) diff --git a/libraries/libmdb/mdb.c b/libraries/libmdb/mdb.c index 2ac9278113..9fd7db3c89 100644 --- a/libraries/libmdb/mdb.c +++ b/libraries/libmdb/mdb.c @@ -2417,7 +2417,7 @@ static int mdb_env_write_meta(MDB_txn *txn) { MDB_env *env; - MDB_meta meta, metab; + MDB_meta meta, metab, *mp; off_t off; int rc, len, toggle; char *ptr; @@ -2433,9 +2433,12 @@ mdb_env_write_meta(MDB_txn *txn) toggle, txn->mt_dbs[MAIN_DBI].md_root); env = txn->mt_env; + mp = env->me_metas[toggle]; if (env->me_flags & MDB_WRITEMAP) { - MDB_meta *mp = env->me_metas[toggle]; + /* Persist any increases of mapsize config */ + if (env->me_mapsize > mp->mm_mapsize) + mp->mm_mapsize = env->me_mapsize; mp->mm_dbs[0] = txn->mt_dbs[0]; mp->mm_dbs[1] = txn->mt_dbs[1]; mp->mm_last_pg = txn->mt_next_pgno - 1; @@ -2456,7 +2459,13 @@ mdb_env_write_meta(MDB_txn *txn) metab.mm_last_pg = env->me_metas[toggle]->mm_last_pg; ptr = (char *)&meta; - off = offsetof(MDB_meta, mm_dbs[0].md_depth); + if (env->me_mapsize > mp->mm_mapsize) { + /* Persist any increases of mapsize config */ + meta.mm_mapsize = env->me_mapsize; + off = offsetof(MDB_meta, mm_mapsize); + } else { + off = offsetof(MDB_meta, mm_dbs[0].md_depth); + } len = sizeof(MDB_meta) - off; ptr += off; @@ -2604,11 +2613,11 @@ mdb_env_open2(MDB_env *env) return i; DPUTS("new mdbenv"); newenv = 1; + meta.mm_mapsize = env->me_mapsize > DEFAULT_MAPSIZE ? env->me_mapsize : DEFAULT_MAPSIZE; } - if (!env->me_mapsize) { - env->me_mapsize = newenv ? 
DEFAULT_MAPSIZE : meta.mm_mapsize; - } + if (env->me_mapsize < meta.mm_mapsize) + env->me_mapsize = meta.mm_mapsize; #ifdef _WIN32 { @@ -2657,7 +2666,6 @@ mdb_env_open2(MDB_env *env) #endif if (newenv) { - meta.mm_mapsize = env->me_mapsize; if (flags & MDB_FIXEDMAP) meta.mm_address = env->me_map; i = mdb_env_init_meta(env, &meta); @@ -3336,7 +3344,7 @@ int mdb_env_copy(MDB_env *env, const char *path) { MDB_txn *txn = NULL; - int rc, len, oflags; + int rc, len; size_t wsize; char *lpath, *ptr; HANDLE newfd = INVALID_HANDLE_VALUE; From 7f0771a3ed4ec42bd57f246919aba1dd755a207c Mon Sep 17 00:00:00 2001 From: Howard Chu Date: Tue, 16 Oct 2012 15:28:20 -0700 Subject: [PATCH 15/20] Merge mdb_stata into mdb_stat --- libraries/libmdb/mdb_stat.c | 112 +++++++++++++++++++++++++++++------ libraries/libmdb/mdb_stata.c | 84 -------------------------- 2 files changed, 93 insertions(+), 103 deletions(-) delete mode 100644 libraries/libmdb/mdb_stata.c diff --git a/libraries/libmdb/mdb_stat.c b/libraries/libmdb/mdb_stat.c index 97bd8d4467..73891cfcdb 100644 --- a/libraries/libmdb/mdb_stat.c +++ b/libraries/libmdb/mdb_stat.c @@ -13,49 +13,123 @@ */ #include #include -#include +#include +#include #include "mdb.h" -int main(int argc,char * argv[]) +static void prstat(MDB_stat *ms) { - int rc; + printf("Page size: %u\n", ms->ms_psize); + printf("Tree depth: %u\n", ms->ms_depth); + printf("Branch pages: %zu\n", ms->ms_branch_pages); + printf("Leaf pages: %zu\n", ms->ms_leaf_pages); + printf("Overflow pages: %zu\n", ms->ms_overflow_pages); + printf("Entries: %zu\n", ms->ms_entries); +} + +static void usage(char *prog) +{ + fprintf(stderr, "usage: %s dbpath [-a|-s subdb]\n", prog); + exit(EXIT_FAILURE); +} + +int main(int argc, char *argv[]) +{ + int i, rc; MDB_env *env; MDB_txn *txn; MDB_dbi dbi; MDB_stat mst; - char *envname = argv[1]; + char *prog = argv[0]; + char *envname; char *subname = NULL; + int alldbs = 0; - rc = mdb_env_create(&env); - - if (argc > 2) { - 
mdb_env_set_maxdbs(env, 4); - subname = argv[2]; + if (argc < 2) { + usage(prog); } - rc = mdb_env_open(env, envname, MDB_RDONLY, 0); + /* -a: print stat of main DB and all subDBs + * -s: print stat of only the named subDB + * (default) print stat of only the main DB + */ + while ((i = getopt(argc, argv, "as:")) != EOF) { + switch(i) { + case 'a': + alldbs++; + break; + case 's': + subname = optarg; + break; + default: + fprintf(stderr, "%s: unrecognized option -%c\n", prog, optopt); + usage(prog); + } + } + + if (optind != argc - 1) + usage(prog); + + envname = argv[optind]; + rc = mdb_env_create(&env); + + if (alldbs || subname) { + mdb_env_set_maxdbs(env, 4); + } + + rc = mdb_env_open(env, envname, MDB_RDONLY, 0664); if (rc) { - printf("mdb_env_open failed, error %d\n", rc); + printf("mdb_env_open failed, error %d %s\n", rc, mdb_strerror(rc)); goto env_close; } rc = mdb_txn_begin(env, NULL, MDB_RDONLY, &txn); if (rc) { - printf("mdb_txn_begin failed, error %d\n", rc); + printf("mdb_txn_begin failed, error %d %s\n", rc, mdb_strerror(rc)); goto env_close; } rc = mdb_open(txn, subname, 0, &dbi); if (rc) { - printf("mdb_open failed, error %d\n", rc); + printf("mdb_open failed, error %d %s\n", rc, mdb_strerror(rc)); goto txn_abort; } rc = mdb_stat(txn, dbi, &mst); - printf("Page size: %u\n", mst.ms_psize); - printf("Tree depth: %u\n", mst.ms_depth); - printf("Branch pages: %zu\n", mst.ms_branch_pages); - printf("Leaf pages: %zu\n", mst.ms_leaf_pages); - printf("Overflow pages: %zu\n", mst.ms_overflow_pages); - printf("Entries: %zu\n", mst.ms_entries); + if (rc) { + printf("mdb_stat failed, error %d %s\n", rc, mdb_strerror(rc)); + goto txn_abort; + } + prstat(&mst); + + if (alldbs) { + MDB_cursor *cursor; + MDB_val key; + + rc = mdb_cursor_open(txn, dbi, &cursor); + if (rc) { + printf("mdb_cursor_open failed, error %d %s\n", rc, mdb_strerror(rc)); + goto txn_abort; + } + while ((rc = mdb_cursor_get(cursor, &key, NULL, MDB_NEXT)) == 0) { + char *str = 
malloc(key.mv_size+1); + MDB_dbi db2; + memcpy(str, key.mv_data, key.mv_size); + str[key.mv_size] = '\0'; + rc = mdb_open(txn, str, 0, &db2); + if (rc == MDB_SUCCESS) + printf("\n%s\n", str); + free(str); + if (rc) continue; + rc = mdb_stat(txn, db2, &mst); + if (rc) { + printf("mdb_stat failed, error %d %s\n", rc, mdb_strerror(rc)); + goto txn_abort; + } + prstat(&mst); + mdb_close(env, db2); + } + mdb_cursor_close(cursor); + } + mdb_close(env, dbi); txn_abort: mdb_txn_abort(txn); diff --git a/libraries/libmdb/mdb_stata.c b/libraries/libmdb/mdb_stata.c deleted file mode 100644 index 7cfebb4914..0000000000 --- a/libraries/libmdb/mdb_stata.c +++ /dev/null @@ -1,84 +0,0 @@ -/* mdb_stat.c - memory-mapped database status tool */ -/* - * Copyright 2011 Howard Chu, Symas Corp. - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted only as authorized by the OpenLDAP - * Public License. - * - * A copy of this license is available in the file LICENSE in the - * top-level directory of the distribution or, alternatively, at - * . 
- */ -#include -#include -#include -#include "mdb.h" - -int main(int argc,char * argv[]) -{ - int rc; - MDB_env *env; - MDB_txn *txn; - MDB_dbi dbi; - MDB_stat mst; - MDB_cursor *cursor; - MDB_val key; - char *envname = argv[1]; - - rc = mdb_env_create(&env); - - mdb_env_set_maxdbs(env, 4); - - rc = mdb_env_open(env, envname, MDB_RDONLY, 0); - if (rc) { - printf("mdb_env_open failed, error %d\n", rc); - goto env_close; - } - rc = mdb_txn_begin(env, NULL, MDB_RDONLY, &txn); - if (rc) { - printf("mdb_txn_begin failed, error %d\n", rc); - goto env_close; - } - rc = mdb_open(txn, NULL, 0, &dbi); - if (rc) { - printf("mdb_open failed, error %d\n", rc); - goto txn_abort; - } - - rc = mdb_stat(txn, dbi, &mst); - printf("Page size: %u\n", mst.ms_psize); - printf("Tree depth: %u\n", mst.ms_depth); - printf("Branch pages: %zu\n", mst.ms_branch_pages); - printf("Leaf pages: %zu\n", mst.ms_leaf_pages); - printf("Overflow pages: %zu\n", mst.ms_overflow_pages); - printf("Entries: %zu\n", mst.ms_entries); - - rc = mdb_cursor_open(txn, dbi, &cursor); - while ((rc = mdb_cursor_get(cursor, &key, NULL, MDB_NEXT)) == 0) { - char *str = malloc(key.mv_size+1); - MDB_dbi db2; - memcpy(str, key.mv_data, key.mv_size); - str[key.mv_size] = '\0'; - printf("\n%s\n", str); - rc = mdb_open(txn, str, 0, &db2); - if (rc) break; - free(str); - rc = mdb_stat(txn, db2, &mst); - printf("Tree depth: %u\n", mst.ms_depth); - printf("Branch pages: %zu\n", mst.ms_branch_pages); - printf("Leaf pages: %zu\n", mst.ms_leaf_pages); - printf("Overflow pages: %zu\n", mst.ms_overflow_pages); - printf("Entries: %zu\n", mst.ms_entries); - mdb_close(env, db2); - } - mdb_cursor_close(cursor); - mdb_close(env, dbi); -txn_abort: - mdb_txn_abort(txn); -env_close: - mdb_env_close(env); - - return rc ? 
EXIT_FAILURE : EXIT_SUCCESS; -} From 754cd88819cceafe503b7a5c428b52c6abbcb748 Mon Sep 17 00:00:00 2001 From: Howard Chu Date: Tue, 16 Oct 2012 16:22:21 -0700 Subject: [PATCH 16/20] Add mdb_env_info() to retrieve interesting env params --- libraries/libmdb/mdb.c | 17 +++++++++++++++++ libraries/libmdb/mdb.h | 17 +++++++++++++++++ libraries/libmdb/mdb_stat.c | 19 +++++++++++++++++-- 3 files changed, 51 insertions(+), 2 deletions(-) diff --git a/libraries/libmdb/mdb.c b/libraries/libmdb/mdb.c index 9fd7db3c89..f3c482d888 100644 --- a/libraries/libmdb/mdb.c +++ b/libraries/libmdb/mdb.c @@ -6651,6 +6651,23 @@ mdb_env_stat(MDB_env *env, MDB_stat *arg) return mdb_stat0(env, &env->me_metas[toggle]->mm_dbs[MAIN_DBI], arg); } +int +mdb_env_info(MDB_env *env, MDB_envinfo *arg) +{ + int toggle; + + if (env == NULL || arg == NULL) + return EINVAL; + + toggle = mdb_env_pick_meta(env); + arg->me_mapsize = env->me_mapsize; + arg->me_maxreaders = env->me_maxreaders; + arg->me_numreaders = env->me_numreaders; + arg->me_last_txnid = env->me_metas[toggle]->mm_txnid; + arg->me_last_pgno = env->me_metas[toggle]->mm_last_pg; + return MDB_SUCCESS; +} + /** Set the default comparison functions for a database. * Called immediately after a database is opened to set the defaults. 
* The user can then override them with #mdb_set_compare() or diff --git a/libraries/libmdb/mdb.h b/libraries/libmdb/mdb.h index d78868cb08..739fee7265 100644 --- a/libraries/libmdb/mdb.h +++ b/libraries/libmdb/mdb.h @@ -358,6 +358,15 @@ typedef struct MDB_stat { size_t ms_entries; /**< Number of data items */ } MDB_stat; +/** @brief Information about the environment */ +typedef struct MDB_envinfo { + size_t me_mapsize; /**< Size of the data memory map */ + size_t me_last_txnid; /**< ID of the last committed transaction */ + size_t me_last_pgno; /**< ID of the last used page */ + unsigned int me_maxreaders; /**< maximum number of threads for the environment */ + unsigned int me_numreaders; /**< maximum number of threads used in the environment */ +} MDB_envinfo; + /** @brief Return the mdb library version information. * * @param[out] major if non-NULL, the library major version number is copied here @@ -471,6 +480,14 @@ int mdb_env_copy(MDB_env *env, const char *path); */ int mdb_env_stat(MDB_env *env, MDB_stat *stat); + /** @brief Return information about the MDB environment. + * + * @param[in] env An environment handle returned by #mdb_env_create() + * @param[out] stat The address of an #MDB_envinfo structure + * where the information will be copied + */ +int mdb_env_info(MDB_env *env, MDB_envinfo *stat); + /** @brief Flush the data buffers to disk. 
* * Data is always written to disk when #mdb_txn_commit() is called, diff --git a/libraries/libmdb/mdb_stat.c b/libraries/libmdb/mdb_stat.c index 73891cfcdb..8b196d1757 100644 --- a/libraries/libmdb/mdb_stat.c +++ b/libraries/libmdb/mdb_stat.c @@ -40,10 +40,11 @@ int main(int argc, char *argv[]) MDB_txn *txn; MDB_dbi dbi; MDB_stat mst; + MDB_envinfo mei; char *prog = argv[0]; char *envname; char *subname = NULL; - int alldbs = 0; + int alldbs = 0, envinfo = 0; if (argc < 2) { usage(prog); @@ -51,13 +52,17 @@ int main(int argc, char *argv[]) /* -a: print stat of main DB and all subDBs * -s: print stat of only the named subDB + * -e: print env info * (default) print stat of only the main DB */ - while ((i = getopt(argc, argv, "as:")) != EOF) { + while ((i = getopt(argc, argv, "aes:")) != EOF) { switch(i) { case 'a': alldbs++; break; + case 'e': + envinfo++; + break; case 's': subname = optarg; break; @@ -87,6 +92,16 @@ int main(int argc, char *argv[]) printf("mdb_txn_begin failed, error %d %s\n", rc, mdb_strerror(rc)); goto env_close; } + + if (envinfo) { + rc = mdb_env_info(env, &mei); + printf("Map size: %zu \n", mei.me_mapsize); + printf("Last transaction ID: %zu\n", mei.me_last_txnid); + printf("Last page used: %zu\n", mei.me_last_pgno); + printf("Max readers: %u\n", mei.me_maxreaders); + printf("Number of readers used: %u\n", mei.me_numreaders); + } + rc = mdb_open(txn, subname, 0, &dbi); if (rc) { printf("mdb_open failed, error %d %s\n", rc, mdb_strerror(rc)); From 617769bce5bcac809791adb11301e40d27c31566 Mon Sep 17 00:00:00 2001 From: Howard Chu Date: Tue, 16 Oct 2012 16:52:13 -0700 Subject: [PATCH 17/20] Tweak output, add free page info --- libraries/libmdb/mdb.c | 3 +- libraries/libmdb/mdb.h | 3 +- libraries/libmdb/mdb_stat.c | 75 ++++++++++++++++++++++++++++--------- 3 files changed, 62 insertions(+), 19 deletions(-) diff --git a/libraries/libmdb/mdb.c b/libraries/libmdb/mdb.c index f3c482d888..89bfd71f36 100644 --- a/libraries/libmdb/mdb.c +++ 
b/libraries/libmdb/mdb.c @@ -6660,11 +6660,12 @@ mdb_env_info(MDB_env *env, MDB_envinfo *arg) return EINVAL; toggle = mdb_env_pick_meta(env); + arg->me_mapaddr = (env->me_flags & MDB_FIXEDMAP) ? env->me_map : 0; arg->me_mapsize = env->me_mapsize; arg->me_maxreaders = env->me_maxreaders; arg->me_numreaders = env->me_numreaders; - arg->me_last_txnid = env->me_metas[toggle]->mm_txnid; arg->me_last_pgno = env->me_metas[toggle]->mm_last_pg; + arg->me_last_txnid = env->me_metas[toggle]->mm_txnid; return MDB_SUCCESS; } diff --git a/libraries/libmdb/mdb.h b/libraries/libmdb/mdb.h index 739fee7265..e3fc129829 100644 --- a/libraries/libmdb/mdb.h +++ b/libraries/libmdb/mdb.h @@ -360,9 +360,10 @@ typedef struct MDB_stat { /** @brief Information about the environment */ typedef struct MDB_envinfo { + void *me_mapaddr; /**< Address of map, if fixed */ size_t me_mapsize; /**< Size of the data memory map */ - size_t me_last_txnid; /**< ID of the last committed transaction */ size_t me_last_pgno; /**< ID of the last used page */ + size_t me_last_txnid; /**< ID of the last committed transaction */ unsigned int me_maxreaders; /**< maximum number of threads for the environment */ unsigned int me_numreaders; /**< maximum number of threads used in the environment */ } MDB_envinfo; diff --git a/libraries/libmdb/mdb_stat.c b/libraries/libmdb/mdb_stat.c index 8b196d1757..88ac801807 100644 --- a/libraries/libmdb/mdb_stat.c +++ b/libraries/libmdb/mdb_stat.c @@ -19,17 +19,19 @@ static void prstat(MDB_stat *ms) { - printf("Page size: %u\n", ms->ms_psize); - printf("Tree depth: %u\n", ms->ms_depth); - printf("Branch pages: %zu\n", ms->ms_branch_pages); - printf("Leaf pages: %zu\n", ms->ms_leaf_pages); - printf("Overflow pages: %zu\n", ms->ms_overflow_pages); - printf("Entries: %zu\n", ms->ms_entries); +#if 0 + printf(" Page size: %u\n", ms->ms_psize); +#endif + printf(" Tree depth: %u\n", ms->ms_depth); + printf(" Branch pages: %zu\n", ms->ms_branch_pages); + printf(" Leaf pages: %zu\n", 
ms->ms_leaf_pages); + printf(" Overflow pages: %zu\n", ms->ms_overflow_pages); + printf(" Entries: %zu\n", ms->ms_entries); } static void usage(char *prog) { - fprintf(stderr, "usage: %s dbpath [-a|-s subdb]\n", prog); + fprintf(stderr, "usage: %s dbpath [-e] [-f] [-n] [-a|-s subdb]\n", prog); exit(EXIT_FAILURE); } @@ -44,7 +46,7 @@ int main(int argc, char *argv[]) char *prog = argv[0]; char *envname; char *subname = NULL; - int alldbs = 0, envinfo = 0; + int alldbs = 0, envinfo = 0, envflags = 0, freinfo = 0; if (argc < 2) { usage(prog); @@ -53,9 +55,11 @@ int main(int argc, char *argv[]) /* -a: print stat of main DB and all subDBs * -s: print stat of only the named subDB * -e: print env info + * -f: print freelist info + * -n: use NOSUBDIR flag on env_open * (default) print stat of only the main DB */ - while ((i = getopt(argc, argv, "aes:")) != EOF) { + while ((i = getopt(argc, argv, "aefns:")) != EOF) { switch(i) { case 'a': alldbs++; @@ -63,11 +67,16 @@ int main(int argc, char *argv[]) case 'e': envinfo++; break; + case 'f': + freinfo++; + break; + case 'n': + envflags |= MDB_NOSUBDIR; + break; case 's': subname = optarg; break; default: - fprintf(stderr, "%s: unrecognized option -%c\n", prog, optopt); usage(prog); } } @@ -82,7 +91,7 @@ int main(int argc, char *argv[]) mdb_env_set_maxdbs(env, 4); } - rc = mdb_env_open(env, envname, MDB_RDONLY, 0664); + rc = mdb_env_open(env, envname, envflags | MDB_RDONLY, 0664); if (rc) { printf("mdb_env_open failed, error %d %s\n", rc, mdb_strerror(rc)); goto env_close; @@ -94,12 +103,43 @@ int main(int argc, char *argv[]) } if (envinfo) { + rc = mdb_env_stat(env, &mst); rc = mdb_env_info(env, &mei); - printf("Map size: %zu \n", mei.me_mapsize); - printf("Last transaction ID: %zu\n", mei.me_last_txnid); - printf("Last page used: %zu\n", mei.me_last_pgno); - printf("Max readers: %u\n", mei.me_maxreaders); - printf("Number of readers used: %u\n", mei.me_numreaders); + printf("Environment Info\n"); + printf(" Map address: 
%p\n", mei.me_mapaddr); + printf(" Map size: %zu\n", mei.me_mapsize); + printf(" Page size: %u\n", mst.ms_psize); + printf(" Max pages: %zu\n", mei.me_mapsize / mst.ms_psize); + printf(" Number of pages used: %zu\n", mei.me_last_pgno+1); + printf(" Last transaction ID: %zu\n", mei.me_last_txnid); + printf(" Max readers: %u\n", mei.me_maxreaders); + printf(" Number of readers used: %u\n", mei.me_numreaders); + } + + if (freinfo) { + MDB_cursor *cursor; + MDB_val data; + size_t pages = 0, *iptr; + + printf("Freelist Status\n"); + dbi = 0; + rc = mdb_cursor_open(txn, dbi, &cursor); + if (rc) { + printf("mdb_cursor_open failed, error %d %s\n", rc, mdb_strerror(rc)); + goto txn_abort; + } + rc = mdb_stat(txn, dbi, &mst); + if (rc) { + printf("mdb_stat failed, error %d %s\n", rc, mdb_strerror(rc)); + goto txn_abort; + } + while ((rc = mdb_cursor_get(cursor, NULL, &data, MDB_NEXT)) == 0) { + iptr = data.mv_data; + pages += *iptr; + } + mdb_cursor_close(cursor); + prstat(&mst); + printf(" Free pages: %zu\n", pages); } rc = mdb_open(txn, subname, 0, &dbi); @@ -113,6 +153,7 @@ int main(int argc, char *argv[]) printf("mdb_stat failed, error %d %s\n", rc, mdb_strerror(rc)); goto txn_abort; } + printf("Status of %s\n", subname ? 
subname : "Main DB"); prstat(&mst); if (alldbs) { @@ -131,7 +172,7 @@ int main(int argc, char *argv[]) str[key.mv_size] = '\0'; rc = mdb_open(txn, str, 0, &db2); if (rc == MDB_SUCCESS) - printf("\n%s\n", str); + printf("Status of %s\n", str); free(str); if (rc) continue; rc = mdb_stat(txn, db2, &mst); From 5c1ee7f7ba9580d5ff29e74cbb02eee335c33d94 Mon Sep 17 00:00:00 2001 From: Howard Chu Date: Mon, 22 Oct 2012 15:58:46 -0700 Subject: [PATCH 18/20] Fix MDB_PREV Could return 1 garbage record before actual data, if starting from EOF --- libraries/libmdb/mdb.c | 19 +++++++++++++------ 1 file changed, 13 insertions(+), 6 deletions(-) diff --git a/libraries/libmdb/mdb.c b/libraries/libmdb/mdb.c index 89bfd71f36..994a5d95f3 100644 --- a/libraries/libmdb/mdb.c +++ b/libraries/libmdb/mdb.c @@ -3982,8 +3982,12 @@ mdb_cursor_sibling(MDB_cursor *mc, int move_right) : (mc->mc_ki[mc->mc_top] == 0)) { DPRINTF("no more keys left, moving to %s sibling", move_right ? "right" : "left"); - if ((rc = mdb_cursor_sibling(mc, move_right)) != MDB_SUCCESS) + if ((rc = mdb_cursor_sibling(mc, move_right)) != MDB_SUCCESS) { + /* undo cursor_pop before returning */ + mc->mc_top++; + mc->mc_snum++; return rc; + } } else { if (move_right) mc->mc_ki[mc->mc_top]++; @@ -4393,8 +4397,8 @@ mdb_cursor_last(MDB_cursor *mc, MDB_val *key, MDB_val *data) } assert(IS_LEAF(mc->mc_pg[mc->mc_top])); - mc->mc_ki[mc->mc_top] = NUMKEYS(mc->mc_pg[mc->mc_top]) - 1; mc->mc_flags |= C_INITIALIZED|C_EOF; + mc->mc_ki[mc->mc_top] = NUMKEYS(mc->mc_pg[mc->mc_top]) - 1; } leaf = NODEPTR(mc->mc_pg[mc->mc_top], mc->mc_ki[mc->mc_top]); @@ -4525,9 +4529,10 @@ fetchm: case MDB_PREV_NODUP: if (!(mc->mc_flags & C_INITIALIZED) || (mc->mc_flags & C_EOF)) { rc = mdb_cursor_last(mc, key, data); - mc->mc_flags &= ~C_EOF; - } else - rc = mdb_cursor_prev(mc, key, data, op); + mc->mc_flags |= C_INITIALIZED; + mc->mc_ki[mc->mc_top]++; + } + rc = mdb_cursor_prev(mc, key, data, op); break; case MDB_FIRST: rc = mdb_cursor_first(mc, key, 
data); @@ -4575,7 +4580,9 @@ mdb_cursor_touch(MDB_cursor *mc) if (mc->mc_dbi > MAIN_DBI && !(*mc->mc_dbflag & DB_DIRTY)) { MDB_cursor mc2; - mdb_cursor_init(&mc2, mc->mc_txn, MAIN_DBI, NULL); + MDB_xcursor mcx; + mdb_cursor_init(&mc2, mc->mc_txn, MAIN_DBI, + mc->mc_txn->mt_dbs[MAIN_DBI].md_flags & MDB_DUPSORT ? &mcx : NULL); rc = mdb_page_search(&mc2, &mc->mc_dbx->md_name, MDB_PS_MODIFY); if (rc) return rc; From 5f2ddb5532df92c765f2f36a7606ed6ef28d23db Mon Sep 17 00:00:00 2001 From: Howard Chu Date: Mon, 22 Oct 2012 16:13:06 -0700 Subject: [PATCH 19/20] Minor cleanup --- libraries/libmdb/mdb.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/libraries/libmdb/mdb.c b/libraries/libmdb/mdb.c index 994a5d95f3..e4d543eb22 100644 --- a/libraries/libmdb/mdb.c +++ b/libraries/libmdb/mdb.c @@ -1949,15 +1949,15 @@ mdb_txn_commit(MDB_txn *txn) return EINVAL; } - /* Merge (and close) our cursors with parent's */ - mdb_cursor_merge(txn); - if (txn->mt_parent) { MDB_db *ip, *jp; MDB_dbi i; unsigned x, y; MDB_ID2L dst, src; + /* Merge (and close) our cursors with parent's */ + mdb_cursor_merge(txn); + /* Update parent's DB table */ ip = &txn->mt_parent->mt_dbs[2]; jp = &txn->mt_dbs[2]; @@ -2140,7 +2140,7 @@ again: while (env->me_pgfree) { MDB_oldpages *mop = env->me_pgfree; env->me_pgfree = mop->mo_next; - free(mop);; + free(mop); } /* Check for growth of freelist again */ @@ -4000,7 +4000,7 @@ mdb_cursor_sibling(MDB_cursor *mc, int move_right) indx = NODEPTR(mc->mc_pg[mc->mc_top], mc->mc_ki[mc->mc_top]); if ((rc = mdb_page_get(mc->mc_txn, NODEPGNO(indx), &mp))) - return rc;; + return rc; mdb_cursor_push(mc, mp); From 66f2b7b2ec4318557be079ad4764becf4e36ec73 Mon Sep 17 00:00:00 2001 From: Howard Chu Date: Mon, 22 Oct 2012 17:03:15 -0700 Subject: [PATCH 20/20] Make sure mdb_open flags are committed on main DB If no data was modified in the txn, mdb_open flag changes were dropped. 
--- libraries/libmdb/mdb.c | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/libraries/libmdb/mdb.c b/libraries/libmdb/mdb.c index e4d543eb22..7d89ec3fbc 100644 --- a/libraries/libmdb/mdb.c +++ b/libraries/libmdb/mdb.c @@ -826,6 +826,7 @@ struct MDB_txn { */ #define MDB_TXN_RDONLY 0x01 /**< read-only transaction */ #define MDB_TXN_ERROR 0x02 /**< an error has occurred */ +#define MDB_TXN_DIRTY 0x04 /**< must write, even if dirty list is empty */ /** @} */ unsigned int mt_flags; /**< @ref mdb_txn */ /** Tracks which of the two meta pages was used at the start @@ -2005,7 +2006,7 @@ mdb_txn_commit(MDB_txn *txn) return EINVAL; } - if (!txn->mt_u.dirty_list[0].mid) + if (!txn->mt_u.dirty_list[0].mid && !(txn->mt_flags & MDB_TXN_DIRTY)) goto done; DPRINTF("committing txn %zu %p on mdbenv %p, root page %zu", @@ -6715,8 +6716,13 @@ int mdb_open(MDB_txn *txn, const char *name, unsigned int flags, MDB_dbi *dbi) /* main DB? */ if (!name) { *dbi = MAIN_DBI; - if (flags & (MDB_DUPSORT|MDB_REVERSEKEY|MDB_INTEGERKEY)) - txn->mt_dbs[MAIN_DBI].md_flags |= (flags & (MDB_DUPSORT|MDB_REVERSEKEY|MDB_INTEGERKEY)); + if (flags & (MDB_DUPSORT|MDB_REVERSEKEY|MDB_INTEGERKEY)) { + /* make sure flag changes get committed */ + if ((txn->mt_dbs[MAIN_DBI].md_flags | flags) != txn->mt_dbs[MAIN_DBI].md_flags) { + txn->mt_dbs[MAIN_DBI].md_flags |= (flags & (MDB_DUPSORT|MDB_REVERSEKEY|MDB_INTEGERKEY)); + txn->mt_flags |= MDB_TXN_DIRTY; + } + } mdb_default_cmp(txn, MAIN_DBI); return MDB_SUCCESS; }