From 277526d0f00489637798f84b5cf4e8d65a1690ca Mon Sep 17 00:00:00 2001 From: Howard Chu Date: Thu, 26 Jun 2014 05:19:47 -0700 Subject: [PATCH 01/24] Setup for gcov/coverage testing --- libraries/liblmdb/.gitignore | 3 +++ libraries/liblmdb/Makefile | 15 +++++++++++++++ 2 files changed, 18 insertions(+) diff --git a/libraries/liblmdb/.gitignore b/libraries/liblmdb/.gitignore index 0b4b1cba67..f3277afe42 100644 --- a/libraries/liblmdb/.gitignore +++ b/libraries/liblmdb/.gitignore @@ -10,6 +10,9 @@ mdb_stat *.bak *.orig *.rej +*.gcov +*.gcda +*.gcno core core.* valgrind.* diff --git a/libraries/liblmdb/Makefile b/libraries/liblmdb/Makefile index b65c9b9a8b..bb0b1e8c72 100644 --- a/libraries/liblmdb/Makefile +++ b/libraries/liblmdb/Makefile @@ -76,3 +76,18 @@ midl.o: midl.c midl.h %.o: %.c lmdb.h $(CC) $(CFLAGS) $(CPPFLAGS) -c $< + +coverage: xmtest + -rm -rf testdb; mkdir testdb + ./xmtest + gcov xmdb.c + gcov xmidl.c + +xmtest: mtest.o xmdb.o xmidl.o + gcc -o xmtest mtest.o xmdb.o xmidl.o -pthread -fprofile-arcs -ftest-coverage + +xmdb.o: mdb.c lmdb.h midl.h + $(CC) $(CFLAGS) -fPIC $(CPPFLAGS) -O0 -fprofile-arcs -ftest-coverage -c mdb.c -o $@ + +xmidl.o: midl.c midl.h + $(CC) $(CFLAGS) -fPIC $(CPPFLAGS) -O0 -fprofile-arcs -ftest-coverage -c midl.c -o $@ From c73f0877500c01d978a4e8b1d50bd64011f70984 Mon Sep 17 00:00:00 2001 From: Howard Chu Date: Fri, 27 Jun 2014 10:53:11 -0700 Subject: [PATCH 02/24] Trap signals Try to exit cleanly to avoid leaving stale readers. Not a critical issue since mdb_reader_check can take care of them, but still cleaner. 
--- libraries/liblmdb/mdb_dump.c | 23 +++++++++++++++++++++++ 1 file changed, 23 insertions(+) diff --git a/libraries/liblmdb/mdb_dump.c b/libraries/liblmdb/mdb_dump.c index 5242519ebc..c684375979 100644 --- a/libraries/liblmdb/mdb_dump.c +++ b/libraries/liblmdb/mdb_dump.c @@ -17,6 +17,7 @@ #include #include #include +#include #include "lmdb.h" #define PRINT 1 @@ -37,6 +38,13 @@ flagbit dbflags[] = { { 0, NULL } }; +static volatile sig_atomic_t gotsig; + +static void dumpsig( int sig ) +{ + gotsig=1; +} + static const char hexc[] = "0123456789abcdef"; static void hex(unsigned char c) @@ -112,6 +120,10 @@ static int dumpit(MDB_txn *txn, MDB_dbi dbi, char *name) if (rc) return rc; while ((rc = mdb_cursor_get(mc, &key, &data, MDB_NEXT) == MDB_SUCCESS)) { + if (gotsig) { + rc = EINTR; + break; + } if (mode & PRINT) { text(&key); text(&data); @@ -196,6 +208,15 @@ int main(int argc, char *argv[]) if (optind != argc - 1) usage(prog); +#ifdef SIGPIPE + signal(SIGPIPE, dumpsig); +#endif +#ifdef SIGHUP + signal(SIGHUP, dumpsig); +#endif + signal(SIGINT, dumpsig); + signal(SIGTERM, dumpsig); + envname = argv[optind]; rc = mdb_env_create(&env); @@ -247,6 +268,8 @@ int main(int argc, char *argv[]) list++; } else { rc = dumpit(txn, db2, str); + if (rc) + break; } mdb_close(env, db2); } From 91d8ad1d3c3364a667ed91cb8551327ea14ea8e0 Mon Sep 17 00:00:00 2001 From: Hallvard Furuseth Date: Sat, 28 Jun 2014 13:38:03 +0200 Subject: [PATCH 03/24] Fix mdb_page_loose() vs. mdb_page_flush(). Do not overwrite loose pages' mp_pgno with mp_next. mdb_page_flush() will use mp_pgno to rebuild dirty_list. --- libraries/liblmdb/mdb.c | 30 +++++++++++------------------- 1 file changed, 11 insertions(+), 19 deletions(-) diff --git a/libraries/liblmdb/mdb.c b/libraries/liblmdb/mdb.c index c21f556e08..c5311fab32 100644 --- a/libraries/liblmdb/mdb.c +++ b/libraries/liblmdb/mdb.c @@ -701,6 +701,9 @@ typedef struct MDB_page { /** The number of overflow pages needed to store the given size. 
*/ #define OVPAGES(size, psize) ((PAGEHDRSZ-1 + (size)) / (psize) + 1) + /** Link in #MDB_txn.%mt_loose_pgs list */ +#define NEXT_LOOSE_PAGE(p) (*(MDB_page **)METADATA(p)) + /** Header for a single key/data pair within a page. * Used in pages of type #P_BRANCH and #P_LEAF without #P_LEAF2. * We guarantee 2-byte alignment for 'MDB_node's. @@ -900,7 +903,7 @@ struct MDB_txn { */ MDB_IDL mt_free_pgs; /** The list of loose pages that became unused and may be reused - * in this transaction. + * in this transaction, linked through #NEXT_LOOSE_PAGE(page). */ MDB_page *mt_loose_pgs; /** The sorted list of dirty pages we temporarily wrote to disk @@ -1569,9 +1572,7 @@ mdb_page_loose(MDB_cursor *mc, MDB_page *mp) } } if (loose) { - pgno_t *pp = (pgno_t *)mp->mp_ptrs; - *pp = pgno; - mp->mp_next = mc->mc_txn->mt_loose_pgs; + NEXT_LOOSE_PAGE(mp) = mc->mc_txn->mt_loose_pgs; mc->mc_txn->mt_loose_pgs = mp; mp->mp_flags |= P_LOOSE; } else { @@ -1632,7 +1633,7 @@ mdb_pages_xkeep(MDB_cursor *mc, unsigned pflags, int all) } /* Loose pages shouldn't be spilled */ - for (dp = txn->mt_loose_pgs; dp; dp=dp->mp_next) { + for (dp = txn->mt_loose_pgs; dp; dp = NEXT_LOOSE_PAGE(dp)) { if ((dp->mp_flags & Mask) == pflags) dp->mp_flags ^= P_KEEP; } @@ -1866,11 +1867,8 @@ mdb_page_alloc(MDB_cursor *mc, int num, MDB_page **mp) /* If there are any loose pages, just use them */ if (num == 1 && txn->mt_loose_pgs) { - pgno_t *pp; np = txn->mt_loose_pgs; - txn->mt_loose_pgs = np->mp_next; - pp = (pgno_t *)np->mp_ptrs; - np->mp_pgno = *pp; + txn->mt_loose_pgs = NEXT_LOOSE_PAGE(np); *mp = np; return MDB_SUCCESS; } @@ -2741,28 +2739,22 @@ mdb_freelist_save(MDB_txn *txn) */ if (txn->mt_loose_pgs) { MDB_page *mp = txn->mt_loose_pgs; - pgno_t *pp; /* Just return them to freeDB */ if (env->me_pghead) { int i, j; mop = env->me_pghead; - while(mp) { - pgno_t pg; - pp = (pgno_t *)mp->mp_ptrs; - pg = *pp; + for (; mp; mp = NEXT_LOOSE_PAGE(mp)) { + pgno_t pg = mp->mp_pgno; j = mop[0] + 1; for (i = mop[0]; i && 
mop[i] < pg; i--) mop[j--] = mop[i]; mop[j] = pg; mop[0] += 1; - mp = mp->mp_next; } } else { /* Oh well, they were wasted. Put on freelist */ - while(mp) { - pp = (pgno_t *)mp->mp_ptrs; - mdb_midl_append(&txn->mt_free_pgs, *pp); - mp = mp->mp_next; + for (; mp; mp = NEXT_LOOSE_PAGE(mp)) { + mdb_midl_append(&txn->mt_free_pgs, mp->mp_pgno); } } txn->mt_loose_pgs = NULL; From b09e46904c1c059bd5086243e3915b6be510e57d Mon Sep 17 00:00:00 2001 From: Howard Chu Date: Mon, 30 Jun 2014 04:41:50 -0700 Subject: [PATCH 04/24] ITS#7886 fix mdb_copy write size Don't try to write past end of file --- libraries/liblmdb/mdb.c | 18 +++++++++++++++++- 1 file changed, 17 insertions(+), 1 deletion(-) diff --git a/libraries/liblmdb/mdb.c b/libraries/liblmdb/mdb.c index c5311fab32..750c2bb161 100644 --- a/libraries/liblmdb/mdb.c +++ b/libraries/liblmdb/mdb.c @@ -4505,7 +4505,23 @@ mdb_env_copyfd(MDB_env *env, HANDLE fd) if (rc) goto leave; - wsize = txn->mt_next_pgno * env->me_psize - wsize; + w2 = txn->mt_next_pgno * env->me_psize; +#ifdef WIN32 + { + LARGE_INTEGER fsize; + GetFileSizeEx(env->me_fd, &fsize); + if (w2 > fsize.QuadPart) + w2 = fsize.QuadPart; + } +#else + { + struct stat st; + fstat(env->me_fd, &st); + if (w2 > (size_t)st.st_size) + w2 = st.st_size; + } +#endif + wsize = w2 - wsize; while (wsize > 0) { if (wsize > MAX_WRITE) w2 = MAX_WRITE; From ee599c525052948b623e8980db67c7032feedb4e Mon Sep 17 00:00:00 2001 From: Howard Chu Date: Mon, 30 Jun 2014 04:42:41 -0700 Subject: [PATCH 05/24] More gcov setup tweaks --- libraries/liblmdb/Makefile | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) diff --git a/libraries/liblmdb/Makefile b/libraries/liblmdb/Makefile index bb0b1e8c72..60ab528f65 100644 --- a/libraries/liblmdb/Makefile +++ b/libraries/liblmdb/Makefile @@ -77,17 +77,21 @@ midl.o: midl.c midl.h %.o: %.c lmdb.h $(CC) $(CFLAGS) $(CPPFLAGS) -c $< +COV_FLAGS=-fprofile-arcs -ftest-coverage +COV_OBJS=xmdb.o xmidl.o + coverage: xmtest - -rm -rf testdb; 
mkdir testdb - ./xmtest + for i in mtest*.c [0-9]*.c; do j=`basename \$$i .c`; $(MAKE) $$j.o; \ + gcc -o x$$j $$j.o $(COV_OBJS) -pthread $(COV_FLAGS); \ + rm -rf testdb; mkdir testdb; ./x$$j; done gcov xmdb.c gcov xmidl.c xmtest: mtest.o xmdb.o xmidl.o - gcc -o xmtest mtest.o xmdb.o xmidl.o -pthread -fprofile-arcs -ftest-coverage + gcc -o xmtest mtest.o xmdb.o xmidl.o -pthread $(COV_FLAGS) xmdb.o: mdb.c lmdb.h midl.h - $(CC) $(CFLAGS) -fPIC $(CPPFLAGS) -O0 -fprofile-arcs -ftest-coverage -c mdb.c -o $@ + $(CC) $(CFLAGS) -fPIC $(CPPFLAGS) -O0 $(COV_FLAGS) -c mdb.c -o $@ xmidl.o: midl.c midl.h - $(CC) $(CFLAGS) -fPIC $(CPPFLAGS) -O0 -fprofile-arcs -ftest-coverage -c midl.c -o $@ + $(CC) $(CFLAGS) -fPIC $(CPPFLAGS) -O0 $(COV_FLAGS) -c midl.c -o $@ From c5be1292ac21fed2242ad98f3ae5b491d8b8cdea Mon Sep 17 00:00:00 2001 From: Howard Chu Date: Tue, 1 Jul 2014 05:11:43 -0700 Subject: [PATCH 06/24] All diagnostics should go to stderr --- libraries/liblmdb/mdb_dump.c | 8 ++++---- libraries/liblmdb/mdb_load.c | 12 ++++++------ libraries/liblmdb/mdb_stat.c | 16 ++++++++-------- 3 files changed, 18 insertions(+), 18 deletions(-) diff --git a/libraries/liblmdb/mdb_dump.c b/libraries/liblmdb/mdb_dump.c index c684375979..45243f42a4 100644 --- a/libraries/liblmdb/mdb_dump.c +++ b/libraries/liblmdb/mdb_dump.c @@ -226,19 +226,19 @@ int main(int argc, char *argv[]) rc = mdb_env_open(env, envname, envflags | MDB_RDONLY, 0664); if (rc) { - printf("mdb_env_open failed, error %d %s\n", rc, mdb_strerror(rc)); + fprintf(stderr, "mdb_env_open failed, error %d %s\n", rc, mdb_strerror(rc)); goto env_close; } rc = mdb_txn_begin(env, NULL, MDB_RDONLY, &txn); if (rc) { - printf("mdb_txn_begin failed, error %d %s\n", rc, mdb_strerror(rc)); + fprintf(stderr, "mdb_txn_begin failed, error %d %s\n", rc, mdb_strerror(rc)); goto env_close; } rc = mdb_open(txn, subname, 0, &dbi); if (rc) { - printf("mdb_open failed, error %d %s\n", rc, mdb_strerror(rc)); + fprintf(stderr, "mdb_open failed, error %d %s\n", 
rc, mdb_strerror(rc)); goto txn_abort; } @@ -249,7 +249,7 @@ int main(int argc, char *argv[]) rc = mdb_cursor_open(txn, dbi, &cursor); if (rc) { - printf("mdb_cursor_open failed, error %d %s\n", rc, mdb_strerror(rc)); + fprintf(stderr, "mdb_cursor_open failed, error %d %s\n", rc, mdb_strerror(rc)); goto txn_abort; } while ((rc = mdb_cursor_get(cursor, &key, NULL, MDB_NEXT_NODUP)) == 0) { diff --git a/libraries/liblmdb/mdb_load.c b/libraries/liblmdb/mdb_load.c index 0cf02ada5e..e33dd5e1cc 100644 --- a/libraries/liblmdb/mdb_load.c +++ b/libraries/liblmdb/mdb_load.c @@ -305,7 +305,7 @@ int main(int argc, char *argv[]) rc = mdb_env_open(env, envname, envflags, 0664); if (rc) { - printf("mdb_env_open failed, error %d %s\n", rc, mdb_strerror(rc)); + fprintf(stderr, "mdb_env_open failed, error %d %s\n", rc, mdb_strerror(rc)); goto env_close; } @@ -324,19 +324,19 @@ int main(int argc, char *argv[]) rc = mdb_txn_begin(env, NULL, 0, &txn); if (rc) { - printf("mdb_txn_begin failed, error %d %s\n", rc, mdb_strerror(rc)); + fprintf(stderr, "mdb_txn_begin failed, error %d %s\n", rc, mdb_strerror(rc)); goto env_close; } rc = mdb_open(txn, subname, flags|MDB_CREATE, &dbi); if (rc) { - printf("mdb_open failed, error %d %s\n", rc, mdb_strerror(rc)); + fprintf(stderr, "mdb_open failed, error %d %s\n", rc, mdb_strerror(rc)); goto txn_abort; } rc = mdb_cursor_open(txn, dbi, &mc); if (rc) { - printf("mdb_cursor_open failed, error %d %s\n", rc, mdb_strerror(rc)); + fprintf(stderr, "mdb_cursor_open failed, error %d %s\n", rc, mdb_strerror(rc)); goto txn_abort; } @@ -366,12 +366,12 @@ int main(int argc, char *argv[]) } rc = mdb_txn_begin(env, NULL, 0, &txn); if (rc) { - printf("mdb_txn_begin failed, error %d %s\n", rc, mdb_strerror(rc)); + fprintf(stderr, "mdb_txn_begin failed, error %d %s\n", rc, mdb_strerror(rc)); goto env_close; } rc = mdb_cursor_open(txn, dbi, &mc); if (rc) { - printf("mdb_cursor_open failed, error %d %s\n", rc, mdb_strerror(rc)); + fprintf(stderr, "mdb_cursor_open 
failed, error %d %s\n", rc, mdb_strerror(rc)); goto txn_abort; } batch = 0; diff --git a/libraries/liblmdb/mdb_stat.c b/libraries/liblmdb/mdb_stat.c index eac2c60274..25c092c04f 100644 --- a/libraries/liblmdb/mdb_stat.c +++ b/libraries/liblmdb/mdb_stat.c @@ -112,7 +112,7 @@ int main(int argc, char *argv[]) rc = mdb_env_open(env, envname, envflags | MDB_RDONLY, 0664); if (rc) { - printf("mdb_env_open failed, error %d %s\n", rc, mdb_strerror(rc)); + fprintf(stderr, "mdb_env_open failed, error %d %s\n", rc, mdb_strerror(rc)); goto env_close; } @@ -145,7 +145,7 @@ int main(int argc, char *argv[]) rc = mdb_txn_begin(env, NULL, MDB_RDONLY, &txn); if (rc) { - printf("mdb_txn_begin failed, error %d %s\n", rc, mdb_strerror(rc)); + fprintf(stderr, "mdb_txn_begin failed, error %d %s\n", rc, mdb_strerror(rc)); goto env_close; } @@ -158,12 +158,12 @@ int main(int argc, char *argv[]) dbi = 0; rc = mdb_cursor_open(txn, dbi, &cursor); if (rc) { - printf("mdb_cursor_open failed, error %d %s\n", rc, mdb_strerror(rc)); + fprintf(stderr, "mdb_cursor_open failed, error %d %s\n", rc, mdb_strerror(rc)); goto txn_abort; } rc = mdb_stat(txn, dbi, &mst); if (rc) { - printf("mdb_stat failed, error %d %s\n", rc, mdb_strerror(rc)); + fprintf(stderr, "mdb_stat failed, error %d %s\n", rc, mdb_strerror(rc)); goto txn_abort; } prstat(&mst); @@ -201,13 +201,13 @@ int main(int argc, char *argv[]) rc = mdb_open(txn, subname, 0, &dbi); if (rc) { - printf("mdb_open failed, error %d %s\n", rc, mdb_strerror(rc)); + fprintf(stderr, "mdb_open failed, error %d %s\n", rc, mdb_strerror(rc)); goto txn_abort; } rc = mdb_stat(txn, dbi, &mst); if (rc) { - printf("mdb_stat failed, error %d %s\n", rc, mdb_strerror(rc)); + fprintf(stderr, "mdb_stat failed, error %d %s\n", rc, mdb_strerror(rc)); goto txn_abort; } printf("Status of %s\n", subname ? 
subname : "Main DB"); @@ -219,7 +219,7 @@ int main(int argc, char *argv[]) rc = mdb_cursor_open(txn, dbi, &cursor); if (rc) { - printf("mdb_cursor_open failed, error %d %s\n", rc, mdb_strerror(rc)); + fprintf(stderr, "mdb_cursor_open failed, error %d %s\n", rc, mdb_strerror(rc)); goto txn_abort; } while ((rc = mdb_cursor_get(cursor, &key, NULL, MDB_NEXT_NODUP)) == 0) { @@ -237,7 +237,7 @@ int main(int argc, char *argv[]) if (rc) continue; rc = mdb_stat(txn, db2, &mst); if (rc) { - printf("mdb_stat failed, error %d %s\n", rc, mdb_strerror(rc)); + fprintf(stderr, "mdb_stat failed, error %d %s\n", rc, mdb_strerror(rc)); goto txn_abort; } prstat(&mst); From 4d65cf13265e6936ffca9047dbd2e323b1ace7c5 Mon Sep 17 00:00:00 2001 From: Howard Chu Date: Tue, 1 Jul 2014 18:53:47 -0700 Subject: [PATCH 07/24] dump/load need mapsize etc. --- libraries/liblmdb/mdb_dump.c | 8 ++++++ libraries/liblmdb/mdb_load.c | 54 ++++++++++++++++++++++++++++++++++-- 2 files changed, 59 insertions(+), 3 deletions(-) diff --git a/libraries/liblmdb/mdb_dump.c b/libraries/liblmdb/mdb_dump.c index 45243f42a4..925532593b 100644 --- a/libraries/liblmdb/mdb_dump.c +++ b/libraries/liblmdb/mdb_dump.c @@ -91,6 +91,7 @@ static int dumpit(MDB_txn *txn, MDB_dbi dbi, char *name) MDB_cursor *mc; MDB_stat ms; MDB_val key, data; + MDB_envinfo info; unsigned int flags; int rc, i; @@ -100,11 +101,18 @@ static int dumpit(MDB_txn *txn, MDB_dbi dbi, char *name) rc = mdb_stat(txn, dbi, &ms); if (rc) return rc; + rc = mdb_env_info(mdb_txn_env(txn), &info); + if (rc) return rc; + printf("VERSION=3\n"); printf("format=%s\n", mode & PRINT ? 
"print" : "bytevalue"); if (name) printf("database=%s\n", name); printf("type=btree\n"); + printf("mapsize=%zu\n", info.me_mapsize); + if (info.me_mapaddr) + printf("mapaddr=%p\n", info.me_mapaddr); + printf("maxreaders=%u\n", info.me_maxreaders); if (flags & MDB_DUPSORT) printf("duplicates=1\n"); diff --git a/libraries/liblmdb/mdb_load.c b/libraries/liblmdb/mdb_load.c index e33dd5e1cc..ec9f9ad341 100644 --- a/libraries/liblmdb/mdb_load.c +++ b/libraries/liblmdb/mdb_load.c @@ -34,6 +34,8 @@ static char *prog; static int eof; +static MDB_envinfo info; + static MDB_val kbuf, dbuf; #define STRLENOF(s) (sizeof(s)-1) @@ -92,6 +94,36 @@ static void readhdr() prog, lineno, (char *)dbuf.mv_data+STRLENOF("type=")); exit(EXIT_FAILURE); } + } else if (!strncmp(dbuf.mv_data, "mapaddr=", STRLENOF("mapaddr="))) { + int i; + ptr = memchr(dbuf.mv_data, '\n', dbuf.mv_size); + if (ptr) *ptr = '\0'; + i = sscanf((char *)dbuf.mv_data+STRLENOF("mapaddr="), "%p", &info.me_mapaddr); + if (i != 1) { + fprintf(stderr, "%s: line %zd: invalid mapaddr %s\n", + prog, lineno, (char *)dbuf.mv_data+STRLENOF("mapaddr=")); + exit(EXIT_FAILURE); + } + } else if (!strncmp(dbuf.mv_data, "mapsize=", STRLENOF("mapsize="))) { + int i; + ptr = memchr(dbuf.mv_data, '\n', dbuf.mv_size); + if (ptr) *ptr = '\0'; + i = sscanf((char *)dbuf.mv_data+STRLENOF("mapsize="), "%zu", &info.me_mapsize); + if (i != 1) { + fprintf(stderr, "%s: line %zd: invalid mapsize %s\n", + prog, lineno, (char *)dbuf.mv_data+STRLENOF("mapsize=")); + exit(EXIT_FAILURE); + } + } else if (!strncmp(dbuf.mv_data, "maxreaders=", STRLENOF("maxreaders="))) { + int i; + ptr = memchr(dbuf.mv_data, '\n', dbuf.mv_size); + if (ptr) *ptr = '\0'; + i = sscanf((char *)dbuf.mv_data+STRLENOF("maxreaders="), "%u", &info.me_maxreaders); + if (i != 1) { + fprintf(stderr, "%s: line %zd: invalid maxreaders %s\n", + prog, lineno, (char *)dbuf.mv_data+STRLENOF("maxreaders=")); + exit(EXIT_FAILURE); + } } else { int i; for (i=0; dbflags[i].bit; i++) { @@ 
-251,6 +283,7 @@ int main(int argc, char *argv[]) MDB_dbi dbi; char *envname; int envflags = 0, putflags = 0; + int dohdr = 0; prog = argv[0]; @@ -298,11 +331,26 @@ int main(int argc, char *argv[]) if (optind != argc - 1) usage(prog); + dbuf.mv_size = 4096; + dbuf.mv_data = malloc(dbuf.mv_size); + + if (!(mode & NOHDR)) + readhdr(); + envname = argv[optind]; rc = mdb_env_create(&env); mdb_env_set_maxdbs(env, 2); + if (info.me_maxreaders) + mdb_env_set_maxreaders(env, info.me_maxreaders); + + if (info.me_mapsize) + mdb_env_set_mapsize(env, info.me_mapsize); + + if (info.me_mapaddr) + envflags |= MDB_FIXEDMAP; + rc = mdb_env_open(env, envname, envflags, 0664); if (rc) { fprintf(stderr, "mdb_env_open failed, error %d %s\n", rc, mdb_strerror(rc)); @@ -311,15 +359,15 @@ int main(int argc, char *argv[]) kbuf.mv_size = mdb_env_get_maxkeysize(env) * 2 + 2; kbuf.mv_data = malloc(kbuf.mv_size); - dbuf.mv_size = 4096; - dbuf.mv_data = malloc(dbuf.mv_size); while(!eof) { MDB_val key, data; int batch = 0; flags = 0; - if (!(mode & NOHDR)) + if (!dohdr) { + dohdr = 1; + } else if (!(mode & NOHDR)) readhdr(); rc = mdb_txn_begin(env, NULL, 0, &txn); From 4009c9ae65144e3da5879477ba8696908593c26a Mon Sep 17 00:00:00 2001 From: Howard Chu Date: Tue, 1 Jul 2014 21:45:43 -0700 Subject: [PATCH 08/24] Add mdb_env_copy2() And mdb_env_copyfd2(). Perform compaction on the copy. Trims out freed pages and renumbers data pages in sequential order. This is more CPU-intensive since it copies and modifies data pages. 
--- libraries/liblmdb/lmdb.h | 37 ++ libraries/liblmdb/mdb.c | 668 ++++++++++++++++++++++++++--------- libraries/liblmdb/mdb_copy.1 | 7 + libraries/liblmdb/mdb_copy.c | 20 +- 4 files changed, 557 insertions(+), 175 deletions(-) diff --git a/libraries/liblmdb/lmdb.h b/libraries/liblmdb/lmdb.h index 98d9cc1e2c..b5791795e6 100644 --- a/libraries/liblmdb/lmdb.h +++ b/libraries/liblmdb/lmdb.h @@ -622,6 +622,43 @@ int mdb_env_copy(MDB_env *env, const char *path); */ int mdb_env_copyfd(MDB_env *env, mdb_filehandle_t fd); + /** @brief Copy an LMDB environment to the specified path, with compaction. + * + * This function may be used to make a backup of an existing environment. + * No lockfile is created, since it gets recreated at need. Unlike + * #mdb_env_copy(), which copies all pages from the environment, this + * function trims freed/unused pages from the copy and reorders leaf + * pages in sequential order. This function may execute more slowly + * than #mdb_env_copy() and will use more CPU time. + * @note This call can trigger significant file size growth if run in + * parallel with write transactions, because it employs a read-only + * transaction. See long-lived transactions under @ref caveats_sec. + * @param[in] env An environment handle returned by #mdb_env_create(). It + * must have already been opened successfully. + * @param[in] path The directory in which the copy will reside. This + * directory must already exist and be writable but must otherwise be + * empty. + * @return A non-zero error value on failure and 0 on success. + */ +int mdb_env_copy2(MDB_env *env, const char *path); + + /** @brief Copy an LMDB environment to the specified file descriptor, + * with compaction. + * + * This function may be used to make a backup of an existing environment. + * No lockfile is created, since it gets recreated at need. See + * #mdb_env_copy2() for further details. 
+ * @note This call can trigger significant file size growth if run in + * parallel with write transactions, because it employs a read-only + * transaction. See long-lived transactions under @ref caveats_sec. + * @param[in] env An environment handle returned by #mdb_env_create(). It + * must have already been opened successfully. + * @param[in] fd The filedescriptor to write the copy to. It must + * have already been opened for Write access. + * @return A non-zero error value on failure and 0 on success. + */ +int mdb_env_copyfd2(MDB_env *env, mdb_filehandle_t fd); + /** @brief Return statistics about the LMDB environment. * * @param[in] env An environment handle returned by #mdb_env_create() diff --git a/libraries/liblmdb/mdb.c b/libraries/liblmdb/mdb.c index 750c2bb161..9a8e60c312 100644 --- a/libraries/liblmdb/mdb.c +++ b/libraries/liblmdb/mdb.c @@ -3301,6 +3301,20 @@ mdb_env_read_header(MDB_env *env, MDB_meta *meta) return 0; } +static void +mdb_env_init_meta0(MDB_env *env, MDB_meta *meta) +{ + meta->mm_magic = MDB_MAGIC; + meta->mm_version = MDB_DATA_VERSION; + meta->mm_mapsize = env->me_mapsize; + meta->mm_psize = env->me_psize; + meta->mm_last_pg = 1; + meta->mm_flags = env->me_flags & 0xffff; + meta->mm_flags |= MDB_INTEGERKEY; + meta->mm_dbs[0].md_root = P_INVALID; + meta->mm_dbs[1].md_root = P_INVALID; +} + /** Write the environment parameters of a freshly created DB environment. 
* @param[in] env the environment handle * @param[out] meta address of where to store the meta information @@ -3330,15 +3344,7 @@ mdb_env_init_meta(MDB_env *env, MDB_meta *meta) psize = env->me_psize; - meta->mm_magic = MDB_MAGIC; - meta->mm_version = MDB_DATA_VERSION; - meta->mm_mapsize = env->me_mapsize; - meta->mm_psize = psize; - meta->mm_last_pg = 1; - meta->mm_flags = env->me_flags & 0xffff; - meta->mm_flags |= MDB_INTEGERKEY; - meta->mm_dbs[0].md_root = P_INVALID; - meta->mm_dbs[1].md_root = P_INVALID; + mdb_env_init_meta0(env, meta); p = calloc(2, psize); p->mp_pgno = 0; @@ -4443,167 +4449,6 @@ mdb_env_close0(MDB_env *env, int excl) env->me_flags &= ~(MDB_ENV_ACTIVE|MDB_ENV_TXKEY); } -int -mdb_env_copyfd(MDB_env *env, HANDLE fd) -{ - MDB_txn *txn = NULL; - int rc; - size_t wsize; - char *ptr; -#ifdef _WIN32 - DWORD len, w2; -#define DO_WRITE(rc, fd, ptr, w2, len) rc = WriteFile(fd, ptr, w2, &len, NULL) -#else - ssize_t len; - size_t w2; -#define DO_WRITE(rc, fd, ptr, w2, len) len = write(fd, ptr, w2); rc = (len >= 0) -#endif - - /* Do the lock/unlock of the reader mutex before starting the - * write txn. Otherwise other read txns could block writers. 
- */ - rc = mdb_txn_begin(env, NULL, MDB_RDONLY, &txn); - if (rc) - return rc; - - if (env->me_txns) { - /* We must start the actual read txn after blocking writers */ - mdb_txn_reset0(txn, "reset-stage1"); - - /* Temporarily block writers until we snapshot the meta pages */ - LOCK_MUTEX_W(env); - - rc = mdb_txn_renew0(txn); - if (rc) { - UNLOCK_MUTEX_W(env); - goto leave; - } - } - - wsize = env->me_psize * 2; - ptr = env->me_map; - w2 = wsize; - while (w2 > 0) { - DO_WRITE(rc, fd, ptr, w2, len); - if (!rc) { - rc = ErrCode(); - break; - } else if (len > 0) { - rc = MDB_SUCCESS; - ptr += len; - w2 -= len; - continue; - } else { - /* Non-blocking or async handles are not supported */ - rc = EIO; - break; - } - } - if (env->me_txns) - UNLOCK_MUTEX_W(env); - - if (rc) - goto leave; - - w2 = txn->mt_next_pgno * env->me_psize; -#ifdef WIN32 - { - LARGE_INTEGER fsize; - GetFileSizeEx(env->me_fd, &fsize); - if (w2 > fsize.QuadPart) - w2 = fsize.QuadPart; - } -#else - { - struct stat st; - fstat(env->me_fd, &st); - if (w2 > (size_t)st.st_size) - w2 = st.st_size; - } -#endif - wsize = w2 - wsize; - while (wsize > 0) { - if (wsize > MAX_WRITE) - w2 = MAX_WRITE; - else - w2 = wsize; - DO_WRITE(rc, fd, ptr, w2, len); - if (!rc) { - rc = ErrCode(); - break; - } else if (len > 0) { - rc = MDB_SUCCESS; - ptr += len; - wsize -= len; - continue; - } else { - rc = EIO; - break; - } - } - -leave: - mdb_txn_abort(txn); - return rc; -} - -int -mdb_env_copy(MDB_env *env, const char *path) -{ - int rc, len; - char *lpath; - HANDLE newfd = INVALID_HANDLE_VALUE; - - if (env->me_flags & MDB_NOSUBDIR) { - lpath = (char *)path; - } else { - len = strlen(path); - len += sizeof(DATANAME); - lpath = malloc(len); - if (!lpath) - return ENOMEM; - sprintf(lpath, "%s" DATANAME, path); - } - - /* The destination path must exist, but the destination file must not. - * We don't want the OS to cache the writes, since the source data is - * already in the OS cache. 
- */ -#ifdef _WIN32 - newfd = CreateFile(lpath, GENERIC_WRITE, 0, NULL, CREATE_NEW, - FILE_FLAG_NO_BUFFERING|FILE_FLAG_WRITE_THROUGH, NULL); -#else - newfd = open(lpath, O_WRONLY|O_CREAT|O_EXCL, 0666); -#endif - if (newfd == INVALID_HANDLE_VALUE) { - rc = ErrCode(); - goto leave; - } - -#ifdef O_DIRECT - /* Set O_DIRECT if the file system supports it */ - if ((rc = fcntl(newfd, F_GETFL)) != -1) - (void) fcntl(newfd, F_SETFL, rc | O_DIRECT); -#endif -#ifdef F_NOCACHE /* __APPLE__ */ - rc = fcntl(newfd, F_NOCACHE, 1); - if (rc) { - rc = ErrCode(); - goto leave; - } -#endif - - rc = mdb_env_copyfd(env, newfd); - -leave: - if (!(env->me_flags & MDB_NOSUBDIR)) - free(lpath); - if (newfd != INVALID_HANDLE_VALUE) - if (close(newfd) < 0 && rc == MDB_SUCCESS) - rc = ErrCode(); - - return rc; -} void mdb_env_close(MDB_env *env) @@ -8165,6 +8010,489 @@ mdb_put(MDB_txn *txn, MDB_dbi dbi, return mdb_cursor_put(&mc, key, data, flags); } +#define WBUF (64*1024) + +typedef struct mdb_copy { + pthread_mutex_t mc_mutex[2]; + char *mc_wbuf[2]; + char *mc_over[2]; + void *mc_obuf[2]; + void *mc_free; + MDB_env *mc_env; + MDB_txn *mc_txn; + int mc_wlen[2]; + int mc_olen[2]; + pgno_t mc_next_pgno; + HANDLE mc_fd; + int mc_status; + int mc_toggle; +} mdb_copy; + +static void * +mdb_env_copythr(void *arg) +{ + mdb_copy *my = arg; + char *ptr; + int wsize; + int toggle = 0, len, rc; +#ifdef _WIN32 +#define DO_WRITE(rc, fd, ptr, w2, len) rc = WriteFile(fd, ptr, w2, &len, NULL) +#else +#define DO_WRITE(rc, fd, ptr, w2, len) len = write(fd, ptr, w2); rc = (len >= 0) +#endif + + for(;;) { + pthread_mutex_lock(&my->mc_mutex[toggle]); + if (!my->mc_wlen[toggle]) { + pthread_mutex_unlock(&my->mc_mutex[toggle]); + break; + } + wsize = my->mc_wlen[toggle]; + ptr = my->mc_wbuf[toggle]; +again: + while (wsize > 0) { + DO_WRITE(rc, my->mc_fd, ptr, wsize, len); + if (!rc) { + rc = ErrCode(); + break; + } else if (len > 0) { + rc = MDB_SUCCESS; + ptr += len; + wsize -= len; + continue; + } else { + rc = 
EIO; + break; + } + } + if (rc) { + my->mc_status = rc; + pthread_mutex_unlock(&my->mc_mutex[toggle]); + break; + } + /* If there's an overflow page tail, write it too */ + if (my->mc_olen[toggle]) { + wsize = my->mc_olen[toggle]; + ptr = my->mc_over[toggle]; + my->mc_olen[toggle] = 0; + goto again; + } + pthread_mutex_unlock(&my->mc_mutex[toggle]); + toggle ^= 1; + } + return NULL; +#undef DO_WRITE +} + +static int +mdb_env_cthr_toggle(mdb_copy *my) +{ + int toggle = my->mc_toggle ^ 1; + + pthread_mutex_unlock(&my->mc_mutex[my->mc_toggle]); + pthread_mutex_lock(&my->mc_mutex[toggle]); + if (my->mc_status) { + pthread_mutex_unlock(&my->mc_mutex[toggle]); + return my->mc_status; + } + my->mc_wlen[toggle] = 0; + my->mc_olen[toggle] = 0; + my->mc_toggle = toggle; + return 0; +} + +static int +mdb_env_cwalk(mdb_copy *my, pgno_t pg) +{ + MDB_cursor mc; + MDB_txn *txn = my->mc_txn; + MDB_node *ni; + MDB_page *mo, *mp; + char *buf, *ptr; + int rc, toggle; + unsigned int i; + + mc.mc_snum = 1; + mc.mc_top = 0; + mc.mc_txn = txn; + + rc = mdb_page_get(my->mc_txn, pg, &mc.mc_pg[0], NULL); + if (rc) + return rc; + rc = mdb_page_search_root(&mc, NULL, MDB_PS_FIRST); + if (rc) + return rc; + + /* Make cursor pages writable */ + buf = ptr = malloc(my->mc_env->me_psize * mc.mc_top); + if (buf == NULL) + return ENOMEM; + + for (i=0; imc_env->me_psize); + mc.mc_pg[i] = (MDB_page *)ptr; + ptr += my->mc_env->me_psize; + } + + toggle = my->mc_toggle; + while (mc.mc_snum > 0) { + unsigned n; + mp = mc.mc_pg[mc.mc_top]; + n = NUMKEYS(mp); + if (IS_LEAF(mp)) { + for (i=0; imn_flags & F_BIGDATA) { + MDB_page *omp; + pgno_t pg; + memcpy(&pg, NODEDATA(ni), sizeof(pg)); + rc = mdb_page_get(txn, pg, &omp, NULL); + if (rc) + goto done; + if (my->mc_wlen[toggle] >= WBUF) { + rc = mdb_env_cthr_toggle(my); + if (rc) + goto done; + toggle ^= 1; + } + mo = (MDB_page *)(my->mc_wbuf[toggle] + my->mc_wlen[toggle]); + memcpy(mo, omp, my->mc_env->me_psize); + mo->mp_pgno = my->mc_next_pgno; + 
my->mc_next_pgno += omp->mp_pages; + my->mc_wlen[toggle] += my->mc_env->me_psize; + my->mc_olen[toggle] = my->mc_env->me_psize * (omp->mp_pages - 1); + my->mc_obuf[toggle] = (char *)omp + my->mc_env->me_psize; + rc = mdb_env_cthr_toggle(my); + if (rc) + goto done; + toggle ^= 1; + } else if (ni->mn_flags & F_SUBDATA) { + MDB_db db; + memcpy(&db, NODEDATA(ni), sizeof(db)); + my->mc_toggle = toggle; + rc = mdb_env_cwalk(my, db.md_root); + if (rc) + goto done; + toggle = my->mc_toggle; + } + } + } else { + mc.mc_ki[mc.mc_top]++; + if (mc.mc_ki[mc.mc_top] < n) { + pgno_t pg; +again: + ni = NODEPTR(mp, mc.mc_ki[mc.mc_top]); + pg = NODEPGNO(ni); + rc = mdb_page_get(txn, pg, &mp, NULL); + if (rc) + goto done; + mc.mc_top++; + mc.mc_snum++; + mc.mc_ki[mc.mc_top] = 0; + if (IS_BRANCH(mp)) { + mdb_page_copy(mc.mc_pg[mc.mc_top], mp, my->mc_env->me_psize); + goto again; + } else + mc.mc_pg[mc.mc_top] = mp; + continue; + } + } + if (mc.mc_top) { + ni = NODEPTR(mc.mc_pg[mc.mc_top-1], mc.mc_ki[mc.mc_top-1]); + SETPGNO(ni, my->mc_next_pgno); + } + if (my->mc_wlen[toggle] >= WBUF) { + rc = mdb_env_cthr_toggle(my); + if (rc) + goto done; + toggle ^= 1; + } + mo = (MDB_page *)(my->mc_wbuf[toggle] + my->mc_wlen[toggle]); + mdb_page_copy(mo, mp, my->mc_env->me_psize); + mo->mp_pgno = my->mc_next_pgno++; + my->mc_wlen[toggle] += my->mc_env->me_psize; + mdb_cursor_pop(&mc); + } +done: + free(buf); + return rc; +} + +int +mdb_env_copyfd2(MDB_env *env, HANDLE fd) +{ + MDB_meta *mm; + MDB_page *mp; + mdb_copy my; + MDB_txn *txn = NULL; + pthread_t thr; + int rc; + + rc = posix_memalign(&my.mc_free, env->me_psize, WBUF*2); + if (rc) + return rc; + my.mc_wbuf[0] = my.mc_free; + my.mc_wbuf[1] = my.mc_free + WBUF; + pthread_mutex_init(&my.mc_mutex[0], NULL); + pthread_mutex_init(&my.mc_mutex[1], NULL); + my.mc_wlen[0] = 0; + my.mc_wlen[1] = 0; + my.mc_olen[0] = 0; + my.mc_olen[1] = 0; + my.mc_next_pgno = 2; + my.mc_status = 0; + my.mc_toggle = 0; + my.mc_env = env; + my.mc_fd = fd; + 
pthread_mutex_lock(&my.mc_mutex[0]); + + /* Do the lock/unlock of the reader mutex before starting the + * write txn. Otherwise other read txns could block writers. + */ + rc = mdb_txn_begin(env, NULL, MDB_RDONLY, &txn); + if (rc) + return rc; + + if (env->me_txns) { + /* We must start the actual read txn after blocking writers */ + mdb_txn_reset0(txn, "reset-stage1"); + + /* Temporarily block writers until we snapshot the meta pages */ + LOCK_MUTEX_W(env); + + rc = mdb_txn_renew0(txn); + if (rc) { + UNLOCK_MUTEX_W(env); + goto leave; + } + } + + mp = (MDB_page *)my.mc_wbuf[0]; + memset(mp, 0, 2*env->me_psize); + mp->mp_pgno = 0; + mp->mp_flags = P_META; + mm = (MDB_meta *)METADATA(mp); + mdb_env_init_meta0(env, mm); + mm->mm_address = env->me_metas[0]->mm_address; + + mp = (MDB_page *)(my.mc_wbuf[0] + env->me_psize); + mp->mp_pgno = 1; + mp->mp_flags = P_META; + *(MDB_meta *)METADATA(mp) = *mm; + mm = (MDB_meta *)METADATA(mp); + + /* Count the number of free pages, subtract from lastpg to find + * number of active pages + */ + { + MDB_ID freecount = 0; + MDB_cursor mc; + MDB_val key, data; + mdb_cursor_init(&mc, txn, FREE_DBI, NULL); + while ((rc = mdb_cursor_get(&mc, &key, &data, MDB_NEXT)) == 0) + freecount += *(MDB_ID *)data.mv_data; + freecount += txn->mt_dbs[0].md_branch_pages + + txn->mt_dbs[0].md_leaf_pages + + txn->mt_dbs[0].md_overflow_pages; + + /* Set metapage 1 */ + mm->mm_last_pg = txn->mt_next_pgno - freecount - 1; + mm->mm_dbs[1] = txn->mt_dbs[1]; + mm->mm_dbs[1].md_root = mm->mm_last_pg; + mm->mm_txnid = 1; + } + my.mc_wlen[0] = env->me_psize * 2; + my.mc_txn = txn; + pthread_create(&thr, NULL, mdb_env_copythr, &my); + rc = mdb_env_cwalk(&my, txn->mt_dbs[1].md_root); + if (rc == MDB_SUCCESS && my.mc_wlen[my.mc_toggle]) + rc = mdb_env_cthr_toggle(&my); + my.mc_wlen[my.mc_toggle] = 0; + pthread_mutex_unlock(&my.mc_mutex[my.mc_toggle]); + pthread_join(thr, NULL); +leave: + mdb_txn_abort(txn); + free(my.mc_free); + return rc; +} + +int 
+mdb_env_copyfd(MDB_env *env, HANDLE fd) +{ + MDB_txn *txn = NULL; + int rc; + size_t wsize; + char *ptr; +#ifdef _WIN32 + DWORD len, w2; +#define DO_WRITE(rc, fd, ptr, w2, len) rc = WriteFile(fd, ptr, w2, &len, NULL) +#else + ssize_t len; + size_t w2; +#define DO_WRITE(rc, fd, ptr, w2, len) len = write(fd, ptr, w2); rc = (len >= 0) +#endif + + /* Do the lock/unlock of the reader mutex before starting the + * write txn. Otherwise other read txns could block writers. + */ + rc = mdb_txn_begin(env, NULL, MDB_RDONLY, &txn); + if (rc) + return rc; + + if (env->me_txns) { + /* We must start the actual read txn after blocking writers */ + mdb_txn_reset0(txn, "reset-stage1"); + + /* Temporarily block writers until we snapshot the meta pages */ + LOCK_MUTEX_W(env); + + rc = mdb_txn_renew0(txn); + if (rc) { + UNLOCK_MUTEX_W(env); + goto leave; + } + } + + wsize = env->me_psize * 2; + ptr = env->me_map; + w2 = wsize; + while (w2 > 0) { + DO_WRITE(rc, fd, ptr, w2, len); + if (!rc) { + rc = ErrCode(); + break; + } else if (len > 0) { + rc = MDB_SUCCESS; + ptr += len; + w2 -= len; + continue; + } else { + /* Non-blocking or async handles are not supported */ + rc = EIO; + break; + } + } + if (env->me_txns) + UNLOCK_MUTEX_W(env); + + if (rc) + goto leave; + + w2 = txn->mt_next_pgno * env->me_psize; +#ifdef WIN32 + { + LARGE_INTEGER fsize; + GetFileSizeEx(env->me_fd, &fsize); + if (w2 > fsize.QuadPart) + w2 = fsize.QuadPart; + } +#else + { + struct stat st; + fstat(env->me_fd, &st); + if (w2 > (size_t)st.st_size) + w2 = st.st_size; + } +#endif + wsize = w2 - wsize; + while (wsize > 0) { + if (wsize > MAX_WRITE) + w2 = MAX_WRITE; + else + w2 = wsize; + DO_WRITE(rc, fd, ptr, w2, len); + if (!rc) { + rc = ErrCode(); + break; + } else if (len > 0) { + rc = MDB_SUCCESS; + ptr += len; + wsize -= len; + continue; + } else { + rc = EIO; + break; + } + } + +leave: + mdb_txn_abort(txn); + return rc; +} + +static int +mdb_env_copy0(MDB_env *env, const char *path, int flag) +{ + int rc, len; 
+ char *lpath; + HANDLE newfd = INVALID_HANDLE_VALUE; + + if (env->me_flags & MDB_NOSUBDIR) { + lpath = (char *)path; + } else { + len = strlen(path); + len += sizeof(DATANAME); + lpath = malloc(len); + if (!lpath) + return ENOMEM; + sprintf(lpath, "%s" DATANAME, path); + } + + /* The destination path must exist, but the destination file must not. + * We don't want the OS to cache the writes, since the source data is + * already in the OS cache. + */ +#ifdef _WIN32 + newfd = CreateFile(lpath, GENERIC_WRITE, 0, NULL, CREATE_NEW, + FILE_FLAG_NO_BUFFERING|FILE_FLAG_WRITE_THROUGH, NULL); +#else + newfd = open(lpath, O_WRONLY|O_CREAT|O_EXCL, 0666); +#endif + if (newfd == INVALID_HANDLE_VALUE) { + rc = ErrCode(); + goto leave; + } + +#ifdef O_DIRECT + /* Set O_DIRECT if the file system supports it */ + if ((rc = fcntl(newfd, F_GETFL)) != -1) + (void) fcntl(newfd, F_SETFL, rc | O_DIRECT); +#endif +#ifdef F_NOCACHE /* __APPLE__ */ + rc = fcntl(newfd, F_NOCACHE, 1); + if (rc) { + rc = ErrCode(); + goto leave; + } +#endif + + if (flag) + rc = mdb_env_copyfd2(env, newfd); + else + rc = mdb_env_copyfd(env, newfd); + +leave: + if (!(env->me_flags & MDB_NOSUBDIR)) + free(lpath); + if (newfd != INVALID_HANDLE_VALUE) + if (close(newfd) < 0 && rc == MDB_SUCCESS) + rc = ErrCode(); + + return rc; +} + +int +mdb_env_copy(MDB_env *env, const char *path) +{ + return mdb_env_copy0(env, path, 0); +} + +int +mdb_env_copy2(MDB_env *env, const char *path) +{ + return mdb_env_copy0(env, path, 1); +} + int mdb_env_set_flags(MDB_env *env, unsigned int flag, int onoff) { diff --git a/libraries/liblmdb/mdb_copy.1 b/libraries/liblmdb/mdb_copy.1 index 58c6c5b60c..094b260563 100644 --- a/libraries/liblmdb/mdb_copy.1 +++ b/libraries/liblmdb/mdb_copy.1 @@ -8,6 +8,8 @@ mdb_copy \- LMDB environment copy tool [\c .BR \-V ] [\c +.BR \-c ] +[\c .BR \-n ] .B srcpath [\c @@ -30,6 +32,11 @@ written to stdout. .BR \-V Write the library version number to the standard output, and exit. 
.TP +.BR \-c +Compact while copying. Only current data pages will be copied; freed +or unused pages will be omitted from the copy. This option will +slow down the backup process as it is more CPU-intensive. +.TP .BR \-n Open LDMB environment(s) which do not use subdirectories. diff --git a/libraries/liblmdb/mdb_copy.c b/libraries/liblmdb/mdb_copy.c index 87525c0682..0814519d4d 100644 --- a/libraries/liblmdb/mdb_copy.c +++ b/libraries/liblmdb/mdb_copy.c @@ -33,10 +33,13 @@ int main(int argc,char * argv[]) MDB_env *env; const char *progname = argv[0], *act; unsigned flags = MDB_RDONLY; + int compact = 0; for (; argc > 1 && argv[1][0] == '-'; argc--, argv++) { if (argv[1][1] == 'n' && argv[1][2] == '\0') flags |= MDB_NOSUBDIR; + else if (argv[1][1] == 'c' && argv[1][2] == '\0') + compact = 1; else if (argv[1][1] == 'V' && argv[1][2] == '\0') { printf("%s\n", MDB_VERSION_STRING); exit(0); @@ -45,7 +48,7 @@ int main(int argc,char * argv[]) } if (argc<2 || argc>3) { - fprintf(stderr, "usage: %s [-V] [-n] srcpath [dstpath]\n", progname); + fprintf(stderr, "usage: %s [-V] [-c] [-n] srcpath [dstpath]\n", progname); exit(EXIT_FAILURE); } @@ -65,10 +68,17 @@ int main(int argc,char * argv[]) } if (rc == MDB_SUCCESS) { act = "copying"; - if (argc == 2) - rc = mdb_env_copyfd(env, MDB_STDOUT); - else - rc = mdb_env_copy(env, argv[2]); + if (compact) { + if (argc == 2) + rc = mdb_env_copyfd2(env, MDB_STDOUT); + else + rc = mdb_env_copy2(env, argv[2]); + } else { + if (argc == 2) + rc = mdb_env_copyfd(env, MDB_STDOUT); + else + rc = mdb_env_copy(env, argv[2]); + } } if (rc) fprintf(stderr, "%s: %s failed, error %d (%s)\n", From 342cd72f2574a050469a8f37b0bf54786ad9c954 Mon Sep 17 00:00:00 2001 From: Howard Chu Date: Wed, 2 Jul 2014 07:45:12 -0700 Subject: [PATCH 09/24] Don't check LEAF2 pages for subnodes --- libraries/liblmdb/mdb.c | 61 +++++++++++++++++++++-------------------- 1 file changed, 32 insertions(+), 29 deletions(-) diff --git a/libraries/liblmdb/mdb.c 
b/libraries/liblmdb/mdb.c index 9a8e60c312..6ea9ebc7e6 100644 --- a/libraries/liblmdb/mdb.c +++ b/libraries/liblmdb/mdb.c @@ -8139,41 +8139,44 @@ mdb_env_cwalk(mdb_copy *my, pgno_t pg) unsigned n; mp = mc.mc_pg[mc.mc_top]; n = NUMKEYS(mp); + if (IS_LEAF(mp)) { - for (i=0; imn_flags & F_BIGDATA) { - MDB_page *omp; - pgno_t pg; - memcpy(&pg, NODEDATA(ni), sizeof(pg)); - rc = mdb_page_get(txn, pg, &omp, NULL); - if (rc) - goto done; - if (my->mc_wlen[toggle] >= WBUF) { + if (!IS_LEAF2(mp)) { + for (i=0; imn_flags & F_BIGDATA) { + MDB_page *omp; + pgno_t pg; + memcpy(&pg, NODEDATA(ni), sizeof(pg)); + rc = mdb_page_get(txn, pg, &omp, NULL); + if (rc) + goto done; + if (my->mc_wlen[toggle] >= WBUF) { + rc = mdb_env_cthr_toggle(my); + if (rc) + goto done; + toggle ^= 1; + } + mo = (MDB_page *)(my->mc_wbuf[toggle] + my->mc_wlen[toggle]); + memcpy(mo, omp, my->mc_env->me_psize); + mo->mp_pgno = my->mc_next_pgno; + my->mc_next_pgno += omp->mp_pages; + my->mc_wlen[toggle] += my->mc_env->me_psize; + my->mc_olen[toggle] = my->mc_env->me_psize * (omp->mp_pages - 1); + my->mc_obuf[toggle] = (char *)omp + my->mc_env->me_psize; rc = mdb_env_cthr_toggle(my); if (rc) goto done; toggle ^= 1; + } else if (ni->mn_flags & F_SUBDATA) { + MDB_db db; + memcpy(&db, NODEDATA(ni), sizeof(db)); + my->mc_toggle = toggle; + rc = mdb_env_cwalk(my, db.md_root); + if (rc) + goto done; + toggle = my->mc_toggle; } - mo = (MDB_page *)(my->mc_wbuf[toggle] + my->mc_wlen[toggle]); - memcpy(mo, omp, my->mc_env->me_psize); - mo->mp_pgno = my->mc_next_pgno; - my->mc_next_pgno += omp->mp_pages; - my->mc_wlen[toggle] += my->mc_env->me_psize; - my->mc_olen[toggle] = my->mc_env->me_psize * (omp->mp_pages - 1); - my->mc_obuf[toggle] = (char *)omp + my->mc_env->me_psize; - rc = mdb_env_cthr_toggle(my); - if (rc) - goto done; - toggle ^= 1; - } else if (ni->mn_flags & F_SUBDATA) { - MDB_db db; - memcpy(&db, NODEDATA(ni), sizeof(db)); - my->mc_toggle = toggle; - rc = mdb_env_cwalk(my, db.md_root); - if (rc) - goto 
done; - toggle = my->mc_toggle; } } } else { From af3c3407585fe9a75e750434135645512f58d846 Mon Sep 17 00:00:00 2001 From: Howard Chu Date: Wed, 2 Jul 2014 08:06:19 -0700 Subject: [PATCH 10/24] Larger WBUF still better on large DB --- libraries/liblmdb/mdb.c | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/libraries/liblmdb/mdb.c b/libraries/liblmdb/mdb.c index 6ea9ebc7e6..5becba299a 100644 --- a/libraries/liblmdb/mdb.c +++ b/libraries/liblmdb/mdb.c @@ -8010,7 +8010,9 @@ mdb_put(MDB_txn *txn, MDB_dbi dbi, return mdb_cursor_put(&mc, key, data, flags); } -#define WBUF (64*1024) +#ifndef MDB_WBUF +#define MDB_WBUF (1024*1024) +#endif typedef struct mdb_copy { pthread_mutex_t mc_mutex[2]; @@ -8151,7 +8153,7 @@ mdb_env_cwalk(mdb_copy *my, pgno_t pg) rc = mdb_page_get(txn, pg, &omp, NULL); if (rc) goto done; - if (my->mc_wlen[toggle] >= WBUF) { + if (my->mc_wlen[toggle] >= MDB_WBUF) { rc = mdb_env_cthr_toggle(my); if (rc) goto done; @@ -8204,7 +8206,7 @@ again: ni = NODEPTR(mc.mc_pg[mc.mc_top-1], mc.mc_ki[mc.mc_top-1]); SETPGNO(ni, my->mc_next_pgno); } - if (my->mc_wlen[toggle] >= WBUF) { + if (my->mc_wlen[toggle] >= MDB_WBUF) { rc = mdb_env_cthr_toggle(my); if (rc) goto done; @@ -8231,11 +8233,11 @@ mdb_env_copyfd2(MDB_env *env, HANDLE fd) pthread_t thr; int rc; - rc = posix_memalign(&my.mc_free, env->me_psize, WBUF*2); + rc = posix_memalign(&my.mc_free, env->me_psize, MDB_WBUF*2); if (rc) return rc; my.mc_wbuf[0] = my.mc_free; - my.mc_wbuf[1] = my.mc_free + WBUF; + my.mc_wbuf[1] = my.mc_free + MDB_WBUF; pthread_mutex_init(&my.mc_mutex[0], NULL); pthread_mutex_init(&my.mc_mutex[1], NULL); my.mc_wlen[0] = 0; From 8836b78e945d6cabb0e21c04f89a40bd0478359f Mon Sep 17 00:00:00 2001 From: Howard Chu Date: Wed, 2 Jul 2014 11:44:06 -0700 Subject: [PATCH 11/24] Fix subDB/ovpage copying --- libraries/liblmdb/mdb.c | 57 ++++++++++++++++++++++++++++++----------- 1 file changed, 42 insertions(+), 15 deletions(-) diff --git a/libraries/liblmdb/mdb.c 
b/libraries/liblmdb/mdb.c index 5becba299a..9d24aef61b 100644 --- a/libraries/liblmdb/mdb.c +++ b/libraries/liblmdb/mdb.c @@ -8018,7 +8018,6 @@ typedef struct mdb_copy { pthread_mutex_t mc_mutex[2]; char *mc_wbuf[2]; char *mc_over[2]; - void *mc_obuf[2]; void *mc_free; MDB_env *mc_env; MDB_txn *mc_txn; @@ -8043,8 +8042,10 @@ mdb_env_copythr(void *arg) #define DO_WRITE(rc, fd, ptr, w2, len) len = write(fd, ptr, w2); rc = (len >= 0) #endif + pthread_mutex_lock(&my->mc_mutex[toggle^1]); for(;;) { pthread_mutex_lock(&my->mc_mutex[toggle]); + pthread_mutex_unlock(&my->mc_mutex[toggle^1]); if (!my->mc_wlen[toggle]) { pthread_mutex_unlock(&my->mc_mutex[toggle]); break; @@ -8067,6 +8068,7 @@ again: break; } } + my->mc_wlen[toggle] = wsize; if (rc) { my->mc_status = rc; pthread_mutex_unlock(&my->mc_mutex[toggle]); @@ -8079,7 +8081,6 @@ again: my->mc_olen[toggle] = 0; goto again; } - pthread_mutex_unlock(&my->mc_mutex[toggle]); toggle ^= 1; } return NULL; @@ -8104,12 +8105,12 @@ mdb_env_cthr_toggle(mdb_copy *my) } static int -mdb_env_cwalk(mdb_copy *my, pgno_t pg) +mdb_env_cwalk(mdb_copy *my, pgno_t *pg, int flags) { MDB_cursor mc; MDB_txn *txn = my->mc_txn; MDB_node *ni; - MDB_page *mo, *mp; + MDB_page *mo, *mp, *leaf; char *buf, *ptr; int rc, toggle; unsigned int i; @@ -8118,7 +8119,7 @@ mdb_env_cwalk(mdb_copy *my, pgno_t pg) mc.mc_top = 0; mc.mc_txn = txn; - rc = mdb_page_get(my->mc_txn, pg, &mc.mc_pg[0], NULL); + rc = mdb_page_get(my->mc_txn, *pg, &mc.mc_pg[0], NULL); if (rc) return rc; rc = mdb_page_search_root(&mc, NULL, MDB_PS_FIRST); @@ -8126,7 +8127,7 @@ mdb_env_cwalk(mdb_copy *my, pgno_t pg) return rc; /* Make cursor pages writable */ - buf = ptr = malloc(my->mc_env->me_psize * mc.mc_top); + buf = ptr = malloc(my->mc_env->me_psize * mc.mc_snum); if (buf == NULL) return ENOMEM; @@ -8136,6 +8137,9 @@ mdb_env_cwalk(mdb_copy *my, pgno_t pg) ptr += my->mc_env->me_psize; } + /* This is writable space for a leaf page. Usually not needed. 
*/ + leaf = (MDB_page *)ptr; + toggle = my->mc_toggle; while (mc.mc_snum > 0) { unsigned n; @@ -8143,12 +8147,21 @@ mdb_env_cwalk(mdb_copy *my, pgno_t pg) n = NUMKEYS(mp); if (IS_LEAF(mp)) { - if (!IS_LEAF2(mp)) { + if (!IS_LEAF2(mp) && !(flags & F_DUPDATA)) { for (i=0; imn_flags & F_BIGDATA) { MDB_page *omp; pgno_t pg; + + /* Need writable leaf */ + if (mp != leaf) { + mc.mc_pg[mc.mc_top] = leaf; + mdb_page_copy(leaf, mp, my->mc_env->me_psize); + mp = leaf; + ni = NODEPTR(mp, i); + } + memcpy(&pg, NODEDATA(ni), sizeof(pg)); rc = mdb_page_get(txn, pg, &omp, NULL); if (rc) @@ -8164,20 +8177,33 @@ mdb_env_cwalk(mdb_copy *my, pgno_t pg) mo->mp_pgno = my->mc_next_pgno; my->mc_next_pgno += omp->mp_pages; my->mc_wlen[toggle] += my->mc_env->me_psize; - my->mc_olen[toggle] = my->mc_env->me_psize * (omp->mp_pages - 1); - my->mc_obuf[toggle] = (char *)omp + my->mc_env->me_psize; - rc = mdb_env_cthr_toggle(my); - if (rc) - goto done; - toggle ^= 1; + if (omp->mp_pages > 1) { + my->mc_olen[toggle] = my->mc_env->me_psize * (omp->mp_pages - 1); + my->mc_over[toggle] = (char *)omp + my->mc_env->me_psize; + rc = mdb_env_cthr_toggle(my); + if (rc) + goto done; + toggle ^= 1; + } + memcpy(NODEDATA(ni), &mo->mp_pgno, sizeof(pgno_t)); } else if (ni->mn_flags & F_SUBDATA) { MDB_db db; + + /* Need writable leaf */ + if (mp != leaf) { + mc.mc_pg[mc.mc_top] = leaf; + mdb_page_copy(leaf, mp, my->mc_env->me_psize); + mp = leaf; + ni = NODEPTR(mp, i); + } + memcpy(&db, NODEDATA(ni), sizeof(db)); my->mc_toggle = toggle; - rc = mdb_env_cwalk(my, db.md_root); + rc = mdb_env_cwalk(my, &db.md_root, ni->mn_flags & F_DUPDATA); if (rc) goto done; toggle = my->mc_toggle; + memcpy(NODEDATA(ni), &db, sizeof(db)); } } } @@ -8218,6 +8244,7 @@ again: my->mc_wlen[toggle] += my->mc_env->me_psize; mdb_cursor_pop(&mc); } + *pg = mo->mp_pgno; done: free(buf); return rc; @@ -8309,7 +8336,7 @@ mdb_env_copyfd2(MDB_env *env, HANDLE fd) my.mc_wlen[0] = env->me_psize * 2; my.mc_txn = txn; pthread_create(&thr, NULL, 
mdb_env_copythr, &my); - rc = mdb_env_cwalk(&my, txn->mt_dbs[1].md_root); + rc = mdb_env_cwalk(&my, &txn->mt_dbs[1].md_root, 0); if (rc == MDB_SUCCESS && my.mc_wlen[my.mc_toggle]) rc = mdb_env_cthr_toggle(&my); my.mc_wlen[my.mc_toggle] = 0; From 64191264b8e8582a9d70239c3ad5dd36464ca7b4 Mon Sep 17 00:00:00 2001 From: Howard Chu Date: Thu, 3 Jul 2014 07:49:22 -0700 Subject: [PATCH 12/24] Windows compat --- libraries/liblmdb/mdb.c | 94 +++++++++++++++++++++++++----------- libraries/liblmdb/mdb_dump.c | 8 ++- libraries/liblmdb/mdb_load.c | 52 +++++++++++--------- 3 files changed, 102 insertions(+), 52 deletions(-) diff --git a/libraries/liblmdb/mdb.c b/libraries/liblmdb/mdb.c index 9d24aef61b..a90372f34a 100644 --- a/libraries/liblmdb/mdb.c +++ b/libraries/liblmdb/mdb.c @@ -35,15 +35,17 @@ #ifndef _GNU_SOURCE #define _GNU_SOURCE 1 #endif -#include -#include #ifdef _WIN32 +#include #include /** getpid() returns int; MinGW defines pid_t but MinGW64 typedefs it * as int64 which is wrong. MSVC doesn't define it at all, so just * don't use it. */ #define MDB_PID_T int +#define MDB_THR_T DWORD +#include +#include #ifdef __GNUC__ # include #else @@ -55,7 +57,10 @@ # endif #endif #else +#include +#include #define MDB_PID_T pid_t +#define MDB_THR_T pthread_t #include #include #include @@ -169,7 +174,8 @@ #ifdef _WIN32 #define MDB_USE_HASH 1 #define MDB_PIDLOCK 0 -#define pthread_t DWORD +#define THREAD_RET DWORD +#define pthread_t HANDLE #define pthread_mutex_t HANDLE #define pthread_key_t DWORD #define pthread_self() GetCurrentThreadId() @@ -180,6 +186,8 @@ #define pthread_setspecific(x,y) (TlsSetValue(x,y) ? 
0 : ErrCode()) #define pthread_mutex_unlock(x) ReleaseMutex(x) #define pthread_mutex_lock(x) WaitForSingleObject(x, INFINITE) +#define THREAD_CREATE(thr,start,arg) thr=CreateThread(NULL,0,start,arg,0,NULL) +#define THREAD_FINISH(thr) WaitForSingleObject(thr, INFINITE) #define LOCK_MUTEX_R(env) pthread_mutex_lock((env)->me_rmutex) #define UNLOCK_MUTEX_R(env) pthread_mutex_unlock((env)->me_rmutex) #define LOCK_MUTEX_W(env) pthread_mutex_lock((env)->me_wmutex) @@ -198,7 +206,9 @@ #endif #define Z "I" #else - +#define THREAD_RET void * +#define THREAD_CREATE(thr,start,arg) pthread_create(&thr,NULL,start,arg) +#define THREAD_FINISH(thr) pthread_join(thr,NULL) #define Z "z" /**< printf format modifier for size_t */ /** For MDB_LOCK_FORMAT: True if readers take a pid lock in the lockfile */ @@ -537,7 +547,7 @@ typedef struct MDB_rxbody { /** The process ID of the process owning this reader txn. */ MDB_PID_T mrb_pid; /** The thread ID of the thread owning this txn. */ - pthread_t mrb_tid; + MDB_THR_T mrb_tid; } MDB_rxbody; /** The actual reader record, with cacheline padding. */ @@ -2387,7 +2397,7 @@ mdb_txn_renew0(MDB_txn *txn) return MDB_BAD_RSLOT; } else { MDB_PID_T pid = env->me_pid; - pthread_t tid = pthread_self(); + MDB_THR_T tid = pthread_self(); if (!env->me_live_reader) { rc = mdb_reader_pid(env, Pidset, pid); @@ -3534,8 +3544,17 @@ mdb_env_map(MDB_env *env, void *addr, int newsize) int rc; HANDLE mh; LONG sizelo, sizehi; - sizelo = env->me_mapsize & 0xffffffff; - sizehi = env->me_mapsize >> 16 >> 16; /* only needed on Win64 */ + size_t msize; + + if (flags & MDB_RDONLY) { + msize = 0; + sizelo = 0; + sizehi = 0; + } else { + msize = env->me_mapsize; + sizelo = msize & 0xffffffff; + sizehi = msize >> 16 >> 16; /* only needed on Win64 */ + } /* Windows won't create mappings for zero length files. * Just allocate the maxsize right now. 
@@ -3553,7 +3572,7 @@ mdb_env_map(MDB_env *env, void *addr, int newsize) return ErrCode(); env->me_map = MapViewOfFileEx(mh, flags & MDB_WRITEMAP ? FILE_MAP_WRITE : FILE_MAP_READ, - 0, 0, env->me_mapsize, addr); + 0, 0, msize, addr); rc = env->me_map ? 0 : ErrCode(); CloseHandle(mh); if (rc) @@ -8018,7 +8037,6 @@ typedef struct mdb_copy { pthread_mutex_t mc_mutex[2]; char *mc_wbuf[2]; char *mc_over[2]; - void *mc_free; MDB_env *mc_env; MDB_txn *mc_txn; int mc_wlen[2]; @@ -8029,16 +8047,17 @@ typedef struct mdb_copy { int mc_toggle; } mdb_copy; -static void * +static THREAD_RET mdb_env_copythr(void *arg) { mdb_copy *my = arg; char *ptr; - int wsize; - int toggle = 0, len, rc; + int toggle = 0, wsize, rc; #ifdef _WIN32 + DWORD len; #define DO_WRITE(rc, fd, ptr, w2, len) rc = WriteFile(fd, ptr, w2, &len, NULL) #else + int len; #define DO_WRITE(rc, fd, ptr, w2, len) len = write(fd, ptr, w2); rc = (len >= 0) #endif @@ -8083,7 +8102,7 @@ again: } toggle ^= 1; } - return NULL; + return (THREAD_RET)0; #undef DO_WRITE } @@ -8228,10 +8247,6 @@ again: continue; } } - if (mc.mc_top) { - ni = NODEPTR(mc.mc_pg[mc.mc_top-1], mc.mc_ki[mc.mc_top-1]); - SETPGNO(ni, my->mc_next_pgno); - } if (my->mc_wlen[toggle] >= MDB_WBUF) { rc = mdb_env_cthr_toggle(my); if (rc) @@ -8242,9 +8257,17 @@ again: mdb_page_copy(mo, mp, my->mc_env->me_psize); mo->mp_pgno = my->mc_next_pgno++; my->mc_wlen[toggle] += my->mc_env->me_psize; - mdb_cursor_pop(&mc); + if (mc.mc_top) { + /* Update parent if there is one */ + ni = NODEPTR(mc.mc_pg[mc.mc_top-1], mc.mc_ki[mc.mc_top-1]); + SETPGNO(ni, mo->mp_pgno); + mdb_cursor_pop(&mc); + } else { + /* Otherwise we're done */ + *pg = mo->mp_pgno; + break; + } } - *pg = mo->mp_pgno; done: free(buf); return rc; @@ -8260,13 +8283,20 @@ mdb_env_copyfd2(MDB_env *env, HANDLE fd) pthread_t thr; int rc; - rc = posix_memalign(&my.mc_free, env->me_psize, MDB_WBUF*2); - if (rc) - return rc; - my.mc_wbuf[0] = my.mc_free; - my.mc_wbuf[1] = my.mc_free + MDB_WBUF; +#ifdef _WIN32 + 
my.mc_mutex[0] = CreateMutex(NULL, FALSE, NULL); + my.mc_mutex[1] = CreateMutex(NULL, FALSE, NULL); + my.mc_wbuf[0] = _aligned_malloc(MDB_WBUF*2, env->me_psize); + if (my.mc_wbuf[0] == NULL) + return errno; +#else pthread_mutex_init(&my.mc_mutex[0], NULL); pthread_mutex_init(&my.mc_mutex[1], NULL); + rc = posix_memalign((void **)&my.mc_wbuf[0], env->me_psize, MDB_WBUF*2); + if (rc) + return rc; +#endif + my.mc_wbuf[1] = my.mc_wbuf[0] + MDB_WBUF; my.mc_wlen[0] = 0; my.mc_wlen[1] = 0; my.mc_olen[0] = 0; @@ -8335,16 +8365,24 @@ mdb_env_copyfd2(MDB_env *env, HANDLE fd) } my.mc_wlen[0] = env->me_psize * 2; my.mc_txn = txn; - pthread_create(&thr, NULL, mdb_env_copythr, &my); + THREAD_CREATE(thr, mdb_env_copythr, &my); rc = mdb_env_cwalk(&my, &txn->mt_dbs[1].md_root, 0); if (rc == MDB_SUCCESS && my.mc_wlen[my.mc_toggle]) rc = mdb_env_cthr_toggle(&my); my.mc_wlen[my.mc_toggle] = 0; pthread_mutex_unlock(&my.mc_mutex[my.mc_toggle]); - pthread_join(thr, NULL); + THREAD_FINISH(thr); leave: mdb_txn_abort(txn); - free(my.mc_free); +#ifdef _WIN32 + CloseHandle(my.mc_mutex[1]); + CloseHandle(my.mc_mutex[0]); + _aligned_free(my.mc_wbuf[0]); +#else + pthread_mutex_destroy(&my.mc_mutex[1]); + pthread_mutex_destroy(&my.mc_mutex[0]); + free(my.mc_wbuf[0]); +#endif return rc; } diff --git a/libraries/liblmdb/mdb_dump.c b/libraries/liblmdb/mdb_dump.c index 925532593b..3b01f9643d 100644 --- a/libraries/liblmdb/mdb_dump.c +++ b/libraries/liblmdb/mdb_dump.c @@ -20,6 +20,12 @@ #include #include "lmdb.h" +#ifdef _WIN32 +#define Z "I" +#else +#define Z "z" +#endif + #define PRINT 1 static int mode; @@ -109,7 +115,7 @@ static int dumpit(MDB_txn *txn, MDB_dbi dbi, char *name) if (name) printf("database=%s\n", name); printf("type=btree\n"); - printf("mapsize=%zu\n", info.me_mapsize); + printf("mapsize=%" Z "u\n", info.me_mapsize); if (info.me_mapaddr) printf("mapaddr=%p\n", info.me_mapaddr); printf("maxreaders=%u\n", info.me_maxreaders); diff --git a/libraries/liblmdb/mdb_load.c 
b/libraries/liblmdb/mdb_load.c index ec9f9ad341..17f4757330 100644 --- a/libraries/liblmdb/mdb_load.c +++ b/libraries/liblmdb/mdb_load.c @@ -32,12 +32,18 @@ static int flags; static char *prog; -static int eof; +static int Eof; static MDB_envinfo info; static MDB_val kbuf, dbuf; +#ifdef _WIN32 +#define Z "I" +#else +#define Z "z" +#endif + #define STRLENOF(s) (sizeof(s)-1) typedef struct flagbit { @@ -69,7 +75,7 @@ static void readhdr() if (!strncmp(dbuf.mv_data, "VERSION=", STRLENOF("VERSION="))) { version=atoi((char *)dbuf.mv_data+STRLENOF("VERSION=")); if (version > 3) { - fprintf(stderr, "%s: line %zd: unsupported VERSION %d\n", + fprintf(stderr, "%s: line %" Z "d: unsupported VERSION %d\n", prog, lineno, version); exit(EXIT_FAILURE); } @@ -79,7 +85,7 @@ static void readhdr() if (!strncmp((char *)dbuf.mv_data+STRLENOF("FORMAT="), "print", STRLENOF("print"))) mode |= PRINT; else if (strncmp((char *)dbuf.mv_data+STRLENOF("FORMAT="), "bytevalue", STRLENOF("bytevalue"))) { - fprintf(stderr, "%s: line %zd: unsupported FORMAT %s\n", + fprintf(stderr, "%s: line %" Z "d: unsupported FORMAT %s\n", prog, lineno, (char *)dbuf.mv_data+STRLENOF("FORMAT=")); exit(EXIT_FAILURE); } @@ -90,7 +96,7 @@ static void readhdr() subname = strdup((char *)dbuf.mv_data+STRLENOF("database=")); } else if (!strncmp(dbuf.mv_data, "type=", STRLENOF("type="))) { if (strncmp((char *)dbuf.mv_data+STRLENOF("type="), "btree", STRLENOF("btree"))) { - fprintf(stderr, "%s: line %zd: unsupported type %s\n", + fprintf(stderr, "%s: line %" Z "d: unsupported type %s\n", prog, lineno, (char *)dbuf.mv_data+STRLENOF("type=")); exit(EXIT_FAILURE); } @@ -100,7 +106,7 @@ static void readhdr() if (ptr) *ptr = '\0'; i = sscanf((char *)dbuf.mv_data+STRLENOF("mapaddr="), "%p", &info.me_mapaddr); if (i != 1) { - fprintf(stderr, "%s: line %zd: invalid mapaddr %s\n", + fprintf(stderr, "%s: line %" Z "d: invalid mapaddr %s\n", prog, lineno, (char *)dbuf.mv_data+STRLENOF("mapaddr=")); exit(EXIT_FAILURE); } @@ -108,9 
+114,9 @@ static void readhdr() int i; ptr = memchr(dbuf.mv_data, '\n', dbuf.mv_size); if (ptr) *ptr = '\0'; - i = sscanf((char *)dbuf.mv_data+STRLENOF("mapsize="), "%zu", &info.me_mapsize); + i = sscanf((char *)dbuf.mv_data+STRLENOF("mapsize="), "%" Z "u", &info.me_mapsize); if (i != 1) { - fprintf(stderr, "%s: line %zd: invalid mapsize %s\n", + fprintf(stderr, "%s: line %" Z "d: invalid mapsize %s\n", prog, lineno, (char *)dbuf.mv_data+STRLENOF("mapsize=")); exit(EXIT_FAILURE); } @@ -120,7 +126,7 @@ static void readhdr() if (ptr) *ptr = '\0'; i = sscanf((char *)dbuf.mv_data+STRLENOF("maxreaders="), "%u", &info.me_maxreaders); if (i != 1) { - fprintf(stderr, "%s: line %zd: invalid maxreaders %s\n", + fprintf(stderr, "%s: line %" Z "d: invalid maxreaders %s\n", prog, lineno, (char *)dbuf.mv_data+STRLENOF("maxreaders=")); exit(EXIT_FAILURE); } @@ -136,12 +142,12 @@ static void readhdr() if (!dbflags[i].bit) { ptr = memchr(dbuf.mv_data, '=', dbuf.mv_size); if (!ptr) { - fprintf(stderr, "%s: line %zd: unexpected format\n", + fprintf(stderr, "%s: line %" Z "d: unexpected format\n", prog, lineno); exit(EXIT_FAILURE); } else { *ptr = '\0'; - fprintf(stderr, "%s: line %zd: unrecognized keyword ignored: %s\n", + fprintf(stderr, "%s: line %" Z "d: unrecognized keyword ignored: %s\n", prog, lineno, (char *)dbuf.mv_data); } } @@ -151,7 +157,7 @@ static void readhdr() static void badend() { - fprintf(stderr, "%s: line %zd: unexpected end of input\n", + fprintf(stderr, "%s: line %" Z "d: unexpected end of input\n", prog, lineno); } @@ -178,14 +184,14 @@ static int readline(MDB_val *out, MDB_val *buf) if (!(mode & NOHDR)) { c = fgetc(stdin); if (c == EOF) { - eof = 1; + Eof = 1; return EOF; } if (c != ' ') { lineno++; if (fgets(buf->mv_data, buf->mv_size, stdin) == NULL) { badend: - eof = 1; + Eof = 1; badend(); return EOF; } @@ -195,7 +201,7 @@ badend: } } if (fgets(buf->mv_data, buf->mv_size, stdin) == NULL) { - eof = 1; + Eof = 1; return EOF; } lineno++; @@ -207,15 +213,15 @@ 
badend: while (c1[len-1] != '\n') { buf->mv_data = realloc(buf->mv_data, buf->mv_size*2); if (!buf->mv_data) { - eof = 1; - fprintf(stderr, "%s: line %zd: out of memory, line too long\n", + Eof = 1; + fprintf(stderr, "%s: line %" Z "d: out of memory, line too long\n", prog, lineno); return EOF; } c1 = buf->mv_data; c1 += buf->mv_size; if (fgets((char *)c1, buf->mv_size, stdin) == NULL) { - eof = 1; + Eof = 1; badend(); return EOF; } @@ -234,7 +240,7 @@ badend: c1++; c2 += 2; } else { if (c2+3 >= end || !isxdigit(c2[1]) || !isxdigit(c2[2])) { - eof = 1; + Eof = 1; badend(); return EOF; } @@ -248,13 +254,13 @@ badend: } else { /* odd length not allowed */ if (len & 1) { - eof = 1; + Eof = 1; badend(); return EOF; } while (c2 < end) { if (!isxdigit(*c2) || !isxdigit(c2[1])) { - eof = 1; + Eof = 1; badend(); return EOF; } @@ -360,7 +366,7 @@ int main(int argc, char *argv[]) kbuf.mv_size = mdb_env_get_maxkeysize(env) * 2 + 2; kbuf.mv_data = malloc(kbuf.mv_size); - while(!eof) { + while(!Eof) { MDB_val key, data; int batch = 0; flags = 0; @@ -408,7 +414,7 @@ int main(int argc, char *argv[]) if (batch == 100) { rc = mdb_txn_commit(txn); if (rc) { - fprintf(stderr, "%s: line %zd: txn_commit: %s\n", + fprintf(stderr, "%s: line %" Z "d: txn_commit: %s\n", prog, lineno, mdb_strerror(rc)); goto env_close; } @@ -428,7 +434,7 @@ int main(int argc, char *argv[]) rc = mdb_txn_commit(txn); txn = NULL; if (rc) { - fprintf(stderr, "%s: line %zd: txn_commit: %s\n", + fprintf(stderr, "%s: line %" Z "d: txn_commit: %s\n", prog, lineno, mdb_strerror(rc)); goto env_close; } From 6043103c1311e278b255f5881393cea4704af225 Mon Sep 17 00:00:00 2001 From: Howard Chu Date: Thu, 3 Jul 2014 08:31:57 -0700 Subject: [PATCH 13/24] Fix envinfo to return fixed map address --- libraries/liblmdb/mdb.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/libraries/liblmdb/mdb.c b/libraries/liblmdb/mdb.c index a90372f34a..d64c954990 100644 --- a/libraries/liblmdb/mdb.c +++ 
b/libraries/liblmdb/mdb.c @@ -8671,7 +8671,7 @@ mdb_env_info(MDB_env *env, MDB_envinfo *arg) return EINVAL; toggle = mdb_env_pick_meta(env); - arg->me_mapaddr = (env->me_flags & MDB_FIXEDMAP) ? env->me_map : 0; + arg->me_mapaddr = env->me_metas[toggle]->mm_address; arg->me_mapsize = env->me_mapsize; arg->me_maxreaders = env->me_maxreaders; From 03f0ecb0018487626bfa31d7b870b7b38517a88a Mon Sep 17 00:00:00 2001 From: Howard Chu Date: Thu, 3 Jul 2014 10:49:05 -0700 Subject: [PATCH 14/24] More copyfd2 tweaks --- libraries/liblmdb/mdb.c | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/libraries/liblmdb/mdb.c b/libraries/liblmdb/mdb.c index d64c954990..6997ae9583 100644 --- a/libraries/liblmdb/mdb.c +++ b/libraries/liblmdb/mdb.c @@ -8087,7 +8087,6 @@ again: break; } } - my->mc_wlen[toggle] = wsize; if (rc) { my->mc_status = rc; pthread_mutex_unlock(&my->mc_mutex[toggle]); @@ -8100,6 +8099,7 @@ again: my->mc_olen[toggle] = 0; goto again; } + my->mc_wlen[toggle] = 0; toggle ^= 1; } return (THREAD_RET)0; @@ -8117,8 +8117,6 @@ mdb_env_cthr_toggle(mdb_copy *my) pthread_mutex_unlock(&my->mc_mutex[toggle]); return my->mc_status; } - my->mc_wlen[toggle] = 0; - my->mc_olen[toggle] = 0; my->mc_toggle = toggle; return 0; } @@ -8134,6 +8132,10 @@ mdb_env_cwalk(mdb_copy *my, pgno_t *pg, int flags) int rc, toggle; unsigned int i; + /* Empty DB, nothing to do */ + if (*pg == P_INVALID) + return MDB_SUCCESS; + mc.mc_snum = 1; mc.mc_top = 0; mc.mc_txn = txn; @@ -8307,6 +8309,7 @@ mdb_env_copyfd2(MDB_env *env, HANDLE fd) my.mc_env = env; my.mc_fd = fd; pthread_mutex_lock(&my.mc_mutex[0]); + THREAD_CREATE(thr, mdb_env_copythr, &my); /* Do the lock/unlock of the reader mutex before starting the * write txn. Otherwise other read txns could block writers. 
@@ -8365,7 +8368,6 @@ mdb_env_copyfd2(MDB_env *env, HANDLE fd) } my.mc_wlen[0] = env->me_psize * 2; my.mc_txn = txn; - THREAD_CREATE(thr, mdb_env_copythr, &my); rc = mdb_env_cwalk(&my, &txn->mt_dbs[1].md_root, 0); if (rc == MDB_SUCCESS && my.mc_wlen[my.mc_toggle]) rc = mdb_env_cthr_toggle(&my); From 059b357d1addb52bbbbe4fd257ccfd906c3e6445 Mon Sep 17 00:00:00 2001 From: Howard Chu Date: Thu, 3 Jul 2014 14:26:14 -0700 Subject: [PATCH 15/24] More tweaks to copyfd2 Make sure the writer thread starts and stops when we expect it to. --- libraries/liblmdb/mdb.c | 82 +++++++++++++++++++++++++---------------- 1 file changed, 51 insertions(+), 31 deletions(-) diff --git a/libraries/liblmdb/mdb.c b/libraries/liblmdb/mdb.c index 6997ae9583..609eb9230a 100644 --- a/libraries/liblmdb/mdb.c +++ b/libraries/liblmdb/mdb.c @@ -177,6 +177,7 @@ #define THREAD_RET DWORD #define pthread_t HANDLE #define pthread_mutex_t HANDLE +#define pthread_cond_t HANDLE #define pthread_key_t DWORD #define pthread_self() GetCurrentThreadId() #define pthread_key_create(x,y) \ @@ -186,6 +187,8 @@ #define pthread_setspecific(x,y) (TlsSetValue(x,y) ? 
0 : ErrCode()) #define pthread_mutex_unlock(x) ReleaseMutex(x) #define pthread_mutex_lock(x) WaitForSingleObject(x, INFINITE) +#define pthread_cond_signal(x) SetEvent(*x) +#define pthread_cond_wait(cond,mutex) SignalObjectAndWait(*mutex, *cond, INFINITE, FALSE); WaitForSingleObject(*mutex, INFINITE) #define THREAD_CREATE(thr,start,arg) thr=CreateThread(NULL,0,start,arg,0,NULL) #define THREAD_FINISH(thr) WaitForSingleObject(thr, INFINITE) #define LOCK_MUTEX_R(env) pthread_mutex_lock((env)->me_rmutex) @@ -8034,7 +8037,8 @@ mdb_put(MDB_txn *txn, MDB_dbi dbi, #endif typedef struct mdb_copy { - pthread_mutex_t mc_mutex[2]; + pthread_mutex_t mc_mutex; + pthread_cond_t mc_cond; char *mc_wbuf[2]; char *mc_over[2]; MDB_env *mc_env; @@ -8044,6 +8048,7 @@ typedef struct mdb_copy { pgno_t mc_next_pgno; HANDLE mc_fd; int mc_status; + volatile int mc_new; int mc_toggle; } mdb_copy; @@ -8061,14 +8066,17 @@ mdb_env_copythr(void *arg) #define DO_WRITE(rc, fd, ptr, w2, len) len = write(fd, ptr, w2); rc = (len >= 0) #endif - pthread_mutex_lock(&my->mc_mutex[toggle^1]); + pthread_mutex_lock(&my->mc_mutex); + my->mc_new = 0; + pthread_cond_signal(&my->mc_cond); for(;;) { - pthread_mutex_lock(&my->mc_mutex[toggle]); - pthread_mutex_unlock(&my->mc_mutex[toggle^1]); - if (!my->mc_wlen[toggle]) { - pthread_mutex_unlock(&my->mc_mutex[toggle]); + while (!my->mc_new) + pthread_cond_wait(&my->mc_cond, &my->mc_mutex); + if (my->mc_new < 0) { + my->mc_new = 0; break; } + my->mc_new = 0; wsize = my->mc_wlen[toggle]; ptr = my->mc_wbuf[toggle]; again: @@ -8089,7 +8097,6 @@ again: } if (rc) { my->mc_status = rc; - pthread_mutex_unlock(&my->mc_mutex[toggle]); break; } /* If there's an overflow page tail, write it too */ @@ -8101,23 +8108,29 @@ again: } my->mc_wlen[toggle] = 0; toggle ^= 1; + pthread_cond_signal(&my->mc_cond); } + pthread_cond_signal(&my->mc_cond); + pthread_mutex_unlock(&my->mc_mutex); return (THREAD_RET)0; #undef DO_WRITE } static int -mdb_env_cthr_toggle(mdb_copy *my) 
+mdb_env_cthr_toggle(mdb_copy *my, int st) { int toggle = my->mc_toggle ^ 1; - - pthread_mutex_unlock(&my->mc_mutex[my->mc_toggle]); - pthread_mutex_lock(&my->mc_mutex[toggle]); + pthread_mutex_lock(&my->mc_mutex); if (my->mc_status) { - pthread_mutex_unlock(&my->mc_mutex[toggle]); + pthread_mutex_unlock(&my->mc_mutex); return my->mc_status; } + while (my->mc_new == 1) + pthread_cond_wait(&my->mc_cond, &my->mc_mutex); + my->mc_new = st; my->mc_toggle = toggle; + pthread_cond_signal(&my->mc_cond); + pthread_mutex_unlock(&my->mc_mutex); return 0; } @@ -8188,10 +8201,10 @@ mdb_env_cwalk(mdb_copy *my, pgno_t *pg, int flags) if (rc) goto done; if (my->mc_wlen[toggle] >= MDB_WBUF) { - rc = mdb_env_cthr_toggle(my); + rc = mdb_env_cthr_toggle(my, 1); if (rc) goto done; - toggle ^= 1; + toggle = my->mc_toggle; } mo = (MDB_page *)(my->mc_wbuf[toggle] + my->mc_wlen[toggle]); memcpy(mo, omp, my->mc_env->me_psize); @@ -8201,10 +8214,10 @@ mdb_env_cwalk(mdb_copy *my, pgno_t *pg, int flags) if (omp->mp_pages > 1) { my->mc_olen[toggle] = my->mc_env->me_psize * (omp->mp_pages - 1); my->mc_over[toggle] = (char *)omp + my->mc_env->me_psize; - rc = mdb_env_cthr_toggle(my); + rc = mdb_env_cthr_toggle(my, 1); if (rc) goto done; - toggle ^= 1; + toggle = my->mc_toggle; } memcpy(NODEDATA(ni), &mo->mp_pgno, sizeof(pgno_t)); } else if (ni->mn_flags & F_SUBDATA) { @@ -8250,10 +8263,10 @@ again: } } if (my->mc_wlen[toggle] >= MDB_WBUF) { - rc = mdb_env_cthr_toggle(my); + rc = mdb_env_cthr_toggle(my, 1); if (rc) goto done; - toggle ^= 1; + toggle = my->mc_toggle; } mo = (MDB_page *)(my->mc_wbuf[toggle] + my->mc_wlen[toggle]); mdb_page_copy(mo, mp, my->mc_env->me_psize); @@ -8286,14 +8299,14 @@ mdb_env_copyfd2(MDB_env *env, HANDLE fd) int rc; #ifdef _WIN32 - my.mc_mutex[0] = CreateMutex(NULL, FALSE, NULL); - my.mc_mutex[1] = CreateMutex(NULL, FALSE, NULL); + my.mc_mutex = CreateMutex(NULL, FALSE, NULL); + my.mc_cond = CreateEvent(NULL, FALSE, FALSE, NULL); my.mc_wbuf[0] = 
_aligned_malloc(MDB_WBUF*2, env->me_psize); if (my.mc_wbuf[0] == NULL) return errno; #else - pthread_mutex_init(&my.mc_mutex[0], NULL); - pthread_mutex_init(&my.mc_mutex[1], NULL); + pthread_mutex_init(&my.mc_mutex, NULL); + pthread_cond_init(&my.mc_cond, NULL); rc = posix_memalign((void **)&my.mc_wbuf[0], env->me_psize, MDB_WBUF*2); if (rc) return rc; @@ -8305,11 +8318,10 @@ mdb_env_copyfd2(MDB_env *env, HANDLE fd) my.mc_olen[1] = 0; my.mc_next_pgno = 2; my.mc_status = 0; + my.mc_new = 1; my.mc_toggle = 0; my.mc_env = env; my.mc_fd = fd; - pthread_mutex_lock(&my.mc_mutex[0]); - THREAD_CREATE(thr, mdb_env_copythr, &my); /* Do the lock/unlock of the reader mutex before starting the * write txn. Otherwise other read txns could block writers. @@ -8332,6 +8344,7 @@ mdb_env_copyfd2(MDB_env *env, HANDLE fd) } } + THREAD_CREATE(thr, mdb_env_copythr, &my); mp = (MDB_page *)my.mc_wbuf[0]; memset(mp, 0, 2*env->me_psize); mp->mp_pgno = 0; @@ -8368,21 +8381,28 @@ mdb_env_copyfd2(MDB_env *env, HANDLE fd) } my.mc_wlen[0] = env->me_psize * 2; my.mc_txn = txn; + pthread_mutex_lock(&my.mc_mutex); + while(my.mc_new) + pthread_cond_wait(&my.mc_cond, &my.mc_mutex); + pthread_mutex_unlock(&my.mc_mutex); rc = mdb_env_cwalk(&my, &txn->mt_dbs[1].md_root, 0); if (rc == MDB_SUCCESS && my.mc_wlen[my.mc_toggle]) - rc = mdb_env_cthr_toggle(&my); - my.mc_wlen[my.mc_toggle] = 0; - pthread_mutex_unlock(&my.mc_mutex[my.mc_toggle]); + rc = mdb_env_cthr_toggle(&my, 1); + mdb_env_cthr_toggle(&my, -1); + pthread_mutex_lock(&my.mc_mutex); + while(my.mc_new) + pthread_cond_wait(&my.mc_cond, &my.mc_mutex); + pthread_mutex_unlock(&my.mc_mutex); THREAD_FINISH(thr); leave: mdb_txn_abort(txn); #ifdef _WIN32 - CloseHandle(my.mc_mutex[1]); - CloseHandle(my.mc_mutex[0]); + CloseHandle(my.mc_cond); + CloseHandle(my.mc_mutex); _aligned_free(my.mc_wbuf[0]); #else - pthread_mutex_destroy(&my.mc_mutex[1]); - pthread_mutex_destroy(&my.mc_mutex[0]); + pthread_cond_destroy(&my.mc_cond); + 
pthread_mutex_destroy(&my.mc_mutex); free(my.mc_wbuf[0]); #endif return rc; From 3e98addbe294c69b94545141f466836bf92693ee Mon Sep 17 00:00:00 2001 From: Howard Chu Date: Sat, 5 Jul 2014 07:49:10 -0700 Subject: [PATCH 16/24] Rationalize mdb_env_copy2 API --- libraries/liblmdb/Makefile | 2 +- libraries/liblmdb/lmdb.h | 33 +++++++++++++++++------- libraries/liblmdb/mdb.c | 50 ++++++++++++++++++++++++------------ libraries/liblmdb/mdb_copy.c | 19 +++++--------- 4 files changed, 64 insertions(+), 40 deletions(-) diff --git a/libraries/liblmdb/Makefile b/libraries/liblmdb/Makefile index 60ab528f65..25c1095466 100644 --- a/libraries/liblmdb/Makefile +++ b/libraries/liblmdb/Makefile @@ -17,7 +17,7 @@ # read mdb.c before changing any of them. # CC = gcc -W = -W -Wall -Wno-unused-parameter -Wbad-function-cast +W = -W -Wall -Wno-unused-parameter -Wbad-function-cast -Wuninitialized THREADS = -pthread OPT = -O2 -g CFLAGS = $(THREADS) $(OPT) $(W) $(XCFLAGS) diff --git a/libraries/liblmdb/lmdb.h b/libraries/liblmdb/lmdb.h index b5791795e6..0bc97cdf40 100644 --- a/libraries/liblmdb/lmdb.h +++ b/libraries/liblmdb/lmdb.h @@ -333,6 +333,15 @@ typedef void (MDB_rel_func)(MDB_val *item, void *oldptr, void *newptr, void *rel #define MDB_MULTIPLE 0x80000 /* @} */ +/** @defgroup mdb_copy Copy Flags + * @{ + */ +/** Compacting copy: Omit free space from copy, and renumber all + * pages sequentially. + */ +#define MDB_CP_COMPACT 0x01 +/* @} */ + /** @brief Cursor Get operations. * * This is the set of all operations for retrieving data @@ -622,14 +631,10 @@ int mdb_env_copy(MDB_env *env, const char *path); */ int mdb_env_copyfd(MDB_env *env, mdb_filehandle_t fd); - /** @brief Copy an LMDB environment to the specified path, with compaction. + /** @brief Copy an LMDB environment to the specified path, with options. * * This function may be used to make a backup of an existing environment. - * No lockfile is created, since it gets recreated at need. 
Unlike - * #mdb_env_copy(), which copies all pages from the environment, this - * function trims freed/unused pages from the copy and reorders leaf - * pages in sequential order. This function may execute more slowly - * than #mdb_env_copy() and will use more CPU time. + * No lockfile is created, since it gets recreated at need. * @note This call can trigger significant file size growth if run in * parallel with write transactions, because it employs a read-only * transaction. See long-lived transactions under @ref caveats_sec. @@ -638,12 +643,20 @@ int mdb_env_copyfd(MDB_env *env, mdb_filehandle_t fd); * @param[in] path The directory in which the copy will reside. This * directory must already exist and be writable but must otherwise be * empty. + * @param[in] flags Special options for this operation. This parameter + * must be set to 0 or by bitwise OR'ing together one or more of the + * values described here. + *
<ul> + * <li>#MDB_CP_COMPACT - Perform compaction while copying: omit free + * pages and sequentially renumber all pages in output. This option + * consumes more CPU and runs more slowly than the default. + * </ul>
* @return A non-zero error value on failure and 0 on success. */ -int mdb_env_copy2(MDB_env *env, const char *path); +int mdb_env_copy2(MDB_env *env, const char *path, unsigned int flags); /** @brief Copy an LMDB environment to the specified file descriptor, - * with compaction. + * with options. * * This function may be used to make a backup of an existing environment. * No lockfile is created, since it gets recreated at need. See @@ -655,9 +668,11 @@ int mdb_env_copy2(MDB_env *env, const char *path); * must have already been opened successfully. * @param[in] fd The filedescriptor to write the copy to. It must * have already been opened for Write access. + * @param[in] flags Special options for this operation. + * See #mdb_env_copy2() for options. * @return A non-zero error value on failure and 0 on success. */ -int mdb_env_copyfd2(MDB_env *env, mdb_filehandle_t fd); +int mdb_env_copyfd2(MDB_env *env, mdb_filehandle_t fd, unsigned int flags); /** @brief Return statistics about the LMDB environment. * diff --git a/libraries/liblmdb/mdb.c b/libraries/liblmdb/mdb.c index 609eb9230a..5acdba4dac 100644 --- a/libraries/liblmdb/mdb.c +++ b/libraries/liblmdb/mdb.c @@ -8036,6 +8036,7 @@ mdb_put(MDB_txn *txn, MDB_dbi dbi, #define MDB_WBUF (1024*1024) #endif + /** State needed for a compacting copy. */ typedef struct mdb_copy { pthread_mutex_t mc_mutex; pthread_cond_t mc_cond; @@ -8050,8 +8051,10 @@ typedef struct mdb_copy { int mc_status; volatile int mc_new; int mc_toggle; + } mdb_copy; + /** Dedicated writer thread for compacting copy. */ static THREAD_RET mdb_env_copythr(void *arg) { @@ -8116,6 +8119,7 @@ again: #undef DO_WRITE } + /** Tell the writer thread there's a buffer ready to write */ static int mdb_env_cthr_toggle(mdb_copy *my, int st) { @@ -8134,6 +8138,7 @@ mdb_env_cthr_toggle(mdb_copy *my, int st) return 0; } + /** Depth-first tree traversal for compacting copy. 
*/ static int mdb_env_cwalk(mdb_copy *my, pgno_t *pg, int flags) { @@ -8255,6 +8260,9 @@ again: mc.mc_snum++; mc.mc_ki[mc.mc_top] = 0; if (IS_BRANCH(mp)) { + /* Whenever we advance to a sibling branch page, + * we must proceed all the way down to its first leaf. + */ mdb_page_copy(mc.mc_pg[mc.mc_top], mp, my->mc_env->me_psize); goto again; } else @@ -8288,8 +8296,9 @@ done: return rc; } -int -mdb_env_copyfd2(MDB_env *env, HANDLE fd) + /** Copy environment with compaction. */ +static int +mdb_env_copyfd1(MDB_env *env, HANDLE fd) { MDB_meta *mm; MDB_page *mp; @@ -8408,8 +8417,9 @@ leave: return rc; } -int -mdb_env_copyfd(MDB_env *env, HANDLE fd) + /** Copy environment as-is. */ +static int +mdb_env_copyfd0(MDB_env *env, HANDLE fd) { MDB_txn *txn = NULL; int rc; @@ -8512,8 +8522,23 @@ leave: return rc; } -static int -mdb_env_copy0(MDB_env *env, const char *path, int flag) +int +mdb_env_copyfd2(MDB_env *env, HANDLE fd, unsigned int flags) +{ + if (flags & MDB_CP_COMPACT) + return mdb_env_copyfd1(env, fd); + else + return mdb_env_copyfd0(env, fd); +} + +int +mdb_env_copyfd(MDB_env *env, HANDLE fd) +{ + return mdb_env_copyfd2(env, fd, 0); +} + +int +mdb_env_copy2(MDB_env *env, const char *path, unsigned int flags) { int rc, len; char *lpath; @@ -8558,10 +8583,7 @@ mdb_env_copy0(MDB_env *env, const char *path, int flag) } #endif - if (flag) - rc = mdb_env_copyfd2(env, newfd); - else - rc = mdb_env_copyfd(env, newfd); + rc = mdb_env_copyfd2(env, newfd, flags); leave: if (!(env->me_flags & MDB_NOSUBDIR)) @@ -8576,13 +8598,7 @@ leave: int mdb_env_copy(MDB_env *env, const char *path) { - return mdb_env_copy0(env, path, 0); -} - -int -mdb_env_copy2(MDB_env *env, const char *path) -{ - return mdb_env_copy0(env, path, 1); + return mdb_env_copy2(env, path, 0); } int diff --git a/libraries/liblmdb/mdb_copy.c b/libraries/liblmdb/mdb_copy.c index 0814519d4d..c54fefe9f1 100644 --- a/libraries/liblmdb/mdb_copy.c +++ b/libraries/liblmdb/mdb_copy.c @@ -33,13 +33,13 @@ int main(int 
argc,char * argv[]) MDB_env *env; const char *progname = argv[0], *act; unsigned flags = MDB_RDONLY; - int compact = 0; + unsigned cpflags = 0; for (; argc > 1 && argv[1][0] == '-'; argc--, argv++) { if (argv[1][1] == 'n' && argv[1][2] == '\0') flags |= MDB_NOSUBDIR; else if (argv[1][1] == 'c' && argv[1][2] == '\0') - compact = 1; + cpflags |= MDB_CP_COMPACT; else if (argv[1][1] == 'V' && argv[1][2] == '\0') { printf("%s\n", MDB_VERSION_STRING); exit(0); @@ -68,17 +68,10 @@ int main(int argc,char * argv[]) } if (rc == MDB_SUCCESS) { act = "copying"; - if (compact) { - if (argc == 2) - rc = mdb_env_copyfd2(env, MDB_STDOUT); - else - rc = mdb_env_copy2(env, argv[2]); - } else { - if (argc == 2) - rc = mdb_env_copyfd(env, MDB_STDOUT); - else - rc = mdb_env_copy(env, argv[2]); - } + if (argc == 2) + rc = mdb_env_copyfd2(env, MDB_STDOUT, cpflags); + else + rc = mdb_env_copy2(env, argv[2], cpflags); } if (rc) fprintf(stderr, "%s: %s failed, error %d (%s)\n", From 7d996d7f3d4c14086637c3fd929366c93af7c21e Mon Sep 17 00:00:00 2001 From: Howard Chu Date: Sat, 5 Jul 2014 08:48:47 -0700 Subject: [PATCH 17/24] Play games with object file layout Use gcc section attribute to keep less frequently used functions away from main code. --- libraries/liblmdb/mdb.c | 90 +++++++++++++++++++++++------------------ 1 file changed, 51 insertions(+), 39 deletions(-) diff --git a/libraries/liblmdb/mdb.c b/libraries/liblmdb/mdb.c index 5acdba4dac..4dbbe8327c 100644 --- a/libraries/liblmdb/mdb.c +++ b/libraries/liblmdb/mdb.c @@ -150,6 +150,13 @@ # error "Two's complement, reasonably sized integer types, please" #endif +#ifdef __GNUC__ +/** Put infrequently used env functions in separate section */ +#define ESECT __attribute__ ((section("text_env"))) +#else +#define ESECT +#endif + /** @defgroup internal LMDB Internals * @{ */ @@ -3256,7 +3263,7 @@ fail: * @param[out] meta address of where to store the meta information * @return 0 on success, non-zero on failure. 
*/ -static int +static int ESECT mdb_env_read_header(MDB_env *env, MDB_meta *meta) { MDB_metabuf pbuf; @@ -3314,7 +3321,7 @@ mdb_env_read_header(MDB_env *env, MDB_meta *meta) return 0; } -static void +static void ESECT mdb_env_init_meta0(MDB_env *env, MDB_meta *meta) { meta->mm_magic = MDB_MAGIC; @@ -3333,7 +3340,7 @@ mdb_env_init_meta0(MDB_env *env, MDB_meta *meta) * @param[out] meta address of where to store the meta information * @return 0 on success, non-zero on failure. */ -static int +static int ESECT mdb_env_init_meta(MDB_env *env, MDB_meta *meta) { MDB_page *p, *q; @@ -3513,7 +3520,7 @@ mdb_env_pick_meta(const MDB_env *env) return (env->me_metas[0]->mm_txnid < env->me_metas[1]->mm_txnid); } -int +int ESECT mdb_env_create(MDB_env **env) { MDB_env *e; @@ -3538,7 +3545,7 @@ mdb_env_create(MDB_env **env) return MDB_SUCCESS; } -static int +static int ESECT mdb_env_map(MDB_env *env, void *addr, int newsize) { MDB_page *p; @@ -3621,7 +3628,7 @@ mdb_env_map(MDB_env *env, void *addr, int newsize) return MDB_SUCCESS; } -int +int ESECT mdb_env_set_mapsize(MDB_env *env, size_t size) { /* If env is already open, caller is responsible for making @@ -3655,7 +3662,7 @@ mdb_env_set_mapsize(MDB_env *env, size_t size) return MDB_SUCCESS; } -int +int ESECT mdb_env_set_maxdbs(MDB_env *env, MDB_dbi dbs) { if (env->me_map) @@ -3664,7 +3671,7 @@ mdb_env_set_maxdbs(MDB_env *env, MDB_dbi dbs) return MDB_SUCCESS; } -int +int ESECT mdb_env_set_maxreaders(MDB_env *env, unsigned int readers) { if (env->me_map || readers < 1) @@ -3673,7 +3680,7 @@ mdb_env_set_maxreaders(MDB_env *env, unsigned int readers) return MDB_SUCCESS; } -int +int ESECT mdb_env_get_maxreaders(MDB_env *env, unsigned int *readers) { if (!env || !readers) @@ -3684,7 +3691,7 @@ mdb_env_get_maxreaders(MDB_env *env, unsigned int *readers) /** Further setup required for opening an LMDB environment */ -static int +static int ESECT mdb_env_open2(MDB_env *env) { unsigned int flags = env->me_flags; @@ -3841,7 +3848,7 @@ 
PIMAGE_TLS_CALLBACK mdb_tls_cbp = mdb_tls_callback; #endif /** Downgrade the exclusive lock on the region back to shared */ -static int +static int ESECT mdb_env_share_locks(MDB_env *env, int *excl) { int rc = 0, toggle = mdb_env_pick_meta(env); @@ -3883,7 +3890,7 @@ mdb_env_share_locks(MDB_env *env, int *excl) /** Try to get exlusive lock, otherwise shared. * Maintain *excl = -1: no/unknown lock, 0: shared, 1: exclusive. */ -static int +static int ESECT mdb_env_excl_lock(MDB_env *env, int *excl) { int rc = 0; @@ -4025,7 +4032,7 @@ mdb_hash_enc(MDB_val *val, char *encbuf) * @param[in,out] excl In -1, out lock type: -1 none, 0 shared, 1 exclusive * @return 0 on success, non-zero on failure. */ -static int +static int ESECT mdb_env_setup_locks(MDB_env *env, char *lpath, int mode, int *excl) { #ifdef _WIN32 @@ -4255,7 +4262,7 @@ fail: # error "Persistent DB flags & env flags overlap, but both go in mm_flags" #endif -int +int ESECT mdb_env_open(MDB_env *env, const char *path, unsigned int flags, mdb_mode_t mode) { int oflags, rc, len, excl = -1; @@ -4383,7 +4390,7 @@ leave: } /** Destroy resources from mdb_env_open(), clear our readers & DBIs */ -static void +static void ESECT mdb_env_close0(MDB_env *env, int excl) { int i; @@ -4472,7 +4479,7 @@ mdb_env_close0(MDB_env *env, int excl) } -void +void ESECT mdb_env_close(MDB_env *env) { MDB_page *dp; @@ -8055,7 +8062,7 @@ typedef struct mdb_copy { } mdb_copy; /** Dedicated writer thread for compacting copy. */ -static THREAD_RET +static THREAD_RET ESECT mdb_env_copythr(void *arg) { mdb_copy *my = arg; @@ -8120,7 +8127,7 @@ again: } /** Tell the writer thread there's a buffer ready to write */ -static int +static int ESECT mdb_env_cthr_toggle(mdb_copy *my, int st) { int toggle = my->mc_toggle ^ 1; @@ -8139,7 +8146,7 @@ mdb_env_cthr_toggle(mdb_copy *my, int st) } /** Depth-first tree traversal for compacting copy. 
*/ -static int +static int ESECT mdb_env_cwalk(mdb_copy *my, pgno_t *pg, int flags) { MDB_cursor mc; @@ -8297,7 +8304,7 @@ done: } /** Copy environment with compaction. */ -static int +static int ESECT mdb_env_copyfd1(MDB_env *env, HANDLE fd) { MDB_meta *mm; @@ -8418,7 +8425,7 @@ leave: } /** Copy environment as-is. */ -static int +static int ESECT mdb_env_copyfd0(MDB_env *env, HANDLE fd) { MDB_txn *txn = NULL; @@ -8522,7 +8529,7 @@ leave: return rc; } -int +int ESECT mdb_env_copyfd2(MDB_env *env, HANDLE fd, unsigned int flags) { if (flags & MDB_CP_COMPACT) @@ -8531,13 +8538,13 @@ mdb_env_copyfd2(MDB_env *env, HANDLE fd, unsigned int flags) return mdb_env_copyfd0(env, fd); } -int +int ESECT mdb_env_copyfd(MDB_env *env, HANDLE fd) { return mdb_env_copyfd2(env, fd, 0); } -int +int ESECT mdb_env_copy2(MDB_env *env, const char *path, unsigned int flags) { int rc, len; @@ -8595,13 +8602,13 @@ leave: return rc; } -int +int ESECT mdb_env_copy(MDB_env *env, const char *path) { return mdb_env_copy2(env, path, 0); } -int +int ESECT mdb_env_set_flags(MDB_env *env, unsigned int flag, int onoff) { if ((flag & CHANGEABLE) != flag) @@ -8613,7 +8620,7 @@ mdb_env_set_flags(MDB_env *env, unsigned int flag, int onoff) return MDB_SUCCESS; } -int +int ESECT mdb_env_get_flags(MDB_env *env, unsigned int *arg) { if (!env || !arg) @@ -8623,7 +8630,7 @@ mdb_env_get_flags(MDB_env *env, unsigned int *arg) return MDB_SUCCESS; } -int +int ESECT mdb_env_set_userctx(MDB_env *env, void *ctx) { if (!env) @@ -8632,13 +8639,13 @@ mdb_env_set_userctx(MDB_env *env, void *ctx) return MDB_SUCCESS; } -void * +void * ESECT mdb_env_get_userctx(MDB_env *env) { return env ? 
env->me_userctx : NULL; } -int +int ESECT mdb_env_set_assert(MDB_env *env, MDB_assert_func *func) { if (!env) @@ -8649,7 +8656,7 @@ mdb_env_set_assert(MDB_env *env, MDB_assert_func *func) return MDB_SUCCESS; } -int +int ESECT mdb_env_get_path(MDB_env *env, const char **arg) { if (!env || !arg) @@ -8659,7 +8666,7 @@ mdb_env_get_path(MDB_env *env, const char **arg) return MDB_SUCCESS; } -int +int ESECT mdb_env_get_fd(MDB_env *env, mdb_filehandle_t *arg) { if (!env || !arg) @@ -8675,7 +8682,7 @@ mdb_env_get_fd(MDB_env *env, mdb_filehandle_t *arg) * @param[out] arg the address of an #MDB_stat structure to receive the stats. * @return 0, this function always succeeds. */ -static int +static int ESECT mdb_stat0(MDB_env *env, MDB_db *db, MDB_stat *arg) { arg->ms_psize = env->me_psize; @@ -8687,7 +8694,8 @@ mdb_stat0(MDB_env *env, MDB_db *db, MDB_stat *arg) return MDB_SUCCESS; } -int + +int ESECT mdb_env_stat(MDB_env *env, MDB_stat *arg) { int toggle; @@ -8700,7 +8708,7 @@ mdb_env_stat(MDB_env *env, MDB_stat *arg) return mdb_stat0(env, &env->me_metas[toggle]->mm_dbs[MAIN_DBI], arg); } -int +int ESECT mdb_env_info(MDB_env *env, MDB_envinfo *arg) { int toggle; @@ -9057,12 +9065,14 @@ int mdb_set_relctx(MDB_txn *txn, MDB_dbi dbi, void *ctx) return MDB_SUCCESS; } -int mdb_env_get_maxkeysize(MDB_env *env) +int ESECT +mdb_env_get_maxkeysize(MDB_env *env) { return ENV_MAXKEY(env); } -int mdb_reader_list(MDB_env *env, MDB_msg_func *func, void *ctx) +int ESECT +mdb_reader_list(MDB_env *env, MDB_msg_func *func, void *ctx) { unsigned int i, rdrs; MDB_reader *mr; @@ -9102,7 +9112,8 @@ int mdb_reader_list(MDB_env *env, MDB_msg_func *func, void *ctx) /** Insert pid into list if not already present. * return -1 if already present. 
*/ -static int mdb_pid_insert(MDB_PID_T *ids, MDB_PID_T pid) +static int ESECT +mdb_pid_insert(MDB_PID_T *ids, MDB_PID_T pid) { /* binary search of pid in list */ unsigned base = 0; @@ -9138,7 +9149,8 @@ static int mdb_pid_insert(MDB_PID_T *ids, MDB_PID_T pid) return 0; } -int mdb_reader_check(MDB_env *env, int *dead) +int ESECT +mdb_reader_check(MDB_env *env, int *dead) { unsigned int i, j, rdrs; MDB_reader *mr; From 61f7ec9bfa772f458d0ef8c25a298d7f4b42d8b3 Mon Sep 17 00:00:00 2001 From: Howard Chu Date: Sun, 6 Jul 2014 07:49:38 -0700 Subject: [PATCH 18/24] Windows fixes --- libraries/liblmdb/mdb.c | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/libraries/liblmdb/mdb.c b/libraries/liblmdb/mdb.c index 4dbbe8327c..bdc11b5a09 100644 --- a/libraries/liblmdb/mdb.c +++ b/libraries/liblmdb/mdb.c @@ -192,16 +192,16 @@ #define pthread_key_delete(x) TlsFree(x) #define pthread_getspecific(x) TlsGetValue(x) #define pthread_setspecific(x,y) (TlsSetValue(x,y) ? 0 : ErrCode()) -#define pthread_mutex_unlock(x) ReleaseMutex(x) -#define pthread_mutex_lock(x) WaitForSingleObject(x, INFINITE) +#define pthread_mutex_unlock(x) ReleaseMutex(*x) +#define pthread_mutex_lock(x) WaitForSingleObject(*x, INFINITE) #define pthread_cond_signal(x) SetEvent(*x) -#define pthread_cond_wait(cond,mutex) SignalObjectAndWait(*mutex, *cond, INFINITE, FALSE); WaitForSingleObject(*mutex, INFINITE) +#define pthread_cond_wait(cond,mutex) do{SignalObjectAndWait(*mutex, *cond, INFINITE, FALSE); WaitForSingleObject(*mutex, INFINITE);}while(0) #define THREAD_CREATE(thr,start,arg) thr=CreateThread(NULL,0,start,arg,0,NULL) #define THREAD_FINISH(thr) WaitForSingleObject(thr, INFINITE) -#define LOCK_MUTEX_R(env) pthread_mutex_lock((env)->me_rmutex) -#define UNLOCK_MUTEX_R(env) pthread_mutex_unlock((env)->me_rmutex) -#define LOCK_MUTEX_W(env) pthread_mutex_lock((env)->me_wmutex) -#define UNLOCK_MUTEX_W(env) pthread_mutex_unlock((env)->me_wmutex) +#define LOCK_MUTEX_R(env) 
pthread_mutex_lock(&(env)->me_rmutex) +#define UNLOCK_MUTEX_R(env) pthread_mutex_unlock(&(env)->me_rmutex) +#define LOCK_MUTEX_W(env) pthread_mutex_lock(&(env)->me_wmutex) +#define UNLOCK_MUTEX_W(env) pthread_mutex_unlock(&(env)->me_wmutex) #define getpid() GetCurrentProcessId() #define MDB_FDATASYNC(fd) (!FlushFileBuffers(fd)) #define MDB_MSYNC(addr,len,flags) (!FlushViewOfFile(addr,len)) From 1ac5147fc175907f0c657ce8416c70242eb91fd5 Mon Sep 17 00:00:00 2001 From: Howard Chu Date: Sun, 6 Jul 2014 17:47:25 -0700 Subject: [PATCH 19/24] Compacting copy doesn't need to snapshot metas --- libraries/liblmdb/mdb.c | 21 ++------------------- 1 file changed, 2 insertions(+), 19 deletions(-) diff --git a/libraries/liblmdb/mdb.c b/libraries/liblmdb/mdb.c index bdc11b5a09..e0465d4795 100644 --- a/libraries/liblmdb/mdb.c +++ b/libraries/liblmdb/mdb.c @@ -8338,29 +8338,12 @@ mdb_env_copyfd1(MDB_env *env, HANDLE fd) my.mc_toggle = 0; my.mc_env = env; my.mc_fd = fd; + THREAD_CREATE(thr, mdb_env_copythr, &my); - /* Do the lock/unlock of the reader mutex before starting the - * write txn. Otherwise other read txns could block writers. 
- */ rc = mdb_txn_begin(env, NULL, MDB_RDONLY, &txn); if (rc) return rc; - if (env->me_txns) { - /* We must start the actual read txn after blocking writers */ - mdb_txn_reset0(txn, "reset-stage1"); - - /* Temporarily block writers until we snapshot the meta pages */ - LOCK_MUTEX_W(env); - - rc = mdb_txn_renew0(txn); - if (rc) { - UNLOCK_MUTEX_W(env); - goto leave; - } - } - - THREAD_CREATE(thr, mdb_env_copythr, &my); mp = (MDB_page *)my.mc_wbuf[0]; memset(mp, 0, 2*env->me_psize); mp->mp_pgno = 0; @@ -8410,7 +8393,7 @@ mdb_env_copyfd1(MDB_env *env, HANDLE fd) pthread_cond_wait(&my.mc_cond, &my.mc_mutex); pthread_mutex_unlock(&my.mc_mutex); THREAD_FINISH(thr); -leave: + mdb_txn_abort(txn); #ifdef _WIN32 CloseHandle(my.mc_cond); From 534dcc721287cfe1051191a5cd7d3df6cf1dc409 Mon Sep 17 00:00:00 2001 From: Howard Chu Date: Mon, 7 Jul 2014 17:13:51 -0700 Subject: [PATCH 20/24] Set xcursor EOF after deleting last entry --- libraries/liblmdb/mdb.c | 1 + 1 file changed, 1 insertion(+) diff --git a/libraries/liblmdb/mdb.c b/libraries/liblmdb/mdb.c index e0465d4795..6d02f8e5d1 100644 --- a/libraries/liblmdb/mdb.c +++ b/libraries/liblmdb/mdb.c @@ -6343,6 +6343,7 @@ mdb_cursor_del(MDB_cursor *mc, unsigned int flags) return rc; } /* otherwise fall thru and delete the sub-DB */ + mc->mc_xcursor->mx_cursor.mc_flags |= C_EOF; } if (leaf->mn_flags & F_SUBDATA) { From 68e6faad5c32293a5fdb1b7b7e99734f1cc1d1d1 Mon Sep 17 00:00:00 2001 From: Howard Chu Date: Mon, 7 Jul 2014 09:12:22 -0700 Subject: [PATCH 21/24] ITS#7713 handle 65536 byte page size Fix mp_lower/mp_upper to exclude PAGEHDRSZ, which means our max mp_upper is now pagesize - PAGEHDRSZ and won't overflow. Currently must define MDB_DEVEL to use. 
Will be default for LMDB 1.0/OpenLDAP 2.5 --- libraries/liblmdb/mdb.c | 53 ++++++++++++++++++++++++----------------- 1 file changed, 31 insertions(+), 22 deletions(-) diff --git a/libraries/liblmdb/mdb.c b/libraries/liblmdb/mdb.c index 6d02f8e5d1..d31cdb2c42 100644 --- a/libraries/liblmdb/mdb.c +++ b/libraries/liblmdb/mdb.c @@ -693,8 +693,17 @@ typedef struct MDB_page { /** Address of first usable data byte in a page, after the header */ #define METADATA(p) ((void *)((char *)(p) + PAGEHDRSZ)) + /** ITS#7713, change PAGEBASE to handle 65536 byte pages */ +#ifdef MDB_DEVEL +#define MP_LOBASE 0 +#define MP_HIBASE PAGEHDRSZ +#else +#define MP_LOBASE PAGEHDRSZ +#define MP_HIBASE 0 +#endif + /** Number of nodes on a page */ -#define NUMKEYS(p) (((p)->mp_lower - PAGEHDRSZ) >> 1) +#define NUMKEYS(p) (((p)->mp_lower - MP_LOBASE) >> 1) /** The amount of space remaining in the page */ #define SIZELEFT(p) (indx_t)((p)->mp_upper - (p)->mp_lower) @@ -775,7 +784,7 @@ typedef struct MDB_node { #define LEAFSIZE(k, d) (NODESIZE + (k)->mv_size + (d)->mv_size) /** Address of node \b i in page \b p */ -#define NODEPTR(p, i) ((MDB_node *)((char *)(p) + (p)->mp_ptrs[i])) +#define NODEPTR(p, i) ((MDB_node *)((char *)(p) + (p)->mp_ptrs[i] + MP_HIBASE)) /** Address of the key for the node */ #define NODEKEY(node) (void *)((node)->mn_data) @@ -1385,7 +1394,7 @@ mdb_page_list(MDB_page *mp) total = EVEN(total); } fprintf(stderr, "Total: header %d + contents %d + unused %d\n", - IS_LEAF2(mp) ? PAGEHDRSZ : mp->mp_lower, total, SIZELEFT(mp)); + IS_LEAF2(mp) ? 
PAGEHDRSZ : MP_HIBASE + mp->mp_lower, total, SIZELEFT(mp)); } void @@ -2040,9 +2049,9 @@ mdb_page_copy(MDB_page *dst, MDB_page *src, unsigned int psize) */ if ((unused &= -Align) && !IS_LEAF2(src)) { upper &= -Align; - memcpy(dst, src, (lower + (Align-1)) & -Align); - memcpy((pgno_t *)((char *)dst+upper), (pgno_t *)((char *)src+upper), - psize - upper); + memcpy(dst, src, (lower + MP_HIBASE + (Align-1)) & -Align); + memcpy((pgno_t *)((char *)dst+upper+MP_HIBASE), (pgno_t *)((char *)src+upper+MP_HIBASE), + psize - upper - MP_HIBASE); } else { memcpy(dst, src, psize - unused); } @@ -5936,7 +5945,7 @@ mdb_cursor_put(MDB_cursor *mc, MDB_val *key, MDB_val *data, fp_flags = P_LEAF|P_DIRTY; fp = env->me_pbuf; fp->mp_pad = data->mv_size; /* used if MDB_DUPFIXED */ - fp->mp_lower = fp->mp_upper = olddata.mv_size = PAGEHDRSZ; + fp->mp_lower = fp->mp_upper = olddata.mv_size = MP_LOBASE; goto prep_subDB; } } else { @@ -5991,7 +6000,7 @@ more: /* Make sub-page header for the dup items, with dummy body */ fp->mp_flags = P_LEAF|P_DIRTY|P_SUBP; - fp->mp_lower = PAGEHDRSZ; + fp->mp_lower = MP_LOBASE; xdata.mv_size = PAGEHDRSZ + dkey.mv_size + data->mv_size; if (mc->mc_db->md_flags & MDB_DUPFIXED) { fp->mp_flags |= P_LEAF2; @@ -6001,8 +6010,8 @@ more: xdata.mv_size += 2 * (sizeof(indx_t) + NODESIZE) + (dkey.mv_size & 1) + (data->mv_size & 1); } - fp->mp_upper = xdata.mv_size; - olddata.mv_size = fp->mp_upper; /* pretend olddata is fp */ + fp->mp_upper = xdata.mv_size - MP_HIBASE; + olddata.mv_size = xdata.mv_size; /* pretend olddata is fp */ } else if (leaf->mn_flags & F_SUBDATA) { /* Data is on sub-DB, just store it */ flags |= F_DUPDATA|F_SUBDATA; @@ -6069,7 +6078,7 @@ prep_subDB: if (fp_flags & P_LEAF2) { memcpy(METADATA(mp), METADATA(fp), NUMKEYS(fp) * fp->mp_pad); } else { - memcpy((char *)mp + mp->mp_upper, (char *)fp + fp->mp_upper, + memcpy((char *)mp + mp->mp_upper + MP_HIBASE, (char *)fp + fp->mp_upper + MP_HIBASE, olddata.mv_size - fp->mp_upper); for (i=0; imp_ptrs[i] = 
fp->mp_ptrs[i] + offset; @@ -6392,8 +6401,8 @@ mdb_page_new(MDB_cursor *mc, uint32_t flags, int num, MDB_page **mp) DPRINTF(("allocated new mpage %"Z"u, page size %u", np->mp_pgno, mc->mc_txn->mt_env->me_psize)); np->mp_flags = flags | P_DIRTY; - np->mp_lower = PAGEHDRSZ; - np->mp_upper = mc->mc_txn->mt_env->me_psize; + np->mp_lower = MP_LOBASE; + np->mp_upper = mc->mc_txn->mt_env->me_psize - MP_HIBASE; if (IS_BRANCH(np)) mc->mc_db->md_branch_pages++; @@ -6646,7 +6655,7 @@ mdb_node_del(MDB_cursor *mc, int ksize) } } - base = (char *)mp + mp->mp_upper; + base = (char *)mp + mp->mp_upper + MP_HIBASE; memmove(base + sz, base, ptr - mp->mp_upper); mp->mp_lower -= sizeof(indx_t); @@ -6700,7 +6709,7 @@ mdb_node_shrink(MDB_page *mp, indx_t indx) mp->mp_ptrs[i] += delta; } - base = (char *)mp + mp->mp_upper; + base = (char *)mp + mp->mp_upper + MP_HIBASE; memmove(base + delta, base, ptr - mp->mp_upper + NODESIZE + NODEKSZ(node)); mp->mp_upper += delta; } @@ -6972,7 +6981,7 @@ mdb_update_key(MDB_cursor *mc, MDB_val *key) mp->mp_ptrs[i] -= delta; } - base = (char *)mp + mp->mp_upper; + base = (char *)mp + mp->mp_upper + MP_HIBASE; len = ptr - mp->mp_upper + NODESIZE; memmove(base - delta, base, len); mp->mp_upper -= delta; @@ -7763,8 +7772,8 @@ mdb_page_split(MDB_cursor *mc, MDB_val *newkey, MDB_val *newdata, pgno_t newpgno } copy->mp_pgno = mp->mp_pgno; copy->mp_flags = mp->mp_flags; - copy->mp_lower = PAGEHDRSZ; - copy->mp_upper = env->me_psize; + copy->mp_lower = MP_LOBASE; + copy->mp_upper = env->me_psize - MP_HIBASE; /* prepare to insert */ for (i=0, j=0; imp_ptrs[i]); + node = (MDB_node *)((char *)mp + copy->mp_ptrs[i] + MP_HIBASE); psize += NODESIZE + NODEKSZ(node) + sizeof(indx_t); if (IS_LEAF(mp)) { if (F_ISSET(node->mn_flags, F_BIGDATA)) @@ -7824,7 +7833,7 @@ mdb_page_split(MDB_cursor *mc, MDB_val *newkey, MDB_val *newdata, pgno_t newpgno sepkey.mv_size = newkey->mv_size; sepkey.mv_data = newkey->mv_data; } else { - node = (MDB_node *)((char *)mp + 
copy->mp_ptrs[split_indx]); + node = (MDB_node *)((char *)mp + copy->mp_ptrs[split_indx] + MP_HIBASE); sepkey.mv_size = node->mn_ksize; sepkey.mv_data = NODEKEY(node); } @@ -7905,7 +7914,7 @@ mdb_page_split(MDB_cursor *mc, MDB_val *newkey, MDB_val *newdata, pgno_t newpgno /* Update index for the new key. */ mc->mc_ki[mc->mc_top] = j; } else { - node = (MDB_node *)((char *)mp + copy->mp_ptrs[i]); + node = (MDB_node *)((char *)mp + copy->mp_ptrs[i] + MP_HIBASE); rkey.mv_data = NODEKEY(node); rkey.mv_size = node->mn_ksize; if (IS_LEAF(mp)) { @@ -7941,7 +7950,7 @@ mdb_page_split(MDB_cursor *mc, MDB_val *newkey, MDB_val *newdata, pgno_t newpgno mp->mp_lower = copy->mp_lower; mp->mp_upper = copy->mp_upper; memcpy(NODEPTR(mp, nkeys-1), NODEPTR(copy, nkeys-1), - env->me_psize - copy->mp_upper); + env->me_psize - copy->mp_upper - MP_HIBASE); /* reset back to original page */ if (newindx < split_indx) { From b3e8c71dc7d629f6311801774d275712bf72631f Mon Sep 17 00:00:00 2001 From: Howard Chu Date: Tue, 8 Jul 2014 11:54:24 -0700 Subject: [PATCH 22/24] Re-fix cursor EOF Fix in 534dcc721287cfe1051191a5cd7d3df6cf1dc409 was in wrong place --- libraries/liblmdb/mdb.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/libraries/liblmdb/mdb.c b/libraries/liblmdb/mdb.c index d31cdb2c42..ac1850733f 100644 --- a/libraries/liblmdb/mdb.c +++ b/libraries/liblmdb/mdb.c @@ -6352,7 +6352,6 @@ mdb_cursor_del(MDB_cursor *mc, unsigned int flags) return rc; } /* otherwise fall thru and delete the sub-DB */ - mc->mc_xcursor->mx_cursor.mc_flags |= C_EOF; } if (leaf->mn_flags & F_SUBDATA) { @@ -7537,8 +7536,10 @@ mdb_cursor_del0(MDB_cursor *mc) /* if mc points past last node in page, find next sibling */ if (mc->mc_ki[mc->mc_top] >= nkeys) { rc = mdb_cursor_sibling(mc, 1); - if (rc == MDB_NOTFOUND) + if (rc == MDB_NOTFOUND) { + mc->mc_flags |= C_EOF; rc = MDB_SUCCESS; + } } /* Adjust other cursors pointing to mp */ @@ -7556,8 +7557,10 @@ mdb_cursor_del0(MDB_cursor *mc) } if 
(m3->mc_ki[mc->mc_top] >= nkeys) { rc = mdb_cursor_sibling(m3, 1); - if (rc == MDB_NOTFOUND) + if (rc == MDB_NOTFOUND) { + m3->mc_flags |= C_EOF; rc = MDB_SUCCESS; + } } } } From 6ed295b2569c916f9ab1a93c76b6081ad2910794 Mon Sep 17 00:00:00 2001 From: Hallvard Furuseth Date: Tue, 8 Jul 2014 11:52:53 -0700 Subject: [PATCH 23/24] Fixes for loose pages mdb_txn_commit(child): Give loose pages to parent. Use a pointer beyond the page header instead of mp_next, so we will not need to save/restore mp_pgno. This avoids a crash caused by references to mp_pgno. --- libraries/liblmdb/mdb.c | 32 ++++++++++++++++++-------------- 1 file changed, 18 insertions(+), 14 deletions(-) diff --git a/libraries/liblmdb/mdb.c b/libraries/liblmdb/mdb.c index ac1850733f..3d539ffa8f 100644 --- a/libraries/liblmdb/mdb.c +++ b/libraries/liblmdb/mdb.c @@ -655,7 +655,7 @@ typedef struct MDB_page { #define mp_next mp_p.p_next union { pgno_t p_pgno; /**< page number */ - void * p_next; /**< for in-memory list of freed structs */ + struct MDB_page *p_next; /**< for in-memory list of freed pages */ } mp_p; uint16_t mp_pad; /** @defgroup mdb_page Page Flags @@ -731,7 +731,7 @@ typedef struct MDB_page { #define OVPAGES(size, psize) ((PAGEHDRSZ-1 + (size)) / (psize) + 1) /** Link in #MDB_txn.%mt_loose_pages list */ -#define NEXT_LOOSE_PAGE(p) (*(MDB_page **)METADATA(p)) +#define NEXT_LOOSE_PAGE(p) (*(MDB_page **)((p) + 2)) /** Header for a single key/data pair within a page. * Used in pages of type #P_BRANCH and #P_LEAF without #P_LEAF2. 
@@ -1601,6 +1601,8 @@ mdb_page_loose(MDB_cursor *mc, MDB_page *mp) } } if (loose) { + DPRINTF(("loosen db %d page %"Z"u", DDBI(mc), + mp->mp_pgno)); NEXT_LOOSE_PAGE(mp) = mc->mc_txn->mt_loose_pgs; mc->mc_txn->mt_loose_pgs = mp; mp->mp_flags |= P_LOOSE; @@ -1623,7 +1625,7 @@ mdb_page_loose(MDB_cursor *mc, MDB_page *mp) static int mdb_pages_xkeep(MDB_cursor *mc, unsigned pflags, int all) { - enum { Mask = P_SUBP|P_DIRTY|P_KEEP }; + enum { Mask = P_SUBP|P_DIRTY|P_LOOSE|P_KEEP }; MDB_txn *txn = mc->mc_txn; MDB_cursor *m3; MDB_xcursor *mx; @@ -1661,12 +1663,6 @@ mdb_pages_xkeep(MDB_cursor *mc, unsigned pflags, int all) break; } - /* Loose pages shouldn't be spilled */ - for (dp = txn->mt_loose_pgs; dp; dp = NEXT_LOOSE_PAGE(dp)) { - if ((dp->mp_flags & Mask) == pflags) - dp->mp_flags ^= P_KEEP; - } - if (all) { /* Mark dirty root pages */ for (i=0; imt_numdbs; i++) { @@ -1780,7 +1776,7 @@ mdb_page_spill(MDB_cursor *m0, MDB_val *key, MDB_val *data) for (i=dl[0].mid; i && need; i--) { MDB_ID pn = dl[i].mid << 1; dp = dl[i].mptr; - if (dp->mp_flags & P_KEEP) + if (dp->mp_flags & (P_LOOSE|P_KEEP)) continue; /* Can't spill twice, make sure it's not already in a parent's * spill list. 
@@ -1898,6 +1894,8 @@ mdb_page_alloc(MDB_cursor *mc, int num, MDB_page **mp) if (num == 1 && txn->mt_loose_pgs) { np = txn->mt_loose_pgs; txn->mt_loose_pgs = NEXT_LOOSE_PAGE(np); + DPRINTF(("db %d use loose page %"Z"u", DDBI(mc), + np->mp_pgno)); *mp = np; return MDB_SUCCESS; } @@ -2951,8 +2949,8 @@ mdb_page_flush(MDB_txn *txn, int keep) while (++i <= pagecount) { dp = dl[i].mptr; /* Don't flush this page yet */ - if (dp->mp_flags & P_KEEP) { - dp->mp_flags ^= P_KEEP; + if (dp->mp_flags & (P_LOOSE|P_KEEP)) { + dp->mp_flags &= ~P_KEEP; dl[++j] = dl[i]; continue; } @@ -2966,8 +2964,8 @@ mdb_page_flush(MDB_txn *txn, int keep) if (++i <= pagecount) { dp = dl[i].mptr; /* Don't flush this page yet */ - if (dp->mp_flags & P_KEEP) { - dp->mp_flags ^= P_KEEP; + if (dp->mp_flags & (P_LOOSE|P_KEEP)) { + dp->mp_flags &= ~P_KEEP; dl[i].mid = 0; continue; } @@ -3096,6 +3094,7 @@ mdb_txn_commit(MDB_txn *txn) if (txn->mt_parent) { MDB_txn *parent = txn->mt_parent; + MDB_page **lp; MDB_ID2L dst, src; MDB_IDL pspill; unsigned x, y, len, ps_len; @@ -3193,6 +3192,11 @@ mdb_txn_commit(MDB_txn *txn) } } + /* Append our loose page list to parent's; lp addresses the link to patch, so step through the page *lp */ + for (lp = &parent->mt_loose_pgs; *lp; lp = &NEXT_LOOSE_PAGE(*lp)) + ; + *lp = txn->mt_loose_pgs; + parent->mt_child = NULL; mdb_midl_free(((MDB_ntxn *)txn)->mnt_pgstate.mf_pghead); free(txn); From e1a676eb18448d0ea396c9bbe8908d25b38ddb6d Mon Sep 17 00:00:00 2001 From: Hallvard Furuseth Date: Tue, 8 Jul 2014 15:01:18 -0700 Subject: [PATCH 24/24] ITS#7713 cleanups '#ifdef MDB_DEVEL' -> '#if MDB_DEVEL' Fix comments, use actual PAGEBASE macro as originally intended. Actually enable 64K pages. Set MDB_DATA_VERSION for testing only. 
--- libraries/liblmdb/mdb.c | 71 +++++++++++++++++++++-------------------- 1 file changed, 37 insertions(+), 34 deletions(-) diff --git a/libraries/liblmdb/mdb.c b/libraries/liblmdb/mdb.c index 3d539ffa8f..d8cf710bfd 100644 --- a/libraries/liblmdb/mdb.c +++ b/libraries/liblmdb/mdb.c @@ -168,6 +168,11 @@ * @{ */ +/* Features under development */ +#ifndef MDB_DEVEL +#define MDB_DEVEL 0 +#endif + /** Wrapper around __func__, which is a C99 feature */ #if __STDC_VERSION__ >= 199901L # define mdb_func_ __func__ @@ -372,7 +377,8 @@ static txnid_t mdb_debug_start; /** @brief The maximum size of a database page. * - * This is 32k, since it must fit in #MDB_page.%mp_upper. + * It is 32k or 64k, since value-PAGEBASE must fit in + * #MDB_page.%mp_upper. * * LMDB will use database pages < OS pages if needed. * That causes more I/O in write transactions: The OS must @@ -385,7 +391,7 @@ static txnid_t mdb_debug_start; * pressure from other processes is high. So until OSs have * actual paging support for Huge pages, they're not viable. */ -#define MAX_PAGESIZE 0x8000 +#define MAX_PAGESIZE (PAGEBASE ? 0x10000 : 0x8000) /** The minimum number of keys required in a database page. * Setting this to a larger value will place a smaller bound on the @@ -408,7 +414,7 @@ static txnid_t mdb_debug_start; #define MDB_MAGIC 0xBEEFC0DE /** The version number for a database's datafile format. */ -#define MDB_DATA_VERSION 1 +#define MDB_DATA_VERSION ((MDB_DEVEL) ? 999 : 1) /** The version number for a database's lockfile format. */ #define MDB_LOCK_VERSION 1 @@ -694,16 +700,10 @@ typedef struct MDB_page { #define METADATA(p) ((void *)((char *)(p) + PAGEHDRSZ)) /** ITS#7713, change PAGEBASE to handle 65536 byte pages */ -#ifdef MDB_DEVEL -#define MP_LOBASE 0 -#define MP_HIBASE PAGEHDRSZ -#else -#define MP_LOBASE PAGEHDRSZ -#define MP_HIBASE 0 -#endif +#define PAGEBASE ((MDB_DEVEL) ? 
PAGEHDRSZ : 0) /** Number of nodes on a page */ -#define NUMKEYS(p) (((p)->mp_lower - MP_LOBASE) >> 1) +#define NUMKEYS(p) (((p)->mp_lower - (PAGEHDRSZ-PAGEBASE)) >> 1) /** The amount of space remaining in the page */ #define SIZELEFT(p) (indx_t)((p)->mp_upper - (p)->mp_lower) @@ -784,7 +784,7 @@ typedef struct MDB_node { #define LEAFSIZE(k, d) (NODESIZE + (k)->mv_size + (d)->mv_size) /** Address of node \b i in page \b p */ -#define NODEPTR(p, i) ((MDB_node *)((char *)(p) + (p)->mp_ptrs[i] + MP_HIBASE)) +#define NODEPTR(p, i) ((MDB_node *)((char *)(p) + (p)->mp_ptrs[i] + PAGEBASE)) /** Address of the key for the node */ #define NODEKEY(node) (void *)((node)->mn_data) @@ -1394,7 +1394,7 @@ mdb_page_list(MDB_page *mp) total = EVEN(total); } fprintf(stderr, "Total: header %d + contents %d + unused %d\n", - IS_LEAF2(mp) ? PAGEHDRSZ : MP_HIBASE + mp->mp_lower, total, SIZELEFT(mp)); + IS_LEAF2(mp) ? PAGEHDRSZ : PAGEBASE + mp->mp_lower, total, SIZELEFT(mp)); } void @@ -2046,10 +2046,10 @@ mdb_page_copy(MDB_page *dst, MDB_page *src, unsigned int psize) * alignment so memcpy may copy words instead of bytes. */ if ((unused &= -Align) && !IS_LEAF2(src)) { - upper &= -Align; - memcpy(dst, src, (lower + MP_HIBASE + (Align-1)) & -Align); - memcpy((pgno_t *)((char *)dst+upper+MP_HIBASE), (pgno_t *)((char *)src+upper+MP_HIBASE), - psize - upper - MP_HIBASE); + upper = (upper + PAGEBASE) & -Align; + memcpy(dst, src, (lower + PAGEBASE + (Align-1)) & -Align); + memcpy((pgno_t *)((char *)dst+upper), (pgno_t *)((char *)src+upper), + psize - upper); } else { memcpy(dst, src, psize - unused); } @@ -5945,11 +5945,14 @@ mdb_cursor_put(MDB_cursor *mc, MDB_val *key, MDB_val *data, if ((mc->mc_db->md_flags & MDB_DUPSORT) && LEAFSIZE(key, data) > env->me_nodemax) { - /* Too big for a node, insert in sub-DB */ + /* Too big for a node, insert in sub-DB. Set up an empty + * "old sub-page" for prep_subDB to expand to a full page. 
+ */ fp_flags = P_LEAF|P_DIRTY; fp = env->me_pbuf; fp->mp_pad = data->mv_size; /* used if MDB_DUPFIXED */ - fp->mp_lower = fp->mp_upper = olddata.mv_size = MP_LOBASE; + fp->mp_lower = fp->mp_upper = (PAGEHDRSZ-PAGEBASE); + olddata.mv_size = PAGEHDRSZ; goto prep_subDB; } } else { @@ -6004,7 +6007,7 @@ more: /* Make sub-page header for the dup items, with dummy body */ fp->mp_flags = P_LEAF|P_DIRTY|P_SUBP; - fp->mp_lower = MP_LOBASE; + fp->mp_lower = (PAGEHDRSZ-PAGEBASE); xdata.mv_size = PAGEHDRSZ + dkey.mv_size + data->mv_size; if (mc->mc_db->md_flags & MDB_DUPFIXED) { fp->mp_flags |= P_LEAF2; @@ -6014,7 +6017,7 @@ more: xdata.mv_size += 2 * (sizeof(indx_t) + NODESIZE) + (dkey.mv_size & 1) + (data->mv_size & 1); } - fp->mp_upper = xdata.mv_size - MP_HIBASE; + fp->mp_upper = xdata.mv_size - PAGEBASE; olddata.mv_size = xdata.mv_size; /* pretend olddata is fp */ } else if (leaf->mn_flags & F_SUBDATA) { /* Data is on sub-DB, just store it */ @@ -6082,8 +6085,8 @@ prep_subDB: if (fp_flags & P_LEAF2) { memcpy(METADATA(mp), METADATA(fp), NUMKEYS(fp) * fp->mp_pad); } else { - memcpy((char *)mp + mp->mp_upper + MP_HIBASE, (char *)fp + fp->mp_upper + MP_HIBASE, - olddata.mv_size - fp->mp_upper); + memcpy((char *)mp + mp->mp_upper + PAGEBASE, (char *)fp + fp->mp_upper + PAGEBASE, + olddata.mv_size - fp->mp_upper - PAGEBASE); for (i=0; imp_ptrs[i] = fp->mp_ptrs[i] + offset; } @@ -6404,8 +6407,8 @@ mdb_page_new(MDB_cursor *mc, uint32_t flags, int num, MDB_page **mp) DPRINTF(("allocated new mpage %"Z"u, page size %u", np->mp_pgno, mc->mc_txn->mt_env->me_psize)); np->mp_flags = flags | P_DIRTY; - np->mp_lower = MP_LOBASE; - np->mp_upper = mc->mc_txn->mt_env->me_psize - MP_HIBASE; + np->mp_lower = (PAGEHDRSZ-PAGEBASE); + np->mp_upper = mc->mc_txn->mt_env->me_psize - PAGEBASE; if (IS_BRANCH(np)) mc->mc_db->md_branch_pages++; @@ -6658,7 +6661,7 @@ mdb_node_del(MDB_cursor *mc, int ksize) } } - base = (char *)mp + mp->mp_upper + MP_HIBASE; + base = (char *)mp + mp->mp_upper + PAGEBASE; 
memmove(base + sz, base, ptr - mp->mp_upper); mp->mp_lower -= sizeof(indx_t); @@ -6712,7 +6715,7 @@ mdb_node_shrink(MDB_page *mp, indx_t indx) mp->mp_ptrs[i] += delta; } - base = (char *)mp + mp->mp_upper + MP_HIBASE; + base = (char *)mp + mp->mp_upper + PAGEBASE; memmove(base + delta, base, ptr - mp->mp_upper + NODESIZE + NODEKSZ(node)); mp->mp_upper += delta; } @@ -6984,7 +6987,7 @@ mdb_update_key(MDB_cursor *mc, MDB_val *key) mp->mp_ptrs[i] -= delta; } - base = (char *)mp + mp->mp_upper + MP_HIBASE; + base = (char *)mp + mp->mp_upper + PAGEBASE; len = ptr - mp->mp_upper + NODESIZE; memmove(base - delta, base, len); mp->mp_upper -= delta; @@ -7779,8 +7782,8 @@ mdb_page_split(MDB_cursor *mc, MDB_val *newkey, MDB_val *newdata, pgno_t newpgno } copy->mp_pgno = mp->mp_pgno; copy->mp_flags = mp->mp_flags; - copy->mp_lower = MP_LOBASE; - copy->mp_upper = env->me_psize - MP_HIBASE; + copy->mp_lower = (PAGEHDRSZ-PAGEBASE); + copy->mp_upper = env->me_psize - PAGEBASE; /* prepare to insert */ for (i=0, j=0; imp_ptrs[i] + MP_HIBASE); + node = (MDB_node *)((char *)mp + copy->mp_ptrs[i] + PAGEBASE); psize += NODESIZE + NODEKSZ(node) + sizeof(indx_t); if (IS_LEAF(mp)) { if (F_ISSET(node->mn_flags, F_BIGDATA)) @@ -7840,7 +7843,7 @@ mdb_page_split(MDB_cursor *mc, MDB_val *newkey, MDB_val *newdata, pgno_t newpgno sepkey.mv_size = newkey->mv_size; sepkey.mv_data = newkey->mv_data; } else { - node = (MDB_node *)((char *)mp + copy->mp_ptrs[split_indx] + MP_HIBASE); + node = (MDB_node *)((char *)mp + copy->mp_ptrs[split_indx] + PAGEBASE); sepkey.mv_size = node->mn_ksize; sepkey.mv_data = NODEKEY(node); } @@ -7921,7 +7924,7 @@ mdb_page_split(MDB_cursor *mc, MDB_val *newkey, MDB_val *newdata, pgno_t newpgno /* Update index for the new key. 
*/ mc->mc_ki[mc->mc_top] = j; } else { - node = (MDB_node *)((char *)mp + copy->mp_ptrs[i] + MP_HIBASE); + node = (MDB_node *)((char *)mp + copy->mp_ptrs[i] + PAGEBASE); rkey.mv_data = NODEKEY(node); rkey.mv_size = node->mn_ksize; if (IS_LEAF(mp)) { @@ -7957,7 +7960,7 @@ mdb_page_split(MDB_cursor *mc, MDB_val *newkey, MDB_val *newdata, pgno_t newpgno mp->mp_lower = copy->mp_lower; mp->mp_upper = copy->mp_upper; memcpy(NODEPTR(mp, nkeys-1), NODEPTR(copy, nkeys-1), - env->me_psize - copy->mp_upper - MP_HIBASE); + env->me_psize - copy->mp_upper - PAGEBASE); /* reset back to original page */ if (newindx < split_indx) {