diff --git a/libraries/liblmdb/.gitignore b/libraries/liblmdb/.gitignore index 0b4b1cba67..f3277afe42 100644 --- a/libraries/liblmdb/.gitignore +++ b/libraries/liblmdb/.gitignore @@ -10,6 +10,9 @@ mdb_stat *.bak *.orig *.rej +*.gcov +*.gcda +*.gcno core core.* valgrind.* diff --git a/libraries/liblmdb/Makefile b/libraries/liblmdb/Makefile index b65c9b9a8b..25c1095466 100644 --- a/libraries/liblmdb/Makefile +++ b/libraries/liblmdb/Makefile @@ -17,7 +17,7 @@ # read mdb.c before changing any of them. # CC = gcc -W = -W -Wall -Wno-unused-parameter -Wbad-function-cast +W = -W -Wall -Wno-unused-parameter -Wbad-function-cast -Wuninitialized THREADS = -pthread OPT = -O2 -g CFLAGS = $(THREADS) $(OPT) $(W) $(XCFLAGS) @@ -76,3 +76,22 @@ midl.o: midl.c midl.h %.o: %.c lmdb.h $(CC) $(CFLAGS) $(CPPFLAGS) -c $< + +COV_FLAGS=-fprofile-arcs -ftest-coverage +COV_OBJS=xmdb.o xmidl.o + +coverage: xmtest + for i in mtest*.c [0-9]*.c; do j=`basename \$$i .c`; $(MAKE) $$j.o; \ + gcc -o x$$j $$j.o $(COV_OBJS) -pthread $(COV_FLAGS); \ + rm -rf testdb; mkdir testdb; ./x$$j; done + gcov xmdb.c + gcov xmidl.c + +xmtest: mtest.o xmdb.o xmidl.o + gcc -o xmtest mtest.o xmdb.o xmidl.o -pthread $(COV_FLAGS) + +xmdb.o: mdb.c lmdb.h midl.h + $(CC) $(CFLAGS) -fPIC $(CPPFLAGS) -O0 $(COV_FLAGS) -c mdb.c -o $@ + +xmidl.o: midl.c midl.h + $(CC) $(CFLAGS) -fPIC $(CPPFLAGS) -O0 $(COV_FLAGS) -c midl.c -o $@ diff --git a/libraries/liblmdb/lmdb.h b/libraries/liblmdb/lmdb.h index 98d9cc1e2c..0bc97cdf40 100644 --- a/libraries/liblmdb/lmdb.h +++ b/libraries/liblmdb/lmdb.h @@ -333,6 +333,15 @@ typedef void (MDB_rel_func)(MDB_val *item, void *oldptr, void *newptr, void *rel #define MDB_MULTIPLE 0x80000 /* @} */ +/** @defgroup mdb_copy Copy Flags + * @{ + */ +/** Compacting copy: Omit free space from copy, and renumber all + * pages sequentially. + */ +#define MDB_CP_COMPACT 0x01 +/* @} */ + /** @brief Cursor Get operations. 
* * This is the set of all operations for retrieving data @@ -622,6 +631,49 @@ int mdb_env_copy(MDB_env *env, const char *path); */ int mdb_env_copyfd(MDB_env *env, mdb_filehandle_t fd); + /** @brief Copy an LMDB environment to the specified path, with options. + * + * This function may be used to make a backup of an existing environment. + * No lockfile is created, since it gets recreated at need. + * @note This call can trigger significant file size growth if run in + * parallel with write transactions, because it employs a read-only + * transaction. See long-lived transactions under @ref caveats_sec. + * @param[in] env An environment handle returned by #mdb_env_create(). It + * must have already been opened successfully. + * @param[in] path The directory in which the copy will reside. This + * directory must already exist and be writable but must otherwise be + * empty. + * @param[in] flags Special options for this operation. This parameter + * must be set to 0 or by bitwise OR'ing together one or more of the + * @ref mdb_copy values; #MDB_CP_COMPACT is the only one defined so far. + * + * @return A non-zero error value on failure and 0 on success. + */ +int mdb_env_copy2(MDB_env *env, const char *path, unsigned int flags); + + /** @brief Copy an LMDB environment to the specified file descriptor, + * with options. + * + * This function may be used to make a backup of an existing environment. + * No lockfile is created, since it gets recreated at need. See + * #mdb_env_copy2() for further details. + * @note This call can trigger significant file size growth if run in + * parallel with write transactions, because it employs a read-only + * transaction. See long-lived transactions under @ref caveats_sec. + * @param[in] env An environment handle returned by #mdb_env_create(). It + * must have already been opened successfully. + * @param[in] fd The file descriptor to write the copy to. It must + * have already been opened for Write access. + * @param[in] flags Special options for this operation. 
+ * See #mdb_env_copy2() for options. + * @return A non-zero error value on failure and 0 on success. + */ +int mdb_env_copyfd2(MDB_env *env, mdb_filehandle_t fd, unsigned int flags); + /** @brief Return statistics about the LMDB environment. * * @param[in] env An environment handle returned by #mdb_env_create() diff --git a/libraries/liblmdb/mdb.c b/libraries/liblmdb/mdb.c index c21f556e08..d8cf710bfd 100644 --- a/libraries/liblmdb/mdb.c +++ b/libraries/liblmdb/mdb.c @@ -35,15 +35,17 @@ #ifndef _GNU_SOURCE #define _GNU_SOURCE 1 #endif -#include -#include #ifdef _WIN32 +#include #include /** getpid() returns int; MinGW defines pid_t but MinGW64 typedefs it * as int64 which is wrong. MSVC doesn't define it at all, so just * don't use it. */ #define MDB_PID_T int +#define MDB_THR_T DWORD +#include +#include #ifdef __GNUC__ # include #else @@ -55,7 +57,10 @@ # endif #endif #else +#include +#include #define MDB_PID_T pid_t +#define MDB_THR_T pthread_t #include #include #include @@ -145,6 +150,13 @@ # error "Two's complement, reasonably sized integer types, please" #endif +#ifdef __GNUC__ +/** Put infrequently used env functions in separate section */ +#define ESECT __attribute__ ((section("text_env"))) +#else +#define ESECT +#endif + /** @defgroup internal LMDB Internals * @{ */ @@ -156,6 +168,11 @@ * @{ */ +/* Features under development */ +#ifndef MDB_DEVEL +#define MDB_DEVEL 0 +#endif + /** Wrapper around __func__, which is a C99 feature */ #if __STDC_VERSION__ >= 199901L # define mdb_func_ __func__ @@ -169,8 +186,10 @@ #ifdef _WIN32 #define MDB_USE_HASH 1 #define MDB_PIDLOCK 0 -#define pthread_t DWORD +#define THREAD_RET DWORD +#define pthread_t HANDLE #define pthread_mutex_t HANDLE +#define pthread_cond_t HANDLE #define pthread_key_t DWORD #define pthread_self() GetCurrentThreadId() #define pthread_key_create(x,y) \ @@ -178,12 +197,16 @@ #define pthread_key_delete(x) TlsFree(x) #define pthread_getspecific(x) TlsGetValue(x) #define pthread_setspecific(x,y) 
(TlsSetValue(x,y) ? 0 : ErrCode()) -#define pthread_mutex_unlock(x) ReleaseMutex(x) -#define pthread_mutex_lock(x) WaitForSingleObject(x, INFINITE) -#define LOCK_MUTEX_R(env) pthread_mutex_lock((env)->me_rmutex) -#define UNLOCK_MUTEX_R(env) pthread_mutex_unlock((env)->me_rmutex) -#define LOCK_MUTEX_W(env) pthread_mutex_lock((env)->me_wmutex) -#define UNLOCK_MUTEX_W(env) pthread_mutex_unlock((env)->me_wmutex) +#define pthread_mutex_unlock(x) ReleaseMutex(*x) +#define pthread_mutex_lock(x) WaitForSingleObject(*x, INFINITE) +#define pthread_cond_signal(x) SetEvent(*x) +#define pthread_cond_wait(cond,mutex) do{SignalObjectAndWait(*mutex, *cond, INFINITE, FALSE); WaitForSingleObject(*mutex, INFINITE);}while(0) +#define THREAD_CREATE(thr,start,arg) thr=CreateThread(NULL,0,start,arg,0,NULL) +#define THREAD_FINISH(thr) WaitForSingleObject(thr, INFINITE) +#define LOCK_MUTEX_R(env) pthread_mutex_lock(&(env)->me_rmutex) +#define UNLOCK_MUTEX_R(env) pthread_mutex_unlock(&(env)->me_rmutex) +#define LOCK_MUTEX_W(env) pthread_mutex_lock(&(env)->me_wmutex) +#define UNLOCK_MUTEX_W(env) pthread_mutex_unlock(&(env)->me_wmutex) #define getpid() GetCurrentProcessId() #define MDB_FDATASYNC(fd) (!FlushFileBuffers(fd)) #define MDB_MSYNC(addr,len,flags) (!FlushViewOfFile(addr,len)) @@ -198,7 +221,9 @@ #endif #define Z "I" #else - +#define THREAD_RET void * +#define THREAD_CREATE(thr,start,arg) pthread_create(&thr,NULL,start,arg) +#define THREAD_FINISH(thr) pthread_join(thr,NULL) #define Z "z" /**< printf format modifier for size_t */ /** For MDB_LOCK_FORMAT: True if readers take a pid lock in the lockfile */ @@ -352,7 +377,8 @@ static txnid_t mdb_debug_start; /** @brief The maximum size of a database page. * - * This is 32k, since it must fit in #MDB_page.%mp_upper. + * It is 32k or 64k, since value-PAGEBASE must fit in + * #MDB_page.%mp_upper. * * LMDB will use database pages < OS pages if needed. 
* That causes more I/O in write transactions: The OS must @@ -365,7 +391,7 @@ static txnid_t mdb_debug_start; * pressure from other processes is high. So until OSs have * actual paging support for Huge pages, they're not viable. */ -#define MAX_PAGESIZE 0x8000 +#define MAX_PAGESIZE (PAGEBASE ? 0x10000 : 0x8000) /** The minimum number of keys required in a database page. * Setting this to a larger value will place a smaller bound on the @@ -388,7 +414,7 @@ static txnid_t mdb_debug_start; #define MDB_MAGIC 0xBEEFC0DE /** The version number for a database's datafile format. */ -#define MDB_DATA_VERSION 1 +#define MDB_DATA_VERSION ((MDB_DEVEL) ? 999 : 1) /** The version number for a database's lockfile format. */ #define MDB_LOCK_VERSION 1 @@ -537,7 +563,7 @@ typedef struct MDB_rxbody { /** The process ID of the process owning this reader txn. */ MDB_PID_T mrb_pid; /** The thread ID of the thread owning this txn. */ - pthread_t mrb_tid; + MDB_THR_T mrb_tid; } MDB_rxbody; /** The actual reader record, with cacheline padding. */ @@ -635,7 +661,7 @@ typedef struct MDB_page { #define mp_next mp_p.p_next union { pgno_t p_pgno; /**< page number */ - void * p_next; /**< for in-memory list of freed structs */ + struct MDB_page *p_next; /**< for in-memory list of freed pages */ } mp_p; uint16_t mp_pad; /** @defgroup mdb_page Page Flags @@ -673,8 +699,11 @@ typedef struct MDB_page { /** Address of first usable data byte in a page, after the header */ #define METADATA(p) ((void *)((char *)(p) + PAGEHDRSZ)) + /** ITS#7713, change PAGEBASE to handle 65536 byte pages */ +#define PAGEBASE ((MDB_DEVEL) ? 
PAGEHDRSZ : 0) + /** Number of nodes on a page */ -#define NUMKEYS(p) (((p)->mp_lower - PAGEHDRSZ) >> 1) +#define NUMKEYS(p) (((p)->mp_lower - (PAGEHDRSZ-PAGEBASE)) >> 1) /** The amount of space remaining in the page */ #define SIZELEFT(p) (indx_t)((p)->mp_upper - (p)->mp_lower) @@ -701,6 +730,9 @@ typedef struct MDB_page { /** The number of overflow pages needed to store the given size. */ #define OVPAGES(size, psize) ((PAGEHDRSZ-1 + (size)) / (psize) + 1) + /** Link in #MDB_txn.%mt_loose_pages list */ +#define NEXT_LOOSE_PAGE(p) (*(MDB_page **)((p) + 2)) + /** Header for a single key/data pair within a page. * Used in pages of type #P_BRANCH and #P_LEAF without #P_LEAF2. * We guarantee 2-byte alignment for 'MDB_node's. @@ -752,7 +784,7 @@ typedef struct MDB_node { #define LEAFSIZE(k, d) (NODESIZE + (k)->mv_size + (d)->mv_size) /** Address of node \b i in page \b p */ -#define NODEPTR(p, i) ((MDB_node *)((char *)(p) + (p)->mp_ptrs[i])) +#define NODEPTR(p, i) ((MDB_node *)((char *)(p) + (p)->mp_ptrs[i] + PAGEBASE)) /** Address of the key for the node */ #define NODEKEY(node) (void *)((node)->mn_data) @@ -900,7 +932,7 @@ struct MDB_txn { */ MDB_IDL mt_free_pgs; /** The list of loose pages that became unused and may be reused - * in this transaction. + * in this transaction, linked through #NEXT_LOOSE_PAGE(page). */ MDB_page *mt_loose_pgs; /** The sorted list of dirty pages we temporarily wrote to disk @@ -1362,7 +1394,7 @@ mdb_page_list(MDB_page *mp) total = EVEN(total); } fprintf(stderr, "Total: header %d + contents %d + unused %d\n", - IS_LEAF2(mp) ? PAGEHDRSZ : mp->mp_lower, total, SIZELEFT(mp)); + IS_LEAF2(mp) ? 
PAGEHDRSZ : PAGEBASE + mp->mp_lower, total, SIZELEFT(mp)); } void @@ -1569,9 +1601,9 @@ mdb_page_loose(MDB_cursor *mc, MDB_page *mp) } } if (loose) { - pgno_t *pp = (pgno_t *)mp->mp_ptrs; - *pp = pgno; - mp->mp_next = mc->mc_txn->mt_loose_pgs; + DPRINTF(("loosen db %d page %"Z"u", DDBI(mc), + mp->mp_pgno)); + NEXT_LOOSE_PAGE(mp) = mc->mc_txn->mt_loose_pgs; mc->mc_txn->mt_loose_pgs = mp; mp->mp_flags |= P_LOOSE; } else { @@ -1593,7 +1625,7 @@ mdb_page_loose(MDB_cursor *mc, MDB_page *mp) static int mdb_pages_xkeep(MDB_cursor *mc, unsigned pflags, int all) { - enum { Mask = P_SUBP|P_DIRTY|P_KEEP }; + enum { Mask = P_SUBP|P_DIRTY|P_LOOSE|P_KEEP }; MDB_txn *txn = mc->mc_txn; MDB_cursor *m3; MDB_xcursor *mx; @@ -1631,12 +1663,6 @@ mdb_pages_xkeep(MDB_cursor *mc, unsigned pflags, int all) break; } - /* Loose pages shouldn't be spilled */ - for (dp = txn->mt_loose_pgs; dp; dp=dp->mp_next) { - if ((dp->mp_flags & Mask) == pflags) - dp->mp_flags ^= P_KEEP; - } - if (all) { /* Mark dirty root pages */ for (i=0; imt_numdbs; i++) { @@ -1750,7 +1776,7 @@ mdb_page_spill(MDB_cursor *m0, MDB_val *key, MDB_val *data) for (i=dl[0].mid; i && need; i--) { MDB_ID pn = dl[i].mid << 1; dp = dl[i].mptr; - if (dp->mp_flags & P_KEEP) + if (dp->mp_flags & (P_LOOSE|P_KEEP)) continue; /* Can't spill twice, make sure it's not already in a parent's * spill list. @@ -1866,11 +1892,10 @@ mdb_page_alloc(MDB_cursor *mc, int num, MDB_page **mp) /* If there are any loose pages, just use them */ if (num == 1 && txn->mt_loose_pgs) { - pgno_t *pp; np = txn->mt_loose_pgs; - txn->mt_loose_pgs = np->mp_next; - pp = (pgno_t *)np->mp_ptrs; - np->mp_pgno = *pp; + txn->mt_loose_pgs = NEXT_LOOSE_PAGE(np); + DPRINTF(("db %d use loose page %"Z"u", DDBI(mc), + np->mp_pgno)); *mp = np; return MDB_SUCCESS; } @@ -2021,8 +2046,8 @@ mdb_page_copy(MDB_page *dst, MDB_page *src, unsigned int psize) * alignment so memcpy may copy words instead of bytes. 
*/ if ((unused &= -Align) && !IS_LEAF2(src)) { - upper &= -Align; - memcpy(dst, src, (lower + (Align-1)) & -Align); + upper = (upper + PAGEBASE) & -Align; + memcpy(dst, src, (lower + PAGEBASE + (Align-1)) & -Align); memcpy((pgno_t *)((char *)dst+upper), (pgno_t *)((char *)src+upper), psize - upper); } else { @@ -2389,7 +2414,7 @@ mdb_txn_renew0(MDB_txn *txn) return MDB_BAD_RSLOT; } else { MDB_PID_T pid = env->me_pid; - pthread_t tid = pthread_self(); + MDB_THR_T tid = pthread_self(); if (!env->me_live_reader) { rc = mdb_reader_pid(env, Pidset, pid); @@ -2741,28 +2766,22 @@ mdb_freelist_save(MDB_txn *txn) */ if (txn->mt_loose_pgs) { MDB_page *mp = txn->mt_loose_pgs; - pgno_t *pp; /* Just return them to freeDB */ if (env->me_pghead) { int i, j; mop = env->me_pghead; - while(mp) { - pgno_t pg; - pp = (pgno_t *)mp->mp_ptrs; - pg = *pp; + for (; mp; mp = NEXT_LOOSE_PAGE(mp)) { + pgno_t pg = mp->mp_pgno; j = mop[0] + 1; for (i = mop[0]; i && mop[i] < pg; i--) mop[j--] = mop[i]; mop[j] = pg; mop[0] += 1; - mp = mp->mp_next; } } else { /* Oh well, they were wasted. 
Put on freelist */ - while(mp) { - pp = (pgno_t *)mp->mp_ptrs; - mdb_midl_append(&txn->mt_free_pgs, *pp); - mp = mp->mp_next; + for (; mp; mp = NEXT_LOOSE_PAGE(mp)) { + mdb_midl_append(&txn->mt_free_pgs, mp->mp_pgno); } } txn->mt_loose_pgs = NULL; @@ -2930,8 +2949,8 @@ mdb_page_flush(MDB_txn *txn, int keep) while (++i <= pagecount) { dp = dl[i].mptr; /* Don't flush this page yet */ - if (dp->mp_flags & P_KEEP) { - dp->mp_flags ^= P_KEEP; + if (dp->mp_flags & (P_LOOSE|P_KEEP)) { + dp->mp_flags &= ~P_KEEP; dl[++j] = dl[i]; continue; } @@ -2945,8 +2964,8 @@ mdb_page_flush(MDB_txn *txn, int keep) if (++i <= pagecount) { dp = dl[i].mptr; /* Don't flush this page yet */ - if (dp->mp_flags & P_KEEP) { - dp->mp_flags ^= P_KEEP; + if (dp->mp_flags & (P_LOOSE|P_KEEP)) { + dp->mp_flags &= ~P_KEEP; dl[i].mid = 0; continue; } @@ -3075,6 +3094,7 @@ mdb_txn_commit(MDB_txn *txn) if (txn->mt_parent) { MDB_txn *parent = txn->mt_parent; + MDB_page **lp; MDB_ID2L dst, src; MDB_IDL pspill; unsigned x, y, len, ps_len; @@ -3172,6 +3192,11 @@ mdb_txn_commit(MDB_txn *txn) } } + /* Append our loose page list to parent's: lp addresses the current link slot, so follow the page it points at (*lp) to reach the tail slot */ + for (lp = &parent->mt_loose_pgs; *lp; lp = &NEXT_LOOSE_PAGE(*lp)) + ; + *lp = txn->mt_loose_pgs; + parent->mt_child = NULL; mdb_midl_free(((MDB_ntxn *)txn)->mnt_pgstate.mf_pghead); free(txn); @@ -3251,7 +3276,7 @@ fail: * @param[out] meta address of where to store the meta information * @return 0 on success, non-zero on failure. 
*/ -static int +static int ESECT mdb_env_read_header(MDB_env *env, MDB_meta *meta) { MDB_metabuf pbuf; @@ -3309,12 +3334,26 @@ mdb_env_read_header(MDB_env *env, MDB_meta *meta) return 0; } +static void ESECT +mdb_env_init_meta0(MDB_env *env, MDB_meta *meta) +{ + meta->mm_magic = MDB_MAGIC; + meta->mm_version = MDB_DATA_VERSION; + meta->mm_mapsize = env->me_mapsize; + meta->mm_psize = env->me_psize; + meta->mm_last_pg = 1; + meta->mm_flags = env->me_flags & 0xffff; + meta->mm_flags |= MDB_INTEGERKEY; + meta->mm_dbs[0].md_root = P_INVALID; + meta->mm_dbs[1].md_root = P_INVALID; +} + /** Write the environment parameters of a freshly created DB environment. * @param[in] env the environment handle * @param[out] meta address of where to store the meta information * @return 0 on success, non-zero on failure. */ -static int +static int ESECT mdb_env_init_meta(MDB_env *env, MDB_meta *meta) { MDB_page *p, *q; @@ -3338,15 +3377,7 @@ mdb_env_init_meta(MDB_env *env, MDB_meta *meta) psize = env->me_psize; - meta->mm_magic = MDB_MAGIC; - meta->mm_version = MDB_DATA_VERSION; - meta->mm_mapsize = env->me_mapsize; - meta->mm_psize = psize; - meta->mm_last_pg = 1; - meta->mm_flags = env->me_flags & 0xffff; - meta->mm_flags |= MDB_INTEGERKEY; - meta->mm_dbs[0].md_root = P_INVALID; - meta->mm_dbs[1].md_root = P_INVALID; + mdb_env_init_meta0(env, meta); p = calloc(2, psize); p->mp_pgno = 0; @@ -3502,7 +3533,7 @@ mdb_env_pick_meta(const MDB_env *env) return (env->me_metas[0]->mm_txnid < env->me_metas[1]->mm_txnid); } -int +int ESECT mdb_env_create(MDB_env **env) { MDB_env *e; @@ -3527,7 +3558,7 @@ mdb_env_create(MDB_env **env) return MDB_SUCCESS; } -static int +static int ESECT mdb_env_map(MDB_env *env, void *addr, int newsize) { MDB_page *p; @@ -3536,8 +3567,17 @@ mdb_env_map(MDB_env *env, void *addr, int newsize) int rc; HANDLE mh; LONG sizelo, sizehi; - sizelo = env->me_mapsize & 0xffffffff; - sizehi = env->me_mapsize >> 16 >> 16; /* only needed on Win64 */ + size_t msize; + + if 
(flags & MDB_RDONLY) { + msize = 0; + sizelo = 0; + sizehi = 0; + } else { + msize = env->me_mapsize; + sizelo = msize & 0xffffffff; + sizehi = msize >> 16 >> 16; /* only needed on Win64 */ + } /* Windows won't create mappings for zero length files. * Just allocate the maxsize right now. @@ -3555,7 +3595,7 @@ mdb_env_map(MDB_env *env, void *addr, int newsize) return ErrCode(); env->me_map = MapViewOfFileEx(mh, flags & MDB_WRITEMAP ? FILE_MAP_WRITE : FILE_MAP_READ, - 0, 0, env->me_mapsize, addr); + 0, 0, msize, addr); rc = env->me_map ? 0 : ErrCode(); CloseHandle(mh); if (rc) @@ -3601,7 +3641,7 @@ mdb_env_map(MDB_env *env, void *addr, int newsize) return MDB_SUCCESS; } -int +int ESECT mdb_env_set_mapsize(MDB_env *env, size_t size) { /* If env is already open, caller is responsible for making @@ -3635,7 +3675,7 @@ mdb_env_set_mapsize(MDB_env *env, size_t size) return MDB_SUCCESS; } -int +int ESECT mdb_env_set_maxdbs(MDB_env *env, MDB_dbi dbs) { if (env->me_map) @@ -3644,7 +3684,7 @@ mdb_env_set_maxdbs(MDB_env *env, MDB_dbi dbs) return MDB_SUCCESS; } -int +int ESECT mdb_env_set_maxreaders(MDB_env *env, unsigned int readers) { if (env->me_map || readers < 1) @@ -3653,7 +3693,7 @@ mdb_env_set_maxreaders(MDB_env *env, unsigned int readers) return MDB_SUCCESS; } -int +int ESECT mdb_env_get_maxreaders(MDB_env *env, unsigned int *readers) { if (!env || !readers) @@ -3664,7 +3704,7 @@ mdb_env_get_maxreaders(MDB_env *env, unsigned int *readers) /** Further setup required for opening an LMDB environment */ -static int +static int ESECT mdb_env_open2(MDB_env *env) { unsigned int flags = env->me_flags; @@ -3821,7 +3861,7 @@ PIMAGE_TLS_CALLBACK mdb_tls_cbp = mdb_tls_callback; #endif /** Downgrade the exclusive lock on the region back to shared */ -static int +static int ESECT mdb_env_share_locks(MDB_env *env, int *excl) { int rc = 0, toggle = mdb_env_pick_meta(env); @@ -3863,7 +3903,7 @@ mdb_env_share_locks(MDB_env *env, int *excl) /** Try to get exlusive lock, otherwise shared. 
* Maintain *excl = -1: no/unknown lock, 0: shared, 1: exclusive. */ -static int +static int ESECT mdb_env_excl_lock(MDB_env *env, int *excl) { int rc = 0; @@ -4005,7 +4045,7 @@ mdb_hash_enc(MDB_val *val, char *encbuf) * @param[in,out] excl In -1, out lock type: -1 none, 0 shared, 1 exclusive * @return 0 on success, non-zero on failure. */ -static int +static int ESECT mdb_env_setup_locks(MDB_env *env, char *lpath, int mode, int *excl) { #ifdef _WIN32 @@ -4235,7 +4275,7 @@ fail: # error "Persistent DB flags & env flags overlap, but both go in mm_flags" #endif -int +int ESECT mdb_env_open(MDB_env *env, const char *path, unsigned int flags, mdb_mode_t mode) { int oflags, rc, len, excl = -1; @@ -4363,7 +4403,7 @@ leave: } /** Destroy resources from mdb_env_open(), clear our readers & DBIs */ -static void +static void ESECT mdb_env_close0(MDB_env *env, int excl) { int i; @@ -4451,153 +4491,8 @@ mdb_env_close0(MDB_env *env, int excl) env->me_flags &= ~(MDB_ENV_ACTIVE|MDB_ENV_TXKEY); } -int -mdb_env_copyfd(MDB_env *env, HANDLE fd) -{ - MDB_txn *txn = NULL; - int rc; - size_t wsize; - char *ptr; -#ifdef _WIN32 - DWORD len, w2; -#define DO_WRITE(rc, fd, ptr, w2, len) rc = WriteFile(fd, ptr, w2, &len, NULL) -#else - ssize_t len; - size_t w2; -#define DO_WRITE(rc, fd, ptr, w2, len) len = write(fd, ptr, w2); rc = (len >= 0) -#endif - /* Do the lock/unlock of the reader mutex before starting the - * write txn. Otherwise other read txns could block writers. 
- */ - rc = mdb_txn_begin(env, NULL, MDB_RDONLY, &txn); - if (rc) - return rc; - - if (env->me_txns) { - /* We must start the actual read txn after blocking writers */ - mdb_txn_reset0(txn, "reset-stage1"); - - /* Temporarily block writers until we snapshot the meta pages */ - LOCK_MUTEX_W(env); - - rc = mdb_txn_renew0(txn); - if (rc) { - UNLOCK_MUTEX_W(env); - goto leave; - } - } - - wsize = env->me_psize * 2; - ptr = env->me_map; - w2 = wsize; - while (w2 > 0) { - DO_WRITE(rc, fd, ptr, w2, len); - if (!rc) { - rc = ErrCode(); - break; - } else if (len > 0) { - rc = MDB_SUCCESS; - ptr += len; - w2 -= len; - continue; - } else { - /* Non-blocking or async handles are not supported */ - rc = EIO; - break; - } - } - if (env->me_txns) - UNLOCK_MUTEX_W(env); - - if (rc) - goto leave; - - wsize = txn->mt_next_pgno * env->me_psize - wsize; - while (wsize > 0) { - if (wsize > MAX_WRITE) - w2 = MAX_WRITE; - else - w2 = wsize; - DO_WRITE(rc, fd, ptr, w2, len); - if (!rc) { - rc = ErrCode(); - break; - } else if (len > 0) { - rc = MDB_SUCCESS; - ptr += len; - wsize -= len; - continue; - } else { - rc = EIO; - break; - } - } - -leave: - mdb_txn_abort(txn); - return rc; -} - -int -mdb_env_copy(MDB_env *env, const char *path) -{ - int rc, len; - char *lpath; - HANDLE newfd = INVALID_HANDLE_VALUE; - - if (env->me_flags & MDB_NOSUBDIR) { - lpath = (char *)path; - } else { - len = strlen(path); - len += sizeof(DATANAME); - lpath = malloc(len); - if (!lpath) - return ENOMEM; - sprintf(lpath, "%s" DATANAME, path); - } - - /* The destination path must exist, but the destination file must not. - * We don't want the OS to cache the writes, since the source data is - * already in the OS cache. 
- */ -#ifdef _WIN32 - newfd = CreateFile(lpath, GENERIC_WRITE, 0, NULL, CREATE_NEW, - FILE_FLAG_NO_BUFFERING|FILE_FLAG_WRITE_THROUGH, NULL); -#else - newfd = open(lpath, O_WRONLY|O_CREAT|O_EXCL, 0666); -#endif - if (newfd == INVALID_HANDLE_VALUE) { - rc = ErrCode(); - goto leave; - } - -#ifdef O_DIRECT - /* Set O_DIRECT if the file system supports it */ - if ((rc = fcntl(newfd, F_GETFL)) != -1) - (void) fcntl(newfd, F_SETFL, rc | O_DIRECT); -#endif -#ifdef F_NOCACHE /* __APPLE__ */ - rc = fcntl(newfd, F_NOCACHE, 1); - if (rc) { - rc = ErrCode(); - goto leave; - } -#endif - - rc = mdb_env_copyfd(env, newfd); - -leave: - if (!(env->me_flags & MDB_NOSUBDIR)) - free(lpath); - if (newfd != INVALID_HANDLE_VALUE) - if (close(newfd) < 0 && rc == MDB_SUCCESS) - rc = ErrCode(); - - return rc; -} - -void +void ESECT mdb_env_close(MDB_env *env) { MDB_page *dp; @@ -6050,11 +5945,14 @@ mdb_cursor_put(MDB_cursor *mc, MDB_val *key, MDB_val *data, if ((mc->mc_db->md_flags & MDB_DUPSORT) && LEAFSIZE(key, data) > env->me_nodemax) { - /* Too big for a node, insert in sub-DB */ + /* Too big for a node, insert in sub-DB. Set up an empty + * "old sub-page" for prep_subDB to expand to a full page. 
+ */ fp_flags = P_LEAF|P_DIRTY; fp = env->me_pbuf; fp->mp_pad = data->mv_size; /* used if MDB_DUPFIXED */ - fp->mp_lower = fp->mp_upper = olddata.mv_size = PAGEHDRSZ; + fp->mp_lower = fp->mp_upper = (PAGEHDRSZ-PAGEBASE); + olddata.mv_size = PAGEHDRSZ; goto prep_subDB; } } else { @@ -6109,7 +6007,7 @@ more: /* Make sub-page header for the dup items, with dummy body */ fp->mp_flags = P_LEAF|P_DIRTY|P_SUBP; - fp->mp_lower = PAGEHDRSZ; + fp->mp_lower = (PAGEHDRSZ-PAGEBASE); xdata.mv_size = PAGEHDRSZ + dkey.mv_size + data->mv_size; if (mc->mc_db->md_flags & MDB_DUPFIXED) { fp->mp_flags |= P_LEAF2; @@ -6119,8 +6017,8 @@ more: xdata.mv_size += 2 * (sizeof(indx_t) + NODESIZE) + (dkey.mv_size & 1) + (data->mv_size & 1); } - fp->mp_upper = xdata.mv_size; - olddata.mv_size = fp->mp_upper; /* pretend olddata is fp */ + fp->mp_upper = xdata.mv_size - PAGEBASE; + olddata.mv_size = xdata.mv_size; /* pretend olddata is fp */ } else if (leaf->mn_flags & F_SUBDATA) { /* Data is on sub-DB, just store it */ flags |= F_DUPDATA|F_SUBDATA; @@ -6187,8 +6085,8 @@ prep_subDB: if (fp_flags & P_LEAF2) { memcpy(METADATA(mp), METADATA(fp), NUMKEYS(fp) * fp->mp_pad); } else { - memcpy((char *)mp + mp->mp_upper, (char *)fp + fp->mp_upper, - olddata.mv_size - fp->mp_upper); + memcpy((char *)mp + mp->mp_upper + PAGEBASE, (char *)fp + fp->mp_upper + PAGEBASE, + olddata.mv_size - fp->mp_upper - PAGEBASE); for (i=0; imp_ptrs[i] = fp->mp_ptrs[i] + offset; } @@ -6509,8 +6407,8 @@ mdb_page_new(MDB_cursor *mc, uint32_t flags, int num, MDB_page **mp) DPRINTF(("allocated new mpage %"Z"u, page size %u", np->mp_pgno, mc->mc_txn->mt_env->me_psize)); np->mp_flags = flags | P_DIRTY; - np->mp_lower = PAGEHDRSZ; - np->mp_upper = mc->mc_txn->mt_env->me_psize; + np->mp_lower = (PAGEHDRSZ-PAGEBASE); + np->mp_upper = mc->mc_txn->mt_env->me_psize - PAGEBASE; if (IS_BRANCH(np)) mc->mc_db->md_branch_pages++; @@ -6763,7 +6661,7 @@ mdb_node_del(MDB_cursor *mc, int ksize) } } - base = (char *)mp + mp->mp_upper; + base = 
(char *)mp + mp->mp_upper + PAGEBASE; memmove(base + sz, base, ptr - mp->mp_upper); mp->mp_lower -= sizeof(indx_t); @@ -6817,7 +6715,7 @@ mdb_node_shrink(MDB_page *mp, indx_t indx) mp->mp_ptrs[i] += delta; } - base = (char *)mp + mp->mp_upper; + base = (char *)mp + mp->mp_upper + PAGEBASE; memmove(base + delta, base, ptr - mp->mp_upper + NODESIZE + NODEKSZ(node)); mp->mp_upper += delta; } @@ -7089,7 +6987,7 @@ mdb_update_key(MDB_cursor *mc, MDB_val *key) mp->mp_ptrs[i] -= delta; } - base = (char *)mp + mp->mp_upper; + base = (char *)mp + mp->mp_upper + PAGEBASE; len = ptr - mp->mp_upper + NODESIZE; memmove(base - delta, base, len); mp->mp_upper -= delta; @@ -7645,8 +7543,10 @@ mdb_cursor_del0(MDB_cursor *mc) /* if mc points past last node in page, find next sibling */ if (mc->mc_ki[mc->mc_top] >= nkeys) { rc = mdb_cursor_sibling(mc, 1); - if (rc == MDB_NOTFOUND) + if (rc == MDB_NOTFOUND) { + mc->mc_flags |= C_EOF; rc = MDB_SUCCESS; + } } /* Adjust other cursors pointing to mp */ @@ -7664,8 +7564,10 @@ mdb_cursor_del0(MDB_cursor *mc) } if (m3->mc_ki[mc->mc_top] >= nkeys) { rc = mdb_cursor_sibling(m3, 1); - if (rc == MDB_NOTFOUND) + if (rc == MDB_NOTFOUND) { + m3->mc_flags |= C_EOF; rc = MDB_SUCCESS; + } } } } @@ -7880,8 +7782,8 @@ mdb_page_split(MDB_cursor *mc, MDB_val *newkey, MDB_val *newdata, pgno_t newpgno } copy->mp_pgno = mp->mp_pgno; copy->mp_flags = mp->mp_flags; - copy->mp_lower = PAGEHDRSZ; - copy->mp_upper = env->me_psize; + copy->mp_lower = (PAGEHDRSZ-PAGEBASE); + copy->mp_upper = env->me_psize - PAGEBASE; /* prepare to insert */ for (i=0, j=0; imp_ptrs[i]); + node = (MDB_node *)((char *)mp + copy->mp_ptrs[i] + PAGEBASE); psize += NODESIZE + NODEKSZ(node) + sizeof(indx_t); if (IS_LEAF(mp)) { if (F_ISSET(node->mn_flags, F_BIGDATA)) @@ -7941,7 +7843,7 @@ mdb_page_split(MDB_cursor *mc, MDB_val *newkey, MDB_val *newdata, pgno_t newpgno sepkey.mv_size = newkey->mv_size; sepkey.mv_data = newkey->mv_data; } else { - node = (MDB_node *)((char *)mp + 
copy->mp_ptrs[split_indx]); + node = (MDB_node *)((char *)mp + copy->mp_ptrs[split_indx] + PAGEBASE); sepkey.mv_size = node->mn_ksize; sepkey.mv_data = NODEKEY(node); } @@ -8022,7 +7924,7 @@ mdb_page_split(MDB_cursor *mc, MDB_val *newkey, MDB_val *newdata, pgno_t newpgno /* Update index for the new key. */ mc->mc_ki[mc->mc_top] = j; } else { - node = (MDB_node *)((char *)mp + copy->mp_ptrs[i]); + node = (MDB_node *)((char *)mp + copy->mp_ptrs[i] + PAGEBASE); rkey.mv_data = NODEKEY(node); rkey.mv_size = node->mn_ksize; if (IS_LEAF(mp)) { @@ -8058,7 +7960,7 @@ mdb_page_split(MDB_cursor *mc, MDB_val *newkey, MDB_val *newdata, pgno_t newpgno mp->mp_lower = copy->mp_lower; mp->mp_upper = copy->mp_upper; memcpy(NODEPTR(mp, nkeys-1), NODEPTR(copy, nkeys-1), - env->me_psize - copy->mp_upper); + env->me_psize - copy->mp_upper - PAGEBASE); /* reset back to original page */ if (newindx < split_indx) { @@ -8157,7 +8059,559 @@ mdb_put(MDB_txn *txn, MDB_dbi dbi, return mdb_cursor_put(&mc, key, data, flags); } -int +#ifndef MDB_WBUF +#define MDB_WBUF (1024*1024) +#endif + + /** State needed for a compacting copy. */ +typedef struct mdb_copy { + pthread_mutex_t mc_mutex; + pthread_cond_t mc_cond; + char *mc_wbuf[2]; + char *mc_over[2]; + MDB_env *mc_env; + MDB_txn *mc_txn; + int mc_wlen[2]; + int mc_olen[2]; + pgno_t mc_next_pgno; + HANDLE mc_fd; + int mc_status; + volatile int mc_new; + int mc_toggle; + +} mdb_copy; + + /** Dedicated writer thread for compacting copy. 
*/ +static THREAD_RET ESECT +mdb_env_copythr(void *arg) +{ + mdb_copy *my = arg; + char *ptr; + int toggle = 0, wsize, rc = MDB_SUCCESS; /* stays 0 when a buffer is empty, so the status check below never reads an unset value */ +#ifdef _WIN32 + DWORD len; +#define DO_WRITE(rc, fd, ptr, w2, len) rc = WriteFile(fd, ptr, w2, &len, NULL) +#else + int len; +#define DO_WRITE(rc, fd, ptr, w2, len) len = write(fd, ptr, w2); rc = (len >= 0) +#endif + + pthread_mutex_lock(&my->mc_mutex); + my->mc_new = 0; + pthread_cond_signal(&my->mc_cond); + for(;;) { + while (!my->mc_new) + pthread_cond_wait(&my->mc_cond, &my->mc_mutex); + if (my->mc_new < 0) { + my->mc_new = 0; + break; + } + my->mc_new = 0; + wsize = my->mc_wlen[toggle]; + ptr = my->mc_wbuf[toggle]; +again: + while (wsize > 0) { + DO_WRITE(rc, my->mc_fd, ptr, wsize, len); + if (!rc) { + rc = ErrCode(); + break; + } else if (len > 0) { + rc = MDB_SUCCESS; + ptr += len; + wsize -= len; + continue; + } else { + rc = EIO; + break; + } + } + if (rc) { + my->mc_status = rc; + break; + } + /* If there's an overflow page tail, write it too */ + if (my->mc_olen[toggle]) { + wsize = my->mc_olen[toggle]; + ptr = my->mc_over[toggle]; + my->mc_olen[toggle] = 0; + goto again; + } + my->mc_wlen[toggle] = 0; + toggle ^= 1; + pthread_cond_signal(&my->mc_cond); + } + pthread_cond_signal(&my->mc_cond); + pthread_mutex_unlock(&my->mc_mutex); + return (THREAD_RET)0; +#undef DO_WRITE +} + + /** Tell the writer thread there's a buffer ready to write. st is stored in mc_new (1: buffer ready, negative: writer should exit). Returns 0, or the writer's saved mc_status if it has already failed. */ +static int ESECT +mdb_env_cthr_toggle(mdb_copy *my, int st) +{ + int toggle = my->mc_toggle ^ 1; + pthread_mutex_lock(&my->mc_mutex); + if (my->mc_status) { + pthread_mutex_unlock(&my->mc_mutex); + return my->mc_status; + } + while (my->mc_new == 1) + pthread_cond_wait(&my->mc_cond, &my->mc_mutex); + my->mc_new = st; + my->mc_toggle = toggle; + pthread_cond_signal(&my->mc_cond); + pthread_mutex_unlock(&my->mc_mutex); + return 0; +} + + /** Depth-first tree traversal for compacting copy. 
*/ +static int ESECT +mdb_env_cwalk(mdb_copy *my, pgno_t *pg, int flags) +{ + MDB_cursor mc; + MDB_txn *txn = my->mc_txn; + MDB_node *ni; + MDB_page *mo, *mp, *leaf; + char *buf, *ptr; + int rc, toggle; + unsigned int i; + + /* Empty DB, nothing to do */ + if (*pg == P_INVALID) + return MDB_SUCCESS; + + mc.mc_snum = 1; + mc.mc_top = 0; + mc.mc_txn = txn; + + rc = mdb_page_get(my->mc_txn, *pg, &mc.mc_pg[0], NULL); + if (rc) + return rc; + rc = mdb_page_search_root(&mc, NULL, MDB_PS_FIRST); + if (rc) + return rc; + + /* Make cursor pages writable */ + buf = ptr = malloc(my->mc_env->me_psize * mc.mc_snum); + if (buf == NULL) + return ENOMEM; + + for (i=0; imc_env->me_psize); + mc.mc_pg[i] = (MDB_page *)ptr; + ptr += my->mc_env->me_psize; + } + + /* This is writable space for a leaf page. Usually not needed. */ + leaf = (MDB_page *)ptr; + + toggle = my->mc_toggle; + while (mc.mc_snum > 0) { + unsigned n; + mp = mc.mc_pg[mc.mc_top]; + n = NUMKEYS(mp); + + if (IS_LEAF(mp)) { + if (!IS_LEAF2(mp) && !(flags & F_DUPDATA)) { + for (i=0; imn_flags & F_BIGDATA) { + MDB_page *omp; + pgno_t pg; + + /* Need writable leaf */ + if (mp != leaf) { + mc.mc_pg[mc.mc_top] = leaf; + mdb_page_copy(leaf, mp, my->mc_env->me_psize); + mp = leaf; + ni = NODEPTR(mp, i); + } + + memcpy(&pg, NODEDATA(ni), sizeof(pg)); + rc = mdb_page_get(txn, pg, &omp, NULL); + if (rc) + goto done; + if (my->mc_wlen[toggle] >= MDB_WBUF) { + rc = mdb_env_cthr_toggle(my, 1); + if (rc) + goto done; + toggle = my->mc_toggle; + } + mo = (MDB_page *)(my->mc_wbuf[toggle] + my->mc_wlen[toggle]); + memcpy(mo, omp, my->mc_env->me_psize); + mo->mp_pgno = my->mc_next_pgno; + my->mc_next_pgno += omp->mp_pages; + my->mc_wlen[toggle] += my->mc_env->me_psize; + if (omp->mp_pages > 1) { + my->mc_olen[toggle] = my->mc_env->me_psize * (omp->mp_pages - 1); + my->mc_over[toggle] = (char *)omp + my->mc_env->me_psize; + rc = mdb_env_cthr_toggle(my, 1); + if (rc) + goto done; + toggle = my->mc_toggle; + } + memcpy(NODEDATA(ni), 
&mo->mp_pgno, sizeof(pgno_t)); + } else if (ni->mn_flags & F_SUBDATA) { + MDB_db db; + + /* Need writable leaf */ + if (mp != leaf) { + mc.mc_pg[mc.mc_top] = leaf; + mdb_page_copy(leaf, mp, my->mc_env->me_psize); + mp = leaf; + ni = NODEPTR(mp, i); + } + + memcpy(&db, NODEDATA(ni), sizeof(db)); + my->mc_toggle = toggle; + rc = mdb_env_cwalk(my, &db.md_root, ni->mn_flags & F_DUPDATA); + if (rc) + goto done; + toggle = my->mc_toggle; + memcpy(NODEDATA(ni), &db, sizeof(db)); + } + } + } + } else { + mc.mc_ki[mc.mc_top]++; + if (mc.mc_ki[mc.mc_top] < n) { + pgno_t pg; +again: + ni = NODEPTR(mp, mc.mc_ki[mc.mc_top]); + pg = NODEPGNO(ni); + rc = mdb_page_get(txn, pg, &mp, NULL); + if (rc) + goto done; + mc.mc_top++; + mc.mc_snum++; + mc.mc_ki[mc.mc_top] = 0; + if (IS_BRANCH(mp)) { + /* Whenever we advance to a sibling branch page, + * we must proceed all the way down to its first leaf. + */ + mdb_page_copy(mc.mc_pg[mc.mc_top], mp, my->mc_env->me_psize); + goto again; + } else + mc.mc_pg[mc.mc_top] = mp; + continue; + } + } + if (my->mc_wlen[toggle] >= MDB_WBUF) { + rc = mdb_env_cthr_toggle(my, 1); + if (rc) + goto done; + toggle = my->mc_toggle; + } + mo = (MDB_page *)(my->mc_wbuf[toggle] + my->mc_wlen[toggle]); + mdb_page_copy(mo, mp, my->mc_env->me_psize); + mo->mp_pgno = my->mc_next_pgno++; + my->mc_wlen[toggle] += my->mc_env->me_psize; + if (mc.mc_top) { + /* Update parent if there is one */ + ni = NODEPTR(mc.mc_pg[mc.mc_top-1], mc.mc_ki[mc.mc_top-1]); + SETPGNO(ni, mo->mp_pgno); + mdb_cursor_pop(&mc); + } else { + /* Otherwise we're done */ + *pg = mo->mp_pgno; + break; + } + } +done: + free(buf); + return rc; +} + + /** Copy environment with compaction. 
*/ +static int ESECT +mdb_env_copyfd1(MDB_env *env, HANDLE fd) +{ + MDB_meta *mm; + MDB_page *mp; + mdb_copy my; + MDB_txn *txn = NULL; + pthread_t thr; + int rc; + +#ifdef _WIN32 + my.mc_mutex = CreateMutex(NULL, FALSE, NULL); + my.mc_cond = CreateEvent(NULL, FALSE, FALSE, NULL); + my.mc_wbuf[0] = _aligned_malloc(MDB_WBUF*2, env->me_psize); + if (my.mc_wbuf[0] == NULL) + return errno; +#else + pthread_mutex_init(&my.mc_mutex, NULL); + pthread_cond_init(&my.mc_cond, NULL); + rc = posix_memalign((void **)&my.mc_wbuf[0], env->me_psize, MDB_WBUF*2); + if (rc) + return rc; +#endif + my.mc_wbuf[1] = my.mc_wbuf[0] + MDB_WBUF; + my.mc_wlen[0] = 0; + my.mc_wlen[1] = 0; + my.mc_olen[0] = 0; + my.mc_olen[1] = 0; + my.mc_next_pgno = 2; + my.mc_status = 0; + my.mc_new = 1; + my.mc_toggle = 0; + my.mc_env = env; + my.mc_fd = fd; + THREAD_CREATE(thr, mdb_env_copythr, &my); + + rc = mdb_txn_begin(env, NULL, MDB_RDONLY, &txn); + if (rc) + return rc; + + mp = (MDB_page *)my.mc_wbuf[0]; + memset(mp, 0, 2*env->me_psize); + mp->mp_pgno = 0; + mp->mp_flags = P_META; + mm = (MDB_meta *)METADATA(mp); + mdb_env_init_meta0(env, mm); + mm->mm_address = env->me_metas[0]->mm_address; + + mp = (MDB_page *)(my.mc_wbuf[0] + env->me_psize); + mp->mp_pgno = 1; + mp->mp_flags = P_META; + *(MDB_meta *)METADATA(mp) = *mm; + mm = (MDB_meta *)METADATA(mp); + + /* Count the number of free pages, subtract from lastpg to find + * number of active pages + */ + { + MDB_ID freecount = 0; + MDB_cursor mc; + MDB_val key, data; + mdb_cursor_init(&mc, txn, FREE_DBI, NULL); + while ((rc = mdb_cursor_get(&mc, &key, &data, MDB_NEXT)) == 0) + freecount += *(MDB_ID *)data.mv_data; + freecount += txn->mt_dbs[0].md_branch_pages + + txn->mt_dbs[0].md_leaf_pages + + txn->mt_dbs[0].md_overflow_pages; + + /* Set metapage 1 */ + mm->mm_last_pg = txn->mt_next_pgno - freecount - 1; + mm->mm_dbs[1] = txn->mt_dbs[1]; + mm->mm_dbs[1].md_root = mm->mm_last_pg; + mm->mm_txnid = 1; + } + my.mc_wlen[0] = env->me_psize * 2; + 
my.mc_txn = txn; + pthread_mutex_lock(&my.mc_mutex); + while(my.mc_new) + pthread_cond_wait(&my.mc_cond, &my.mc_mutex); + pthread_mutex_unlock(&my.mc_mutex); + rc = mdb_env_cwalk(&my, &txn->mt_dbs[1].md_root, 0); + if (rc == MDB_SUCCESS && my.mc_wlen[my.mc_toggle]) + rc = mdb_env_cthr_toggle(&my, 1); + mdb_env_cthr_toggle(&my, -1); + pthread_mutex_lock(&my.mc_mutex); + while(my.mc_new) + pthread_cond_wait(&my.mc_cond, &my.mc_mutex); + pthread_mutex_unlock(&my.mc_mutex); + THREAD_FINISH(thr); + + mdb_txn_abort(txn); +#ifdef _WIN32 + CloseHandle(my.mc_cond); + CloseHandle(my.mc_mutex); + _aligned_free(my.mc_wbuf[0]); +#else + pthread_cond_destroy(&my.mc_cond); + pthread_mutex_destroy(&my.mc_mutex); + free(my.mc_wbuf[0]); +#endif + return rc; +} + + /** Copy environment as-is. */ +static int ESECT +mdb_env_copyfd0(MDB_env *env, HANDLE fd) +{ + MDB_txn *txn = NULL; + int rc; + size_t wsize; + char *ptr; +#ifdef _WIN32 + DWORD len, w2; +#define DO_WRITE(rc, fd, ptr, w2, len) rc = WriteFile(fd, ptr, w2, &len, NULL) +#else + ssize_t len; + size_t w2; +#define DO_WRITE(rc, fd, ptr, w2, len) len = write(fd, ptr, w2); rc = (len >= 0) +#endif + + /* Do the lock/unlock of the reader mutex before starting the + * write txn. Otherwise other read txns could block writers. 
+ */ + rc = mdb_txn_begin(env, NULL, MDB_RDONLY, &txn); + if (rc) + return rc; + + if (env->me_txns) { + /* We must start the actual read txn after blocking writers */ + mdb_txn_reset0(txn, "reset-stage1"); + + /* Temporarily block writers until we snapshot the meta pages */ + LOCK_MUTEX_W(env); + + rc = mdb_txn_renew0(txn); + if (rc) { + UNLOCK_MUTEX_W(env); + goto leave; + } + } + + wsize = env->me_psize * 2; + ptr = env->me_map; + w2 = wsize; + while (w2 > 0) { + DO_WRITE(rc, fd, ptr, w2, len); + if (!rc) { + rc = ErrCode(); + break; + } else if (len > 0) { + rc = MDB_SUCCESS; + ptr += len; + w2 -= len; + continue; + } else { + /* Non-blocking or async handles are not supported */ + rc = EIO; + break; + } + } + if (env->me_txns) + UNLOCK_MUTEX_W(env); + + if (rc) + goto leave; + + w2 = txn->mt_next_pgno * env->me_psize; +#ifdef WIN32 + { + LARGE_INTEGER fsize; + GetFileSizeEx(env->me_fd, &fsize); + if (w2 > fsize.QuadPart) + w2 = fsize.QuadPart; + } +#else + { + struct stat st; + fstat(env->me_fd, &st); + if (w2 > (size_t)st.st_size) + w2 = st.st_size; + } +#endif + wsize = w2 - wsize; + while (wsize > 0) { + if (wsize > MAX_WRITE) + w2 = MAX_WRITE; + else + w2 = wsize; + DO_WRITE(rc, fd, ptr, w2, len); + if (!rc) { + rc = ErrCode(); + break; + } else if (len > 0) { + rc = MDB_SUCCESS; + ptr += len; + wsize -= len; + continue; + } else { + rc = EIO; + break; + } + } + +leave: + mdb_txn_abort(txn); + return rc; +} + +int ESECT +mdb_env_copyfd2(MDB_env *env, HANDLE fd, unsigned int flags) +{ + if (flags & MDB_CP_COMPACT) + return mdb_env_copyfd1(env, fd); + else + return mdb_env_copyfd0(env, fd); +} + +int ESECT +mdb_env_copyfd(MDB_env *env, HANDLE fd) +{ + return mdb_env_copyfd2(env, fd, 0); +} + +int ESECT +mdb_env_copy2(MDB_env *env, const char *path, unsigned int flags) +{ + int rc, len; + char *lpath; + HANDLE newfd = INVALID_HANDLE_VALUE; + + if (env->me_flags & MDB_NOSUBDIR) { + lpath = (char *)path; + } else { + len = strlen(path); + len += 
sizeof(DATANAME); + lpath = malloc(len); + if (!lpath) + return ENOMEM; + sprintf(lpath, "%s" DATANAME, path); + } + + /* The destination path must exist, but the destination file must not. + * We don't want the OS to cache the writes, since the source data is + * already in the OS cache. + */ +#ifdef _WIN32 + newfd = CreateFile(lpath, GENERIC_WRITE, 0, NULL, CREATE_NEW, + FILE_FLAG_NO_BUFFERING|FILE_FLAG_WRITE_THROUGH, NULL); +#else + newfd = open(lpath, O_WRONLY|O_CREAT|O_EXCL, 0666); +#endif + if (newfd == INVALID_HANDLE_VALUE) { + rc = ErrCode(); + goto leave; + } + +#ifdef O_DIRECT + /* Set O_DIRECT if the file system supports it */ + if ((rc = fcntl(newfd, F_GETFL)) != -1) + (void) fcntl(newfd, F_SETFL, rc | O_DIRECT); +#endif +#ifdef F_NOCACHE /* __APPLE__ */ + rc = fcntl(newfd, F_NOCACHE, 1); + if (rc) { + rc = ErrCode(); + goto leave; + } +#endif + + rc = mdb_env_copyfd2(env, newfd, flags); + +leave: + if (!(env->me_flags & MDB_NOSUBDIR)) + free(lpath); + if (newfd != INVALID_HANDLE_VALUE) + if (close(newfd) < 0 && rc == MDB_SUCCESS) + rc = ErrCode(); + + return rc; +} + +int ESECT +mdb_env_copy(MDB_env *env, const char *path) +{ + return mdb_env_copy2(env, path, 0); +} + +int ESECT mdb_env_set_flags(MDB_env *env, unsigned int flag, int onoff) { if ((flag & CHANGEABLE) != flag) @@ -8169,7 +8623,7 @@ mdb_env_set_flags(MDB_env *env, unsigned int flag, int onoff) return MDB_SUCCESS; } -int +int ESECT mdb_env_get_flags(MDB_env *env, unsigned int *arg) { if (!env || !arg) @@ -8179,7 +8633,7 @@ mdb_env_get_flags(MDB_env *env, unsigned int *arg) return MDB_SUCCESS; } -int +int ESECT mdb_env_set_userctx(MDB_env *env, void *ctx) { if (!env) @@ -8188,13 +8642,13 @@ mdb_env_set_userctx(MDB_env *env, void *ctx) return MDB_SUCCESS; } -void * +void * ESECT mdb_env_get_userctx(MDB_env *env) { return env ? 
env->me_userctx : NULL; } -int +int ESECT mdb_env_set_assert(MDB_env *env, MDB_assert_func *func) { if (!env) @@ -8205,7 +8659,7 @@ mdb_env_set_assert(MDB_env *env, MDB_assert_func *func) return MDB_SUCCESS; } -int +int ESECT mdb_env_get_path(MDB_env *env, const char **arg) { if (!env || !arg) @@ -8215,7 +8669,7 @@ mdb_env_get_path(MDB_env *env, const char **arg) return MDB_SUCCESS; } -int +int ESECT mdb_env_get_fd(MDB_env *env, mdb_filehandle_t *arg) { if (!env || !arg) @@ -8231,7 +8685,7 @@ mdb_env_get_fd(MDB_env *env, mdb_filehandle_t *arg) * @param[out] arg the address of an #MDB_stat structure to receive the stats. * @return 0, this function always succeeds. */ -static int +static int ESECT mdb_stat0(MDB_env *env, MDB_db *db, MDB_stat *arg) { arg->ms_psize = env->me_psize; @@ -8243,7 +8697,8 @@ mdb_stat0(MDB_env *env, MDB_db *db, MDB_stat *arg) return MDB_SUCCESS; } -int + +int ESECT mdb_env_stat(MDB_env *env, MDB_stat *arg) { int toggle; @@ -8256,7 +8711,7 @@ mdb_env_stat(MDB_env *env, MDB_stat *arg) return mdb_stat0(env, &env->me_metas[toggle]->mm_dbs[MAIN_DBI], arg); } -int +int ESECT mdb_env_info(MDB_env *env, MDB_envinfo *arg) { int toggle; @@ -8265,7 +8720,7 @@ mdb_env_info(MDB_env *env, MDB_envinfo *arg) return EINVAL; toggle = mdb_env_pick_meta(env); - arg->me_mapaddr = (env->me_flags & MDB_FIXEDMAP) ? 
env->me_map : 0; + arg->me_mapaddr = env->me_metas[toggle]->mm_address; arg->me_mapsize = env->me_mapsize; arg->me_maxreaders = env->me_maxreaders; @@ -8613,12 +9068,14 @@ int mdb_set_relctx(MDB_txn *txn, MDB_dbi dbi, void *ctx) return MDB_SUCCESS; } -int mdb_env_get_maxkeysize(MDB_env *env) +int ESECT +mdb_env_get_maxkeysize(MDB_env *env) { return ENV_MAXKEY(env); } -int mdb_reader_list(MDB_env *env, MDB_msg_func *func, void *ctx) +int ESECT +mdb_reader_list(MDB_env *env, MDB_msg_func *func, void *ctx) { unsigned int i, rdrs; MDB_reader *mr; @@ -8658,7 +9115,8 @@ int mdb_reader_list(MDB_env *env, MDB_msg_func *func, void *ctx) /** Insert pid into list if not already present. * return -1 if already present. */ -static int mdb_pid_insert(MDB_PID_T *ids, MDB_PID_T pid) +static int ESECT +mdb_pid_insert(MDB_PID_T *ids, MDB_PID_T pid) { /* binary search of pid in list */ unsigned base = 0; @@ -8694,7 +9152,8 @@ static int mdb_pid_insert(MDB_PID_T *ids, MDB_PID_T pid) return 0; } -int mdb_reader_check(MDB_env *env, int *dead) +int ESECT +mdb_reader_check(MDB_env *env, int *dead) { unsigned int i, j, rdrs; MDB_reader *mr; diff --git a/libraries/liblmdb/mdb_copy.1 b/libraries/liblmdb/mdb_copy.1 index 58c6c5b60c..094b260563 100644 --- a/libraries/liblmdb/mdb_copy.1 +++ b/libraries/liblmdb/mdb_copy.1 @@ -8,6 +8,8 @@ mdb_copy \- LMDB environment copy tool [\c .BR \-V ] [\c +.BR \-c ] +[\c .BR \-n ] .B srcpath [\c @@ -30,6 +32,11 @@ written to stdout. .BR \-V Write the library version number to the standard output, and exit. .TP +.BR \-c +Compact while copying. Only current data pages will be copied; freed +or unused pages will be omitted from the copy. This option will +slow down the backup process as it is more CPU-intensive. +.TP .BR \-n Open LDMB environment(s) which do not use subdirectories. 
diff --git a/libraries/liblmdb/mdb_copy.c b/libraries/liblmdb/mdb_copy.c index 87525c0682..c54fefe9f1 100644 --- a/libraries/liblmdb/mdb_copy.c +++ b/libraries/liblmdb/mdb_copy.c @@ -33,10 +33,13 @@ int main(int argc,char * argv[]) MDB_env *env; const char *progname = argv[0], *act; unsigned flags = MDB_RDONLY; + unsigned cpflags = 0; for (; argc > 1 && argv[1][0] == '-'; argc--, argv++) { if (argv[1][1] == 'n' && argv[1][2] == '\0') flags |= MDB_NOSUBDIR; + else if (argv[1][1] == 'c' && argv[1][2] == '\0') + cpflags |= MDB_CP_COMPACT; else if (argv[1][1] == 'V' && argv[1][2] == '\0') { printf("%s\n", MDB_VERSION_STRING); exit(0); @@ -45,7 +48,7 @@ int main(int argc,char * argv[]) } if (argc<2 || argc>3) { - fprintf(stderr, "usage: %s [-V] [-n] srcpath [dstpath]\n", progname); + fprintf(stderr, "usage: %s [-V] [-c] [-n] srcpath [dstpath]\n", progname); exit(EXIT_FAILURE); } @@ -66,9 +69,9 @@ int main(int argc,char * argv[]) if (rc == MDB_SUCCESS) { act = "copying"; if (argc == 2) - rc = mdb_env_copyfd(env, MDB_STDOUT); + rc = mdb_env_copyfd2(env, MDB_STDOUT, cpflags); else - rc = mdb_env_copy(env, argv[2]); + rc = mdb_env_copy2(env, argv[2], cpflags); } if (rc) fprintf(stderr, "%s: %s failed, error %d (%s)\n", diff --git a/libraries/liblmdb/mdb_dump.c b/libraries/liblmdb/mdb_dump.c index 5242519ebc..3b01f9643d 100644 --- a/libraries/liblmdb/mdb_dump.c +++ b/libraries/liblmdb/mdb_dump.c @@ -17,8 +17,15 @@ #include #include #include +#include #include "lmdb.h" +#ifdef _WIN32 +#define Z "I" +#else +#define Z "z" +#endif + #define PRINT 1 static int mode; @@ -37,6 +44,13 @@ flagbit dbflags[] = { { 0, NULL } }; +static volatile sig_atomic_t gotsig; + +static void dumpsig( int sig ) +{ + gotsig=1; +} + static const char hexc[] = "0123456789abcdef"; static void hex(unsigned char c) @@ -83,6 +97,7 @@ static int dumpit(MDB_txn *txn, MDB_dbi dbi, char *name) MDB_cursor *mc; MDB_stat ms; MDB_val key, data; + MDB_envinfo info; unsigned int flags; int rc, i; @@ -92,11 +107,18 
@@ static int dumpit(MDB_txn *txn, MDB_dbi dbi, char *name) rc = mdb_stat(txn, dbi, &ms); if (rc) return rc; + rc = mdb_env_info(mdb_txn_env(txn), &info); + if (rc) return rc; + printf("VERSION=3\n"); printf("format=%s\n", mode & PRINT ? "print" : "bytevalue"); if (name) printf("database=%s\n", name); printf("type=btree\n"); + printf("mapsize=%" Z "u\n", info.me_mapsize); + if (info.me_mapaddr) + printf("mapaddr=%p\n", info.me_mapaddr); + printf("maxreaders=%u\n", info.me_maxreaders); if (flags & MDB_DUPSORT) printf("duplicates=1\n"); @@ -112,6 +134,10 @@ static int dumpit(MDB_txn *txn, MDB_dbi dbi, char *name) if (rc) return rc; while ((rc = mdb_cursor_get(mc, &key, &data, MDB_NEXT) == MDB_SUCCESS)) { + if (gotsig) { + rc = EINTR; + break; + } if (mode & PRINT) { text(&key); text(&data); @@ -196,6 +222,15 @@ int main(int argc, char *argv[]) if (optind != argc - 1) usage(prog); +#ifdef SIGPIPE + signal(SIGPIPE, dumpsig); +#endif +#ifdef SIGHUP + signal(SIGHUP, dumpsig); +#endif + signal(SIGINT, dumpsig); + signal(SIGTERM, dumpsig); + envname = argv[optind]; rc = mdb_env_create(&env); @@ -205,19 +240,19 @@ int main(int argc, char *argv[]) rc = mdb_env_open(env, envname, envflags | MDB_RDONLY, 0664); if (rc) { - printf("mdb_env_open failed, error %d %s\n", rc, mdb_strerror(rc)); + fprintf(stderr, "mdb_env_open failed, error %d %s\n", rc, mdb_strerror(rc)); goto env_close; } rc = mdb_txn_begin(env, NULL, MDB_RDONLY, &txn); if (rc) { - printf("mdb_txn_begin failed, error %d %s\n", rc, mdb_strerror(rc)); + fprintf(stderr, "mdb_txn_begin failed, error %d %s\n", rc, mdb_strerror(rc)); goto env_close; } rc = mdb_open(txn, subname, 0, &dbi); if (rc) { - printf("mdb_open failed, error %d %s\n", rc, mdb_strerror(rc)); + fprintf(stderr, "mdb_open failed, error %d %s\n", rc, mdb_strerror(rc)); goto txn_abort; } @@ -228,7 +263,7 @@ int main(int argc, char *argv[]) rc = mdb_cursor_open(txn, dbi, &cursor); if (rc) { - printf("mdb_cursor_open failed, error %d %s\n", rc, 
mdb_strerror(rc)); + fprintf(stderr, "mdb_cursor_open failed, error %d %s\n", rc, mdb_strerror(rc)); goto txn_abort; } while ((rc = mdb_cursor_get(cursor, &key, NULL, MDB_NEXT_NODUP)) == 0) { @@ -247,6 +282,8 @@ int main(int argc, char *argv[]) list++; } else { rc = dumpit(txn, db2, str); + if (rc) + break; } mdb_close(env, db2); } diff --git a/libraries/liblmdb/mdb_load.c b/libraries/liblmdb/mdb_load.c index 0cf02ada5e..17f4757330 100644 --- a/libraries/liblmdb/mdb_load.c +++ b/libraries/liblmdb/mdb_load.c @@ -32,10 +32,18 @@ static int flags; static char *prog; -static int eof; +static int Eof; + +static MDB_envinfo info; static MDB_val kbuf, dbuf; +#ifdef _WIN32 +#define Z "I" +#else +#define Z "z" +#endif + #define STRLENOF(s) (sizeof(s)-1) typedef struct flagbit { @@ -67,7 +75,7 @@ static void readhdr() if (!strncmp(dbuf.mv_data, "VERSION=", STRLENOF("VERSION="))) { version=atoi((char *)dbuf.mv_data+STRLENOF("VERSION=")); if (version > 3) { - fprintf(stderr, "%s: line %zd: unsupported VERSION %d\n", + fprintf(stderr, "%s: line %" Z "d: unsupported VERSION %d\n", prog, lineno, version); exit(EXIT_FAILURE); } @@ -77,7 +85,7 @@ static void readhdr() if (!strncmp((char *)dbuf.mv_data+STRLENOF("FORMAT="), "print", STRLENOF("print"))) mode |= PRINT; else if (strncmp((char *)dbuf.mv_data+STRLENOF("FORMAT="), "bytevalue", STRLENOF("bytevalue"))) { - fprintf(stderr, "%s: line %zd: unsupported FORMAT %s\n", + fprintf(stderr, "%s: line %" Z "d: unsupported FORMAT %s\n", prog, lineno, (char *)dbuf.mv_data+STRLENOF("FORMAT=")); exit(EXIT_FAILURE); } @@ -88,10 +96,40 @@ static void readhdr() subname = strdup((char *)dbuf.mv_data+STRLENOF("database=")); } else if (!strncmp(dbuf.mv_data, "type=", STRLENOF("type="))) { if (strncmp((char *)dbuf.mv_data+STRLENOF("type="), "btree", STRLENOF("btree"))) { - fprintf(stderr, "%s: line %zd: unsupported type %s\n", + fprintf(stderr, "%s: line %" Z "d: unsupported type %s\n", prog, lineno, (char *)dbuf.mv_data+STRLENOF("type=")); 
exit(EXIT_FAILURE); } + } else if (!strncmp(dbuf.mv_data, "mapaddr=", STRLENOF("mapaddr="))) { + int i; + ptr = memchr(dbuf.mv_data, '\n', dbuf.mv_size); + if (ptr) *ptr = '\0'; + i = sscanf((char *)dbuf.mv_data+STRLENOF("mapaddr="), "%p", &info.me_mapaddr); + if (i != 1) { + fprintf(stderr, "%s: line %" Z "d: invalid mapaddr %s\n", + prog, lineno, (char *)dbuf.mv_data+STRLENOF("mapaddr=")); + exit(EXIT_FAILURE); + } + } else if (!strncmp(dbuf.mv_data, "mapsize=", STRLENOF("mapsize="))) { + int i; + ptr = memchr(dbuf.mv_data, '\n', dbuf.mv_size); + if (ptr) *ptr = '\0'; + i = sscanf((char *)dbuf.mv_data+STRLENOF("mapsize="), "%" Z "u", &info.me_mapsize); + if (i != 1) { + fprintf(stderr, "%s: line %" Z "d: invalid mapsize %s\n", + prog, lineno, (char *)dbuf.mv_data+STRLENOF("mapsize=")); + exit(EXIT_FAILURE); + } + } else if (!strncmp(dbuf.mv_data, "maxreaders=", STRLENOF("maxreaders="))) { + int i; + ptr = memchr(dbuf.mv_data, '\n', dbuf.mv_size); + if (ptr) *ptr = '\0'; + i = sscanf((char *)dbuf.mv_data+STRLENOF("maxreaders="), "%u", &info.me_maxreaders); + if (i != 1) { + fprintf(stderr, "%s: line %" Z "d: invalid maxreaders %s\n", + prog, lineno, (char *)dbuf.mv_data+STRLENOF("maxreaders=")); + exit(EXIT_FAILURE); + } } else { int i; for (i=0; dbflags[i].bit; i++) { @@ -104,12 +142,12 @@ static void readhdr() if (!dbflags[i].bit) { ptr = memchr(dbuf.mv_data, '=', dbuf.mv_size); if (!ptr) { - fprintf(stderr, "%s: line %zd: unexpected format\n", + fprintf(stderr, "%s: line %" Z "d: unexpected format\n", prog, lineno); exit(EXIT_FAILURE); } else { *ptr = '\0'; - fprintf(stderr, "%s: line %zd: unrecognized keyword ignored: %s\n", + fprintf(stderr, "%s: line %" Z "d: unrecognized keyword ignored: %s\n", prog, lineno, (char *)dbuf.mv_data); } } @@ -119,7 +157,7 @@ static void readhdr() static void badend() { - fprintf(stderr, "%s: line %zd: unexpected end of input\n", + fprintf(stderr, "%s: line %" Z "d: unexpected end of input\n", prog, lineno); } @@ -146,14 +184,14 
@@ static int readline(MDB_val *out, MDB_val *buf) if (!(mode & NOHDR)) { c = fgetc(stdin); if (c == EOF) { - eof = 1; + Eof = 1; return EOF; } if (c != ' ') { lineno++; if (fgets(buf->mv_data, buf->mv_size, stdin) == NULL) { badend: - eof = 1; + Eof = 1; badend(); return EOF; } @@ -163,7 +201,7 @@ badend: } } if (fgets(buf->mv_data, buf->mv_size, stdin) == NULL) { - eof = 1; + Eof = 1; return EOF; } lineno++; @@ -175,15 +213,15 @@ badend: while (c1[len-1] != '\n') { buf->mv_data = realloc(buf->mv_data, buf->mv_size*2); if (!buf->mv_data) { - eof = 1; - fprintf(stderr, "%s: line %zd: out of memory, line too long\n", + Eof = 1; + fprintf(stderr, "%s: line %" Z "d: out of memory, line too long\n", prog, lineno); return EOF; } c1 = buf->mv_data; c1 += buf->mv_size; if (fgets((char *)c1, buf->mv_size, stdin) == NULL) { - eof = 1; + Eof = 1; badend(); return EOF; } @@ -202,7 +240,7 @@ badend: c1++; c2 += 2; } else { if (c2+3 >= end || !isxdigit(c2[1]) || !isxdigit(c2[2])) { - eof = 1; + Eof = 1; badend(); return EOF; } @@ -216,13 +254,13 @@ badend: } else { /* odd length not allowed */ if (len & 1) { - eof = 1; + Eof = 1; badend(); return EOF; } while (c2 < end) { if (!isxdigit(*c2) || !isxdigit(c2[1])) { - eof = 1; + Eof = 1; badend(); return EOF; } @@ -251,6 +289,7 @@ int main(int argc, char *argv[]) MDB_dbi dbi; char *envname; int envflags = 0, putflags = 0; + int dohdr = 0; prog = argv[0]; @@ -298,45 +337,60 @@ int main(int argc, char *argv[]) if (optind != argc - 1) usage(prog); + dbuf.mv_size = 4096; + dbuf.mv_data = malloc(dbuf.mv_size); + + if (!(mode & NOHDR)) + readhdr(); + envname = argv[optind]; rc = mdb_env_create(&env); mdb_env_set_maxdbs(env, 2); + if (info.me_maxreaders) + mdb_env_set_maxreaders(env, info.me_maxreaders); + + if (info.me_mapsize) + mdb_env_set_mapsize(env, info.me_mapsize); + + if (info.me_mapaddr) + envflags |= MDB_FIXEDMAP; + rc = mdb_env_open(env, envname, envflags, 0664); if (rc) { - printf("mdb_env_open failed, error %d %s\n", rc, 
mdb_strerror(rc)); + fprintf(stderr, "mdb_env_open failed, error %d %s\n", rc, mdb_strerror(rc)); goto env_close; } kbuf.mv_size = mdb_env_get_maxkeysize(env) * 2 + 2; kbuf.mv_data = malloc(kbuf.mv_size); - dbuf.mv_size = 4096; - dbuf.mv_data = malloc(dbuf.mv_size); - while(!eof) { + while(!Eof) { MDB_val key, data; int batch = 0; flags = 0; - if (!(mode & NOHDR)) + if (!dohdr) { + dohdr = 1; + } else if (!(mode & NOHDR)) readhdr(); rc = mdb_txn_begin(env, NULL, 0, &txn); if (rc) { - printf("mdb_txn_begin failed, error %d %s\n", rc, mdb_strerror(rc)); + fprintf(stderr, "mdb_txn_begin failed, error %d %s\n", rc, mdb_strerror(rc)); goto env_close; } rc = mdb_open(txn, subname, flags|MDB_CREATE, &dbi); if (rc) { - printf("mdb_open failed, error %d %s\n", rc, mdb_strerror(rc)); + fprintf(stderr, "mdb_open failed, error %d %s\n", rc, mdb_strerror(rc)); goto txn_abort; } rc = mdb_cursor_open(txn, dbi, &mc); if (rc) { - printf("mdb_cursor_open failed, error %d %s\n", rc, mdb_strerror(rc)); + fprintf(stderr, "mdb_cursor_open failed, error %d %s\n", rc, mdb_strerror(rc)); goto txn_abort; } @@ -360,18 +414,18 @@ int main(int argc, char *argv[]) if (batch == 100) { rc = mdb_txn_commit(txn); if (rc) { - fprintf(stderr, "%s: line %zd: txn_commit: %s\n", + fprintf(stderr, "%s: line %" Z "d: txn_commit: %s\n", prog, lineno, mdb_strerror(rc)); goto env_close; } rc = mdb_txn_begin(env, NULL, 0, &txn); if (rc) { - printf("mdb_txn_begin failed, error %d %s\n", rc, mdb_strerror(rc)); + fprintf(stderr, "mdb_txn_begin failed, error %d %s\n", rc, mdb_strerror(rc)); goto env_close; } rc = mdb_cursor_open(txn, dbi, &mc); if (rc) { - printf("mdb_cursor_open failed, error %d %s\n", rc, mdb_strerror(rc)); + fprintf(stderr, "mdb_cursor_open failed, error %d %s\n", rc, mdb_strerror(rc)); goto txn_abort; } batch = 0; @@ -380,7 +434,7 @@ int main(int argc, char *argv[]) rc = mdb_txn_commit(txn); txn = NULL; if (rc) { - fprintf(stderr, "%s: line %zd: txn_commit: %s\n", + fprintf(stderr, "%s: line 
%" Z "d: txn_commit: %s\n", prog, lineno, mdb_strerror(rc)); goto env_close; } diff --git a/libraries/liblmdb/mdb_stat.c b/libraries/liblmdb/mdb_stat.c index eac2c60274..25c092c04f 100644 --- a/libraries/liblmdb/mdb_stat.c +++ b/libraries/liblmdb/mdb_stat.c @@ -112,7 +112,7 @@ int main(int argc, char *argv[]) rc = mdb_env_open(env, envname, envflags | MDB_RDONLY, 0664); if (rc) { - printf("mdb_env_open failed, error %d %s\n", rc, mdb_strerror(rc)); + fprintf(stderr, "mdb_env_open failed, error %d %s\n", rc, mdb_strerror(rc)); goto env_close; } @@ -145,7 +145,7 @@ int main(int argc, char *argv[]) rc = mdb_txn_begin(env, NULL, MDB_RDONLY, &txn); if (rc) { - printf("mdb_txn_begin failed, error %d %s\n", rc, mdb_strerror(rc)); + fprintf(stderr, "mdb_txn_begin failed, error %d %s\n", rc, mdb_strerror(rc)); goto env_close; } @@ -158,12 +158,12 @@ int main(int argc, char *argv[]) dbi = 0; rc = mdb_cursor_open(txn, dbi, &cursor); if (rc) { - printf("mdb_cursor_open failed, error %d %s\n", rc, mdb_strerror(rc)); + fprintf(stderr, "mdb_cursor_open failed, error %d %s\n", rc, mdb_strerror(rc)); goto txn_abort; } rc = mdb_stat(txn, dbi, &mst); if (rc) { - printf("mdb_stat failed, error %d %s\n", rc, mdb_strerror(rc)); + fprintf(stderr, "mdb_stat failed, error %d %s\n", rc, mdb_strerror(rc)); goto txn_abort; } prstat(&mst); @@ -201,13 +201,13 @@ int main(int argc, char *argv[]) rc = mdb_open(txn, subname, 0, &dbi); if (rc) { - printf("mdb_open failed, error %d %s\n", rc, mdb_strerror(rc)); + fprintf(stderr, "mdb_open failed, error %d %s\n", rc, mdb_strerror(rc)); goto txn_abort; } rc = mdb_stat(txn, dbi, &mst); if (rc) { - printf("mdb_stat failed, error %d %s\n", rc, mdb_strerror(rc)); + fprintf(stderr, "mdb_stat failed, error %d %s\n", rc, mdb_strerror(rc)); goto txn_abort; } printf("Status of %s\n", subname ? 
subname : "Main DB"); @@ -219,7 +219,7 @@ int main(int argc, char *argv[]) rc = mdb_cursor_open(txn, dbi, &cursor); if (rc) { - printf("mdb_cursor_open failed, error %d %s\n", rc, mdb_strerror(rc)); + fprintf(stderr, "mdb_cursor_open failed, error %d %s\n", rc, mdb_strerror(rc)); goto txn_abort; } while ((rc = mdb_cursor_get(cursor, &key, NULL, MDB_NEXT_NODUP)) == 0) { @@ -237,7 +237,7 @@ int main(int argc, char *argv[]) if (rc) continue; rc = mdb_stat(txn, db2, &mst); if (rc) { - printf("mdb_stat failed, error %d %s\n", rc, mdb_strerror(rc)); + fprintf(stderr, "mdb_stat failed, error %d %s\n", rc, mdb_strerror(rc)); goto txn_abort; } prstat(&mst);