From c2f6f04dba9006935bb89b5f0d08102e2776a4ab Mon Sep 17 00:00:00 2001 From: Hallvard Furuseth Date: Tue, 12 Jan 2016 23:18:06 +0100 Subject: [PATCH 01/39] lmdb.h Caveats: Reserved vs. actual mem/disk usage --- libraries/liblmdb/lmdb.h | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/libraries/liblmdb/lmdb.h b/libraries/liblmdb/lmdb.h index c4d05d1791..14a05462a3 100644 --- a/libraries/liblmdb/lmdb.h +++ b/libraries/liblmdb/lmdb.h @@ -77,6 +77,11 @@ * access to locks and lock file. Exceptions: On read-only filesystems * or with the #MDB_NOLOCK flag described under #mdb_env_open(). * + * - An LMDB configuration will often reserve considerable \b unused + * memory address space and maybe file size for future growth. + * This does not use actual memory or disk space, but users may need + * to understand the difference so they won't be scared off. + * * - By default, in versions before 0.9.10, unused portions of the data * file might receive garbage data from memory freed by other code. * (This does not happen when using the #MDB_WRITEMAP flag.) As of From 369b99a46099fdca33ba86b530c1bed2ed098f92 Mon Sep 17 00:00:00 2001 From: Howard Chu Date: Wed, 20 Jan 2016 01:30:57 +0000 Subject: [PATCH 02/39] WIN64 needs off_t redefined too --- libraries/liblmdb/mdb.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/libraries/liblmdb/mdb.c b/libraries/liblmdb/mdb.c index 1edc282b81..73c80da13c 100644 --- a/libraries/liblmdb/mdb.c +++ b/libraries/liblmdb/mdb.c @@ -35,6 +35,9 @@ #ifndef _GNU_SOURCE #define _GNU_SOURCE 1 #endif +#if defined(__WIN64__) +#define _FILE_OFFSET_BITS 64 +#endif #ifdef _WIN32 #include #include From 7a8d0f6953c3fb60c24c5cf11af2398f391de145 Mon Sep 17 00:00:00 2001 From: Hallvard Furuseth Date: Thu, 1 Dec 2016 21:56:35 +0100 Subject: [PATCH 03/39] Fix comment: SysV semaphores -> Posix semaphores --- libraries/liblmdb/mdb.c | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/libraries/liblmdb/mdb.c b/libraries/liblmdb/mdb.c index 73c80da13c..27bcd026ce 100644 --- a/libraries/liblmdb/mdb.c +++ b/libraries/liblmdb/mdb.c @@ -247,10 +247,9 @@ typedef SSIZE_T ssize_t; /** Some platforms define the EOWNERDEAD error code * even though they don't support Robust Mutexes. * Compile with -DMDB_USE_ROBUST=0, or use some other - * mechanism like -DMDB_USE_SYSV_SEM instead of - * -DMDB_USE_POSIX_MUTEX. (SysV semaphores are - * also Robust, but some systems don't support them - * either.) + * mechanism like -DMDB_USE_POSIX_SEM instead of + * -DMDB_USE_POSIX_MUTEX. + * (Posix semaphores are not robust.) */ #ifndef MDB_USE_ROBUST /* Android currently lacks Robust Mutex support. So does glibc < 2.4. */ From b57bb99fa6c934a7f811d58d2028fdd2b8f60c25 Mon Sep 17 00:00:00 2001 From: Hallvard Furuseth Date: Sun, 23 Aug 2015 20:33:02 +0200 Subject: [PATCH 04/39] Pass cursor to mdb_page_get(), mdb_node_read(). No change in behavior. --- libraries/liblmdb/mdb.c | 68 ++++++++++++++++++++--------------------- 1 file changed, 34 insertions(+), 34 deletions(-) diff --git a/libraries/liblmdb/mdb.c b/libraries/liblmdb/mdb.c index 27bcd026ce..6e84d4c285 100644 --- a/libraries/liblmdb/mdb.c +++ b/libraries/liblmdb/mdb.c @@ -1301,7 +1301,7 @@ enum { #define MDB_END_SLOT MDB_NOTLS /**< release any reader slot if #MDB_NOTLS */ static void mdb_txn_end(MDB_txn *txn, unsigned mode); -static int mdb_page_get(MDB_txn *txn, pgno_t pgno, MDB_page **mp, int *lvl); +static int mdb_page_get(MDB_cursor *mc, pgno_t pgno, MDB_page **mp, int *lvl); static int mdb_page_search_root(MDB_cursor *mc, MDB_val *key, int modify); #define MDB_PS_MODIFY 1 @@ -1330,7 +1330,7 @@ static int mdb_node_add(MDB_cursor *mc, indx_t indx, static void mdb_node_del(MDB_cursor *mc, int ksize); static void mdb_node_shrink(MDB_page *mp, indx_t indx); static int mdb_node_move(MDB_cursor *csrc, MDB_cursor *cdst, int fromleft); -static int mdb_node_read(MDB_txn *txn, MDB_node *leaf, MDB_val *data); +static int mdb_node_read(MDB_cursor *mc, MDB_node *leaf, MDB_val *data); static size_t mdb_leaf_size(MDB_env *env, MDB_val *key, MDB_val *data); static size_t mdb_branch_size(MDB_env *env, MDB_val *key); @@ -1846,7 +1846,7 @@ mdb_pages_xkeep(MDB_cursor *mc, unsigned pflags, int all) { enum { Mask = P_SUBP|P_DIRTY|P_LOOSE|P_KEEP }; MDB_txn *txn = mc->mc_txn; - MDB_cursor *m3; + MDB_cursor *m3, *m0 = mc; MDB_xcursor *mx; MDB_page *dp, *mp; MDB_node *leaf; @@ -1889,7 +1889,7 @@ mdb_pages_xkeep(MDB_cursor *mc, unsigned pflags, int all) pgno_t pgno = txn->mt_dbs[i].md_root; if (pgno == P_INVALID) continue; - if ((rc = mdb_page_get(txn, pgno, &dp, &level)) != MDB_SUCCESS) + if ((rc = mdb_page_get(m0, pgno, &dp, &level)) != MDB_SUCCESS) break; if ((dp->mp_flags & Mask) == pflags && level <= 1) dp->mp_flags ^= P_KEEP; @@ -2193,7 +2193,7 @@ mdb_page_alloc(MDB_cursor *mc, int num, MDB_page **mp) } np = m2.mc_pg[m2.mc_top]; leaf = NODEPTR(np, m2.mc_ki[m2.mc_top]); - if ((rc = mdb_node_read(txn, leaf, &data)) != MDB_SUCCESS) + if ((rc = mdb_node_read(&m2, leaf, &data)) != MDB_SUCCESS) return rc; idl = (MDB_ID *) data.mv_data; @@ -5219,15 +5219,16 @@ mdb_cursor_push(MDB_cursor *mc, MDB_page *mp) } /** Find the address of the page corresponding to a given page number. - * @param[in] txn the transaction for this access. + * @param[in] mc the cursor accessing the page. * @param[in] pgno the page number for the page to retrieve. * @param[out] ret address of a pointer where the page's address will be stored. * @param[out] lvl dirty_list inheritance level of found page. 1=current txn, 0=mapped page. * @return 0 on success, non-zero on failure. */ static int -mdb_page_get(MDB_txn *txn, pgno_t pgno, MDB_page **ret, int *lvl) +mdb_page_get(MDB_cursor *mc, pgno_t pgno, MDB_page **ret, int *lvl) { + MDB_txn *txn = mc->mc_txn; MDB_env *env = txn->mt_env; MDB_page *p = NULL; int level; @@ -5322,7 +5323,7 @@ mdb_page_search_root(MDB_cursor *mc, MDB_val *key, int flags) mdb_cassert(mc, i < NUMKEYS(mp)); node = NODEPTR(mp, i); - if ((rc = mdb_page_get(mc->mc_txn, NODEPGNO(node), &mp, NULL)) != 0) + if ((rc = mdb_page_get(mc, NODEPGNO(node), &mp, NULL)) != 0) return rc; mc->mc_ki[mc->mc_top] = i; @@ -5364,7 +5365,7 @@ mdb_page_search_lowest(MDB_cursor *mc) MDB_node *node = NODEPTR(mp, 0); int rc; - if ((rc = mdb_page_get(mc->mc_txn, NODEPGNO(node), &mp, NULL)) != 0) + if ((rc = mdb_page_get(mc, NODEPGNO(node), &mp, NULL)) != 0) return rc; mc->mc_ki[mc->mc_top] = 0; @@ -5416,7 +5417,7 @@ mdb_page_search(MDB_cursor *mc, MDB_val *key, int flags) return MDB_NOTFOUND; if ((leaf->mn_flags & (F_DUPDATA|F_SUBDATA)) != F_SUBDATA) return MDB_INCOMPATIBLE; /* not a named DB */ - rc = mdb_node_read(mc->mc_txn, leaf, &data); + rc = mdb_node_read(&mc2, leaf, &data); if (rc) return rc; memcpy(&flags, ((char *) data.mv_data + offsetof(MDB_db, md_flags)), @@ -5440,7 +5441,7 @@ mdb_page_search(MDB_cursor *mc, MDB_val *key, int flags) mdb_cassert(mc, root > 1); if (!mc->mc_pg[0] || mc->mc_pg[0]->mp_pgno != root) - if ((rc = mdb_page_get(mc->mc_txn, root, &mc->mc_pg[0], NULL)) != 0) + if ((rc = mdb_page_get(mc, root, &mc->mc_pg[0], NULL)) != 0) return rc; mc->mc_snum = 1; @@ -5537,13 +5538,13 @@ release: } /** Return the data associated with a given node. - * @param[in] txn The transaction for this operation. + * @param[in] mc The cursor for this operation. * @param[in] leaf The node being read. * @param[out] data Updated to point to the node's data. * @return 0 on success, non-zero on failure. */ static int -mdb_node_read(MDB_txn *txn, MDB_node *leaf, MDB_val *data) +mdb_node_read(MDB_cursor *mc, MDB_node *leaf, MDB_val *data) { MDB_page *omp; /* overflow page */ pgno_t pgno; @@ -5559,7 +5560,7 @@ mdb_node_read(MDB_txn *txn, MDB_node *leaf, MDB_val *data) */ data->mv_size = NODEDSZ(leaf); memcpy(&pgno, NODEDATA(leaf), sizeof(pgno)); - if ((rc = mdb_page_get(txn, pgno, &omp, NULL)) != 0) { + if ((rc = mdb_page_get(mc, pgno, &omp, NULL)) != 0) { DPRINTF(("read overflow page %"Z"u failed", pgno)); return rc; } @@ -5633,7 +5634,7 @@ mdb_cursor_sibling(MDB_cursor *mc, int move_right) mdb_cassert(mc, IS_BRANCH(mc->mc_pg[mc->mc_top])); indx = NODEPTR(mc->mc_pg[mc->mc_top], mc->mc_ki[mc->mc_top]); - if ((rc = mdb_page_get(mc->mc_txn, NODEPGNO(indx), &mp, NULL)) != 0) { + if ((rc = mdb_page_get(mc, NODEPGNO(indx), &mp, NULL)) != 0) { /* mc will be inconsistent if caller does mc_snum++ as above */ mc->mc_flags &= ~(C_INITIALIZED|C_EOF); return rc; @@ -5716,7 +5717,7 @@ skip: mdb_xcursor_init1(mc, leaf); } if (data) { - if ((rc = mdb_node_read(mc->mc_txn, leaf, data)) != MDB_SUCCESS) + if ((rc = mdb_node_read(mc, leaf, data)) != MDB_SUCCESS) return rc; if (F_ISSET(leaf->mn_flags, F_DUPDATA)) { @@ -5799,7 +5800,7 @@ mdb_cursor_prev(MDB_cursor *mc, MDB_val *key, MDB_val *data, MDB_cursor_op op) mdb_xcursor_init1(mc, leaf); } if (data) { - if ((rc = mdb_node_read(mc->mc_txn, leaf, data)) != MDB_SUCCESS) + if ((rc = mdb_node_read(mc, leaf, data)) != MDB_SUCCESS) return rc; if (F_ISSET(leaf->mn_flags, F_DUPDATA)) { @@ -5981,7 +5982,7 @@ set1: } else if (op == MDB_GET_BOTH || op == MDB_GET_BOTH_RANGE) { MDB_val olddata; MDB_cmp_func *dcmp; - if ((rc = mdb_node_read(mc->mc_txn, leaf, &olddata)) != MDB_SUCCESS) + if ((rc = mdb_node_read(mc, leaf, &olddata)) != MDB_SUCCESS) return rc; dcmp = mc->mc_dbx->md_dcmp; #if UINT_MAX < SIZE_MAX @@ -5999,7 +6000,7 @@ set1: } else { if (mc->mc_xcursor) mc->mc_xcursor->mx_cursor.mc_flags &= ~(C_INITIALIZED|C_EOF); - if ((rc = mdb_node_read(mc->mc_txn, leaf, data)) != MDB_SUCCESS) + if ((rc = mdb_node_read(mc, leaf, data)) != MDB_SUCCESS) return rc; } } @@ -6048,7 +6049,7 @@ mdb_cursor_first(MDB_cursor *mc, MDB_val *key, MDB_val *data) if (rc) return rc; } else { - if ((rc = mdb_node_read(mc->mc_txn, leaf, data)) != MDB_SUCCESS) + if ((rc = mdb_node_read(mc, leaf, data)) != MDB_SUCCESS) return rc; } } @@ -6093,7 +6094,7 @@ mdb_cursor_last(MDB_cursor *mc, MDB_val *key, MDB_val *data) if (rc) return rc; } else { - if ((rc = mdb_node_read(mc->mc_txn, leaf, data)) != MDB_SUCCESS) + if ((rc = mdb_node_read(mc, leaf, data)) != MDB_SUCCESS) return rc; } } @@ -6139,7 +6140,7 @@ mdb_cursor_get(MDB_cursor *mc, MDB_val *key, MDB_val *data, if (F_ISSET(leaf->mn_flags, F_DUPDATA)) { rc = mdb_cursor_get(&mc->mc_xcursor->mx_cursor, data, NULL, MDB_GET_CURRENT); } else { - rc = mdb_node_read(mc->mc_txn, leaf, data); + rc = mdb_node_read(mc, leaf, data); } } } @@ -6232,7 +6233,7 @@ fetchm: MDB_node *leaf = NODEPTR(mc->mc_pg[mc->mc_top], mc->mc_ki[mc->mc_top]); if (!F_ISSET(leaf->mn_flags, F_DUPDATA)) { MDB_GET_KEY(leaf, key); - rc = mdb_node_read(mc->mc_txn, leaf, data); + rc = mdb_node_read(mc, leaf, data); break; } } @@ -6616,7 +6617,7 @@ current: int level, ovpages, dpages = OVPAGES(data->mv_size, env->me_psize); memcpy(&pg, olddata.mv_data, sizeof(pg)); - if ((rc2 = mdb_page_get(mc->mc_txn, pg, &omp, &level)) != 0) + if ((rc2 = mdb_page_get(mc, pg, &omp, &level)) != 0) return rc2; ovpages = omp->mp_pages; @@ -6922,7 +6923,7 @@ mdb_cursor_del(MDB_cursor *mc, unsigned int flags) pgno_t pg; memcpy(&pg, NODEDATA(leaf), sizeof(pg)); - if ((rc = mdb_page_get(mc->mc_txn, pg, &omp, NULL)) || + if ((rc = mdb_page_get(mc, pg, &omp, NULL)) || (rc = mdb_ovpage_free(mc, omp))) goto fail; } @@ -8081,7 +8082,7 @@ mdb_rebalance(MDB_cursor *mc) if (rc) return rc; mc->mc_db->md_root = NODEPGNO(NODEPTR(mp, 0)); - rc = mdb_page_get(mc->mc_txn,mc->mc_db->md_root,&mc->mc_pg[0],NULL); + rc = mdb_page_get(mc, mc->mc_db->md_root, &mc->mc_pg[0], NULL); if (rc) return rc; mc->mc_db->md_depth--; @@ -8142,7 +8143,7 @@ mdb_rebalance(MDB_cursor *mc) DPUTS("reading right neighbor"); mn.mc_ki[ptop]++; node = NODEPTR(mc->mc_pg[ptop], mn.mc_ki[ptop]); - rc = mdb_page_get(mc->mc_txn,NODEPGNO(node),&mn.mc_pg[mn.mc_top],NULL); + rc = mdb_page_get(mc, NODEPGNO(node), &mn.mc_pg[mn.mc_top], NULL); if (rc) return rc; mn.mc_ki[mn.mc_top] = 0; @@ -8154,7 +8155,7 @@ mdb_rebalance(MDB_cursor *mc) DPUTS("reading left neighbor"); mn.mc_ki[ptop]--; node = NODEPTR(mc->mc_pg[ptop], mn.mc_ki[ptop]); - rc = mdb_page_get(mc->mc_txn,NODEPGNO(node),&mn.mc_pg[mn.mc_top],NULL); + rc = mdb_page_get(mc, NODEPGNO(node), &mn.mc_pg[mn.mc_top], NULL); if (rc) return rc; mn.mc_ki[mn.mc_top] = NUMKEYS(mn.mc_pg[mn.mc_top]) - 1; @@ -8904,7 +8905,6 @@ static int ESECT mdb_env_cwalk(mdb_copy *my, pgno_t *pg, int flags) { MDB_cursor mc = {0}; - MDB_txn *txn = my->mc_txn; MDB_node *ni; MDB_page *mo, *mp, *leaf; char *buf, *ptr; @@ -8916,9 +8916,9 @@ mdb_env_cwalk(mdb_copy *my, pgno_t *pg, int flags) return MDB_SUCCESS; mc.mc_snum = 1; - mc.mc_txn = txn; + mc.mc_txn = my->mc_txn; - rc = mdb_page_get(my->mc_txn, *pg, &mc.mc_pg[0], NULL); + rc = mdb_page_get(&mc, *pg, &mc.mc_pg[0], NULL); if (rc) return rc; rc = mdb_page_search_root(&mc, NULL, MDB_PS_FIRST); @@ -8962,7 +8962,7 @@ mdb_env_cwalk(mdb_copy *my, pgno_t *pg, int flags) } memcpy(&pg, NODEDATA(ni), sizeof(pg)); - rc = mdb_page_get(txn, pg, &omp, NULL); + rc = mdb_page_get(&mc, pg, &omp, NULL); if (rc) goto done; if (my->mc_wlen[toggle] >= MDB_WBUF) { @@ -9013,7 +9013,7 @@ mdb_env_cwalk(mdb_copy *my, pgno_t *pg, int flags) again: ni = NODEPTR(mp, mc.mc_ki[mc.mc_top]); pg = NODEPGNO(ni); - rc = mdb_page_get(txn, pg, &mp, NULL); + rc = mdb_page_get(&mc, pg, &mp, NULL); if (rc) goto done; mc.mc_top++; @@ -9696,7 +9696,7 @@ mdb_drop0(MDB_cursor *mc, int subs) MDB_page *omp; pgno_t pg; memcpy(&pg, NODEDATA(ni), sizeof(pg)); - rc = mdb_page_get(txn, pg, &omp, NULL); + rc = mdb_page_get(mc, pg, &omp, NULL); if (rc != 0) goto done; mdb_cassert(mc, IS_OVERFLOW(omp)); From b8c1877be3338c776a5db9897d6bdbcf3945b106 Mon Sep 17 00:00:00 2001 From: Hallvard Furuseth Date: Fri, 9 Dec 2016 00:03:36 +0100 Subject: [PATCH 05/39] Cleanup: Add flag DB_DUPDATA, drop DB_DIRTY hack --- libraries/liblmdb/mdb.c | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/libraries/liblmdb/mdb.c b/libraries/liblmdb/mdb.c index 6e84d4c285..0ce41b845a 100644 --- a/libraries/liblmdb/mdb.c +++ b/libraries/liblmdb/mdb.c @@ -1078,11 +1078,12 @@ struct MDB_txn { * @ingroup internal * @{ */ -#define DB_DIRTY 0x01 /**< DB was modified or is DUPSORT data */ +#define DB_DIRTY 0x01 /**< DB was written in this txn */ #define DB_STALE 0x02 /**< Named-DB record is older than txnID */ #define DB_NEW 0x04 /**< Named-DB handle opened in this txn */ #define DB_VALID 0x08 /**< DB handle is valid, see also #MDB_VALID */ #define DB_USRVALID 0x10 /**< As #DB_VALID, but not set for #FREE_DBI */ +#define DB_DUPDATA 0x20 /**< DB is #MDB_DUPSORT data */ /** @} */ /** In write txns, array of cursors for each DB */ MDB_cursor **mt_cursors; @@ -6270,7 +6271,8 @@ mdb_cursor_touch(MDB_cursor *mc) { int rc = MDB_SUCCESS; - if (mc->mc_dbi >= CORE_DBS && !(*mc->mc_dbflag & DB_DIRTY)) { + if (mc->mc_dbi >= CORE_DBS && !(*mc->mc_dbflag & (DB_DIRTY|DB_DUPDATA))) { + /* Touch DB record of named DB */ MDB_cursor mc2; MDB_xcursor mcx; if (TXN_DBI_CHANGED(mc->mc_txn, mc->mc_dbi)) @@ -7331,7 +7333,7 @@ mdb_xcursor_init1(MDB_cursor *mc, MDB_node *node) } DPRINTF(("Sub-db -%u root page %"Z"u", mx->mx_cursor.mc_dbi, mx->mx_db.md_root)); - mx->mx_dbflag = DB_VALID|DB_USRVALID|DB_DIRTY; /* DB_DIRTY guides mdb_cursor_touch */ + mx->mx_dbflag = DB_VALID|DB_USRVALID|DB_DUPDATA; #if UINT_MAX < SIZE_MAX if (mx->mx_dbx.md_cmp == mdb_cmp_int && mx->mx_db.md_pad == sizeof(size_t)) mx->mx_dbx.md_cmp = mdb_cmp_clong; @@ -7357,7 +7359,7 @@ mdb_xcursor_init2(MDB_cursor *mc, MDB_xcursor *src_mx, int new_dupdata) mx->mx_cursor.mc_top = 0; mx->mx_cursor.mc_flags |= C_INITIALIZED; mx->mx_cursor.mc_ki[0] = 0; - mx->mx_dbflag = DB_VALID|DB_USRVALID|DB_DIRTY; /* DB_DIRTY guides mdb_cursor_touch */ + mx->mx_dbflag = DB_VALID|DB_USRVALID|DB_DUPDATA; #if UINT_MAX < SIZE_MAX mx->mx_dbx.md_cmp = src_mx->mx_dbx.md_cmp; #endif From 6b1df0e4c7fadd21d1233d7157229b2d89ccaa04 Mon Sep 17 00:00:00 2001 From: Howard Chu Date: Tue, 29 Nov 2016 19:19:45 +0000 Subject: [PATCH 06/39] More for ITS#8406 (xcursor fixups) Revert excess cursor fixup xcursor fixup depends on init state --- libraries/liblmdb/mdb.c | 17 ++++++++++++++--- 1 file changed, 14 insertions(+), 3 deletions(-) diff --git a/libraries/liblmdb/mdb.c b/libraries/liblmdb/mdb.c index 0ce41b845a..b0faf6d3f5 100644 --- a/libraries/liblmdb/mdb.c +++ b/libraries/liblmdb/mdb.c @@ -8222,6 +8222,11 @@ mdb_cursor_del0(MDB_cursor *mc) if (m3->mc_pg[mc->mc_top] == mp) { if (m3->mc_ki[mc->mc_top] == ki) { m3->mc_flags |= C_DEL; + if (mc->mc_db->md_flags & MDB_DUPSORT) { + /* Sub-cursor referred into dataset which is gone */ + m3->mc_xcursor->mx_cursor.mc_flags &= ~(C_INITIALIZED|C_EOF); + } + continue; } else if (m3->mc_ki[mc->mc_top] > ki) { m3->mc_ki[mc->mc_top]--; } @@ -8266,9 +8271,15 @@ mdb_cursor_del0(MDB_cursor *mc) } if (mc->mc_db->md_flags & MDB_DUPSORT) { MDB_node *node = NODEPTR(m3->mc_pg[m3->mc_top], m3->mc_ki[m3->mc_top]); - if (node->mn_flags & F_DUPDATA) { - mdb_xcursor_init1(m3, node); - m3->mc_xcursor->mx_cursor.mc_flags |= C_DEL; + /* If this node is a fake page, it needs to be reinited + * because its data has moved. But just reset mc_pg[0] + * if the xcursor is already live. + */ + if ((node->mn_flags & (F_DUPDATA|F_SUBDATA)) == F_DUPDATA) { + if (m3->mc_xcursor->mx_cursor.mc_flags & C_INITIALIZED) + m3->mc_xcursor->mx_cursor.mc_pg[0] = NODEDATA(node); + else + mdb_xcursor_init1(m3, node); } } } From fe2b1cd731c24a4cbea556a8800df4a1f4da0a72 Mon Sep 17 00:00:00 2001 From: Hallvard Furuseth Date: Thu, 1 Dec 2016 21:17:42 +0100 Subject: [PATCH 07/39] Factor out refreshing sub-page pointers --- libraries/liblmdb/mdb.c | 88 +++++++++++++++++------------------------ 1 file changed, 36 insertions(+), 52 deletions(-) diff --git a/libraries/liblmdb/mdb.c b/libraries/liblmdb/mdb.c index b0faf6d3f5..465cf0dc00 100644 --- a/libraries/liblmdb/mdb.c +++ b/libraries/liblmdb/mdb.c @@ -1188,6 +1188,21 @@ typedef struct MDB_xcursor { unsigned char mx_dbflag; } MDB_xcursor; + /** Check if there is an inited xcursor, so #XCURSOR_REFRESH() is proper */ +#define XCURSOR_INITED(mc) \ + ((mc)->mc_xcursor && ((mc)->mc_xcursor->mx_cursor.mc_flags & C_INITIALIZED)) + + /** Update sub-page pointer, if any, in \b mc->mc_xcursor. Needed + * when the node which contains the sub-page may have moved. Called + * with \b mp = mc->mc_pg[mc->mc_top], \b ki = mc->mc_ki[mc->mc_top]. + */ +#define XCURSOR_REFRESH(mc, mp, ki) do { \ + MDB_page *xr_pg = (mp); \ + MDB_node *xr_node = NODEPTR(xr_pg, ki); \ + if ((xr_node->mn_flags & (F_DUPDATA|F_SUBDATA)) == F_DUPDATA) \ + (mc)->mc_xcursor->mx_cursor.mc_pg[0] = NODEDATA(xr_node); \ +} while (0) + /** State of FreeDB old pages, stored in the MDB_env */ typedef struct MDB_pgstate { pgno_t *mf_pghead; /**< Reclaimed freeDB pages, or NULL before use */ @@ -1618,7 +1633,7 @@ mdb_cursor_chk(MDB_cursor *mc) } if (mc->mc_ki[i] >= NUMKEYS(mc->mc_pg[i])) printf("ack!\n"); - if (mc->mc_xcursor && (mc->mc_xcursor->mx_cursor.mc_flags & C_INITIALIZED)) { + if (XCURSOR_INITED(mc)) { node = NODEPTR(mc->mc_pg[mc->mc_top], mc->mc_ki[mc->mc_top]); if (((node->mn_flags & (F_DUPDATA|F_SUBDATA)) == F_DUPDATA) && mc->mc_xcursor->mx_cursor.mc_pg[0] != NODEDATA(node)) { @@ -2433,14 +2448,8 @@ done: if (m2 == mc) continue; if (m2->mc_pg[mc->mc_top] == mp) { m2->mc_pg[mc->mc_top] = np; - if ((mc->mc_db->md_flags & MDB_DUPSORT) && - IS_LEAF(np) && - (m2->mc_xcursor->mx_cursor.mc_flags & C_INITIALIZED)) - { - MDB_node *leaf = NODEPTR(np, m2->mc_ki[mc->mc_top]); - if ((leaf->mn_flags & (F_DUPDATA|F_SUBDATA)) == F_DUPDATA) - m2->mc_xcursor->mx_cursor.mc_pg[0] = NODEDATA(leaf); - } + if (XCURSOR_INITED(m2) && IS_LEAF(np)) + XCURSOR_REFRESH(m2, np, m2->mc_ki[mc->mc_top]); } } } @@ -6726,11 +6735,8 @@ new_sub: if (m3->mc_ki[i] >= mc->mc_ki[i] && insert_key) { m3->mc_ki[i]++; } - if (m3->mc_xcursor && (m3->mc_xcursor->mx_cursor.mc_flags & C_INITIALIZED)) { - MDB_node *n2 = NODEPTR(mp, m3->mc_ki[i]); - if ((n2->mn_flags & (F_SUBDATA|F_DUPDATA)) == F_DUPDATA) - m3->mc_xcursor->mx_cursor.mc_pg[0] = NODEDATA(n2); - } + if (XCURSOR_INITED(m3)) + XCURSOR_REFRESH(m3, mp, m3->mc_ki[i]); } } } @@ -6781,9 +6787,7 @@ put_sub: if (m2->mc_ki[i] == mc->mc_ki[i]) { mdb_xcursor_init2(m2, mx, new_dupdata); } else if (!insert_key && m2->mc_ki[i] < nkeys) { - MDB_node *n2 = NODEPTR(mp, m2->mc_ki[i]); - if ((n2->mn_flags & (F_SUBDATA|F_DUPDATA)) == F_DUPDATA) - m2->mc_xcursor->mx_cursor.mc_pg[0] = NODEDATA(n2); + XCURSOR_REFRESH(m2, mp, m2->mc_ki[i]); } } } @@ -6888,13 +6892,12 @@ mdb_cursor_del(MDB_cursor *mc, unsigned int flags) if (m2 == mc || m2->mc_snum < mc->mc_snum) continue; if (!(m2->mc_flags & C_INITIALIZED)) continue; if (m2->mc_pg[mc->mc_top] == mp) { - if (m2->mc_ki[mc->mc_top] == mc->mc_ki[mc->mc_top]) { - m2->mc_xcursor->mx_cursor.mc_pg[0] = NODEDATA(leaf); - } else { - MDB_node *n2 = NODEPTR(mp, m2->mc_ki[mc->mc_top]); - if (!(n2->mn_flags & F_SUBDATA)) - m2->mc_xcursor->mx_cursor.mc_pg[0] = NODEDATA(n2); + MDB_node *n2 = leaf; + if (m2->mc_ki[mc->mc_top] != mc->mc_ki[mc->mc_top]) { + n2 = NODEPTR(mp, m2->mc_ki[mc->mc_top]); + if (n2->mn_flags & F_SUBDATA) continue; } + m2->mc_xcursor->mx_cursor.mc_pg[0] = NODEDATA(n2); } } } @@ -7731,12 +7734,8 @@ mdb_node_move(MDB_cursor *csrc, MDB_cursor *cdst, int fromleft) m3->mc_ki[csrc->mc_top] = cdst->mc_ki[cdst->mc_top]; m3->mc_ki[csrc->mc_top-1]++; } - if (m3->mc_xcursor && (m3->mc_xcursor->mx_cursor.mc_flags & C_INITIALIZED) && - IS_LEAF(mps)) { - MDB_node *node = NODEPTR(m3->mc_pg[csrc->mc_top], m3->mc_ki[csrc->mc_top]); - if ((node->mn_flags & (F_DUPDATA|F_SUBDATA)) == F_DUPDATA) - m3->mc_xcursor->mx_cursor.mc_pg[0] = NODEDATA(node); - } + if (XCURSOR_INITED(m3) && IS_LEAF(mps)) + XCURSOR_REFRESH(m3, m3->mc_pg[csrc->mc_top], m3->mc_ki[csrc->mc_top]); } } else /* Adding on the right, bump others down */ @@ -7757,12 +7756,8 @@ mdb_node_move(MDB_cursor *csrc, MDB_cursor *cdst, int fromleft) } else { m3->mc_ki[csrc->mc_top]--; } - if (m3->mc_xcursor && (m3->mc_xcursor->mx_cursor.mc_flags & C_INITIALIZED) && - IS_LEAF(mps)) { - MDB_node *node = NODEPTR(m3->mc_pg[csrc->mc_top], m3->mc_ki[csrc->mc_top]); - if ((node->mn_flags & (F_DUPDATA|F_SUBDATA)) == F_DUPDATA) - m3->mc_xcursor->mx_cursor.mc_pg[0] = NODEDATA(node); - } + if (XCURSOR_INITED(m3) && IS_LEAF(mps)) + XCURSOR_REFRESH(m3, m3->mc_pg[csrc->mc_top], m3->mc_ki[csrc->mc_top]); } } } @@ -7963,12 +7958,8 @@ mdb_page_merge(MDB_cursor *csrc, MDB_cursor *cdst) m3->mc_ki[top-1] > csrc->mc_ki[top-1]) { m3->mc_ki[top-1]--; } - if (m3->mc_xcursor && (m3->mc_xcursor->mx_cursor.mc_flags & C_INITIALIZED) && - IS_LEAF(psrc)) { - MDB_node *node = NODEPTR(m3->mc_pg[top], m3->mc_ki[top]); - if ((node->mn_flags & (F_DUPDATA|F_SUBDATA)) == F_DUPDATA) - m3->mc_xcursor->mx_cursor.mc_pg[0] = NODEDATA(node); - } + if (XCURSOR_INITED(m3) && IS_LEAF(psrc)) + XCURSOR_REFRESH(m3, m3->mc_pg[top], m3->mc_ki[top]); } } { @@ -8230,11 +8221,8 @@ mdb_cursor_del0(MDB_cursor *mc) } else if (m3->mc_ki[mc->mc_top] > ki) { m3->mc_ki[mc->mc_top]--; } - if (m3->mc_xcursor && (m3->mc_xcursor->mx_cursor.mc_flags & C_INITIALIZED)) { - MDB_node *node = NODEPTR(m3->mc_pg[mc->mc_top], m3->mc_ki[mc->mc_top]); - if ((node->mn_flags & (F_DUPDATA|F_SUBDATA)) == F_DUPDATA) - m3->mc_xcursor->mx_cursor.mc_pg[0] = NODEDATA(node); - } + if (XCURSOR_INITED(m3)) + XCURSOR_REFRESH(m3, m3->mc_pg[mc->mc_top], m3->mc_ki[mc->mc_top]); } } } @@ -8764,12 +8752,8 @@ mdb_page_split(MDB_cursor *mc, MDB_val *newkey, MDB_val *newdata, pgno_t newpgno m3->mc_ki[ptop] >= mc->mc_ki[ptop]) { m3->mc_ki[ptop]++; } - if (m3->mc_xcursor && (m3->mc_xcursor->mx_cursor.mc_flags & C_INITIALIZED) && - IS_LEAF(mp)) { - MDB_node *node = NODEPTR(m3->mc_pg[mc->mc_top], m3->mc_ki[mc->mc_top]); - if ((node->mn_flags & (F_DUPDATA|F_SUBDATA)) == F_DUPDATA) - m3->mc_xcursor->mx_cursor.mc_pg[0] = NODEDATA(node); - } + if (XCURSOR_INITED(m3) && IS_LEAF(mp)) + XCURSOR_REFRESH(m3, m3->mc_pg[mc->mc_top], m3->mc_ki[mc->mc_top]); } } DPRINTF(("mp left: %d, rp left: %d", SIZELEFT(mp), SIZELEFT(rp))); From f24d7d2c83670848194f89c8455538394d2e09f5 Mon Sep 17 00:00:00 2001 From: Howard Chu Date: Thu, 7 Jan 2016 18:28:29 +0000 Subject: [PATCH 08/39] Add MDB_PREV_MULTIPLE (collected mdb.master changes) Logical counterpart to GET_MULTIPLE, NEXT_MULTIPLE --- libraries/liblmdb/lmdb.h | 9 ++++++--- libraries/liblmdb/mdb.c | 26 +++++++++++++++++++++++++- 2 files changed, 31 insertions(+), 4 deletions(-) diff --git a/libraries/liblmdb/lmdb.h b/libraries/liblmdb/lmdb.h index 14a05462a3..bc3a29484c 100644 --- a/libraries/liblmdb/lmdb.h +++ b/libraries/liblmdb/lmdb.h @@ -388,7 +388,9 @@ typedef enum MDB_cursor_op { MDB_PREV_NODUP, /**< Position at last data item of previous key */ MDB_SET, /**< Position at specified key */ MDB_SET_KEY, /**< Position at specified key, return key + data */ - MDB_SET_RANGE /**< Position at first key greater than or equal to specified key. */ + MDB_SET_RANGE, /**< Position at first key greater than or equal to specified key. */ + MDB_PREV_MULTIPLE /**< Position at previous page and return key and up to + a page of duplicate data items. Only for #MDB_DUPFIXED */ } MDB_cursor_op; /** @defgroup errors Return Codes @@ -1095,8 +1097,9 @@ int mdb_txn_renew(MDB_txn *txn); * This flag may only be used in combination with #MDB_DUPSORT. This option * tells the library that the data items for this database are all the same * size, which allows further optimizations in storage and retrieval. When - * all data items are the same size, the #MDB_GET_MULTIPLE and #MDB_NEXT_MULTIPLE - * cursor operations may be used to retrieve multiple items at once. + * all data items are the same size, the #MDB_GET_MULTIPLE, #MDB_NEXT_MULTIPLE + * and #MDB_PREV_MULTIPLE cursor operations may be used to retrieve multiple + * items at once. *
  • #MDB_INTEGERDUP * This option specifies that duplicate data items are binary integers, * similar to #MDB_INTEGERKEY keys. diff --git a/libraries/liblmdb/mdb.c b/libraries/liblmdb/mdb.c index 465cf0dc00..7d34276566 100644 --- a/libraries/liblmdb/mdb.c +++ b/libraries/liblmdb/mdb.c @@ -1437,7 +1437,7 @@ mdb_strerror(int err) * and the actual use of the message uses more than 4K of stack. */ #define MSGSIZE 1024 -#define PADSIZE 4096 +#define PADSIZE 4096 char buf[MSGSIZE+PADSIZE], *ptr = buf; #endif int i; @@ -6215,6 +6215,30 @@ fetchm: } } break; + case MDB_PREV_MULTIPLE: + if (data == NULL) { + rc = EINVAL; + break; + } + if (!(mc->mc_db->md_flags & MDB_DUPFIXED)) { + rc = MDB_INCOMPATIBLE; + break; + } + if (!(mc->mc_flags & C_INITIALIZED)) + rc = mdb_cursor_last(mc, key, data); + else + rc = MDB_SUCCESS; + if (rc == MDB_SUCCESS) { + MDB_cursor *mx = &mc->mc_xcursor->mx_cursor; + if (mx->mc_flags & C_INITIALIZED) { + rc = mdb_cursor_sibling(mx, 0); + if (rc == MDB_SUCCESS) + goto fetchm; + } else { + rc = MDB_NOTFOUND; + } + } + break; case MDB_NEXT: case MDB_NEXT_DUP: case MDB_NEXT_NODUP: From 1d8623504790540e481b4c24838f9c48ef58c117 Mon Sep 17 00:00:00 2001 From: Hallvard Furuseth Date: Sat, 25 Jun 2016 07:55:34 +0200 Subject: [PATCH 09/39] ITS#8209 MDB_CP_COMPACT: Threading/error handling Handle errors. Fix cond_wait condition so mc_new is the sole control var. Drop specious cond_waits. Do not look at 'mo' while copythr writes it. Don't know if posix_memalign() pointer is defined after failure. Some _aligned_free() doc seems to say arg NULL = user error. --- libraries/liblmdb/mdb.c | 157 ++++++++++++++++++++++------------------ 1 file changed, 85 insertions(+), 72 deletions(-) diff --git a/libraries/liblmdb/mdb.c b/libraries/liblmdb/mdb.c index 7d34276566..255e216724 100644 --- a/libraries/liblmdb/mdb.c +++ b/libraries/liblmdb/mdb.c @@ -291,8 +291,10 @@ typedef HANDLE mdb_mutex_t, mdb_mutexref_t; #define pthread_mutex_lock(x) WaitForSingleObject(*x, INFINITE) #define pthread_cond_signal(x) SetEvent(*x) #define pthread_cond_wait(cond,mutex) do{SignalObjectAndWait(*mutex, *cond, INFINITE, FALSE); WaitForSingleObject(*mutex, INFINITE);}while(0) -#define THREAD_CREATE(thr,start,arg) thr=CreateThread(NULL,0,start,arg,0,NULL) -#define THREAD_FINISH(thr) WaitForSingleObject(thr, INFINITE) +#define THREAD_CREATE(thr,start,arg) \ + (((thr) = CreateThread(NULL, 0, start, arg, 0, NULL)) ? 0 : ErrCode()) +#define THREAD_FINISH(thr) \ + (WaitForSingleObject(thr, INFINITE) ? ErrCode() : 0) #define LOCK_MUTEX0(mutex) WaitForSingleObject(mutex, INFINITE) #define UNLOCK_MUTEX(mutex) ReleaseMutex(mutex) #define mdb_mutex_consistent(mutex) 0 @@ -8818,11 +8820,12 @@ mdb_put(MDB_txn *txn, MDB_dbi dbi, #ifndef MDB_WBUF #define MDB_WBUF (1024*1024) #endif +#define MDB_EOF 0x10 /**< #mdb_env_copyfd1() is done reading */ - /** State needed for a compacting copy. */ + /** State needed for a double-buffering compacting copy. */ typedef struct mdb_copy { pthread_mutex_t mc_mutex; - pthread_cond_t mc_cond; + pthread_cond_t mc_cond; /**< Condition variable for #mc_new */ char *mc_wbuf[2]; char *mc_over[2]; MDB_env *mc_env; @@ -8831,10 +8834,12 @@ typedef struct mdb_copy { int mc_olen[2]; pgno_t mc_next_pgno; HANDLE mc_fd; - int mc_status; - volatile int mc_new; - int mc_toggle; - + int mc_toggle; /**< Buffer number in provider */ + int mc_new; /**< (0-2 buffers to write) | (#MDB_EOF at end) */ + /** Error code. Never cleared if set. Both threads can set nonzero + * to fail the copy. Not mutex-protected, LMDB expects atomic int. + */ + volatile int mc_error; } mdb_copy; /** Dedicated writer thread for compacting copy. */ @@ -8853,20 +8858,16 @@ mdb_env_copythr(void *arg) #endif pthread_mutex_lock(&my->mc_mutex); - my->mc_new = 0; - pthread_cond_signal(&my->mc_cond); for(;;) { while (!my->mc_new) pthread_cond_wait(&my->mc_cond, &my->mc_mutex); - if (my->mc_new < 0) { - my->mc_new = 0; + if (my->mc_new == 0 + MDB_EOF) /* 0 buffers, just EOF */ break; - } - my->mc_new = 0; wsize = my->mc_wlen[toggle]; ptr = my->mc_wbuf[toggle]; again: - while (wsize > 0) { + rc = MDB_SUCCESS; + while (wsize > 0 && !my->mc_error) { DO_WRITE(rc, my->mc_fd, ptr, wsize, len); if (!rc) { rc = ErrCode(); @@ -8882,8 +8883,7 @@ again: } } if (rc) { - my->mc_status = rc; - break; + my->mc_error = rc; } /* If there's an overflow page tail, write it too */ if (my->mc_olen[toggle]) { @@ -8894,34 +8894,41 @@ again: } my->mc_wlen[toggle] = 0; toggle ^= 1; + /* Return the empty buffer to provider */ + my->mc_new--; pthread_cond_signal(&my->mc_cond); } - pthread_cond_signal(&my->mc_cond); pthread_mutex_unlock(&my->mc_mutex); return (THREAD_RET)0; #undef DO_WRITE } - /** Tell the writer thread there's a buffer ready to write */ + /** Give buffer and/or #MDB_EOF to writer thread, await unused buffer. + * + * @param[in] my control structure. + * @param[in] adjust (1 to hand off 1 buffer) | (MDB_EOF when ending). + */ static int ESECT -mdb_env_cthr_toggle(mdb_copy *my, int st) +mdb_env_cthr_toggle(mdb_copy *my, int adjust) { - int toggle = my->mc_toggle ^ 1; pthread_mutex_lock(&my->mc_mutex); - if (my->mc_status) { - pthread_mutex_unlock(&my->mc_mutex); - return my->mc_status; - } - while (my->mc_new == 1) - pthread_cond_wait(&my->mc_cond, &my->mc_mutex); - my->mc_new = st; - my->mc_toggle = toggle; + my->mc_new += adjust; pthread_cond_signal(&my->mc_cond); + while (my->mc_new & 2) /* both buffers in use */ + pthread_cond_wait(&my->mc_cond, &my->mc_mutex); pthread_mutex_unlock(&my->mc_mutex); - return 0; + + my->mc_toggle ^= (adjust & 1); + /* Both threads reset mc_wlen, to be safe from threading errors */ + my->mc_wlen[my->mc_toggle] = 0; + return my->mc_error; } - /** Depth-first tree traversal for compacting copy. */ + /** Depth-first tree traversal for compacting copy. + * @param[in] my control structure. + * @param[in,out] pg database root. + * @param[in] flags includes #F_DUPDATA if it is a sorted-duplicate sub-DB. + */ static int ESECT mdb_env_cwalk(mdb_copy *my, pgno_t *pg, int flags) { @@ -8983,6 +8990,7 @@ mdb_env_cwalk(mdb_copy *my, pgno_t *pg, int flags) } memcpy(&pg, NODEDATA(ni), sizeof(pg)); + memcpy(NODEDATA(ni), &my->mc_next_pgno, sizeof(pgno_t)); rc = mdb_page_get(&mc, pg, &omp, NULL); if (rc) goto done; @@ -9005,7 +9013,6 @@ mdb_env_cwalk(mdb_copy *my, pgno_t *pg, int flags) goto done; toggle = my->mc_toggle; } - memcpy(NODEDATA(ni), &mo->mp_pgno, sizeof(pgno_t)); } else if (ni->mn_flags & F_SUBDATA) { MDB_db db; @@ -9083,47 +9090,55 @@ mdb_env_copyfd1(MDB_env *env, HANDLE fd) { MDB_meta *mm; MDB_page *mp; - mdb_copy my; + mdb_copy my = {0}; MDB_txn *txn = NULL; pthread_t thr; - int rc; + int rc = MDB_SUCCESS; #ifdef _WIN32 - my.mc_mutex = CreateMutex(NULL, FALSE, NULL); - my.mc_cond = CreateEvent(NULL, FALSE, FALSE, NULL); + if (!(my.mc_mutex = CreateMutex(NULL, FALSE, NULL)) || + !(my.mc_cond = CreateEvent(NULL, FALSE, FALSE, NULL))) { + rc = ErrCode(); + goto done; + } my.mc_wbuf[0] = _aligned_malloc(MDB_WBUF*2, env->me_os_psize); - if (my.mc_wbuf[0] == NULL) - return errno; + if (my.mc_wbuf[0] == NULL) { + /* _aligned_malloc() sets errno, but we use Windows error codes */ + rc = ERROR_NOT_ENOUGH_MEMORY; + goto done; + } #else - pthread_mutex_init(&my.mc_mutex, NULL); - pthread_cond_init(&my.mc_cond, NULL); + if ((rc = pthread_mutex_init(&my.mc_mutex, NULL)) != 0) + return rc; + if ((rc = pthread_cond_init(&my.mc_cond, NULL)) != 0) + goto done2; #ifdef HAVE_MEMALIGN my.mc_wbuf[0] = memalign(env->me_os_psize, MDB_WBUF*2); - if (my.mc_wbuf[0] == NULL) - return errno; + if (my.mc_wbuf[0] == NULL) { + rc = errno; + goto done; + } #else - rc = posix_memalign((void **)&my.mc_wbuf[0], env->me_os_psize, MDB_WBUF*2); - if (rc) - return rc; + { + void *p; + if ((rc = posix_memalign(&p, env->me_os_psize, MDB_WBUF*2)) != 0) + goto done; + my.mc_wbuf[0] = p; + } #endif #endif memset(my.mc_wbuf[0], 0, MDB_WBUF*2); my.mc_wbuf[1] = my.mc_wbuf[0] + MDB_WBUF; - my.mc_wlen[0] = 0; - my.mc_wlen[1] = 0; - my.mc_olen[0] = 0; - my.mc_olen[1] = 0; my.mc_next_pgno = NUM_METAS; - my.mc_status = 0; - my.mc_new = 1; - my.mc_toggle = 0; my.mc_env = env; my.mc_fd = fd; - THREAD_CREATE(thr, mdb_env_copythr, &my); + rc = THREAD_CREATE(thr, mdb_env_copythr, &my); + if (rc) + goto done; rc = mdb_txn_begin(env, NULL, MDB_RDONLY, &txn); if (rc) - return rc; + goto finish; mp = (MDB_page *)my.mc_wbuf[0]; memset(mp, 0, NUM_METAS * env->me_psize); @@ -9149,6 +9164,8 @@ mdb_env_copyfd1(MDB_env *env, HANDLE fd) mdb_cursor_init(&mc, txn, FREE_DBI, NULL); while ((rc = mdb_cursor_get(&mc, &key, &data, MDB_NEXT)) == 0) freecount += *(MDB_ID *)data.mv_data; + if (rc != MDB_NOTFOUND) + goto finish; freecount += txn->mt_dbs[FREE_DBI].md_branch_pages + txn->mt_dbs[FREE_DBI].md_leaf_pages + txn->mt_dbs[FREE_DBI].md_overflow_pages; @@ -9165,31 +9182,27 @@ mdb_env_copyfd1(MDB_env *env, HANDLE fd) } my.mc_wlen[0] = env->me_psize * NUM_METAS; my.mc_txn = txn; - pthread_mutex_lock(&my.mc_mutex); - while(my.mc_new) - pthread_cond_wait(&my.mc_cond, &my.mc_mutex); - pthread_mutex_unlock(&my.mc_mutex); rc = mdb_env_cwalk(&my, &txn->mt_dbs[MAIN_DBI].md_root, 0); - if (rc == MDB_SUCCESS && my.mc_wlen[my.mc_toggle]) - rc = mdb_env_cthr_toggle(&my, 1); - mdb_env_cthr_toggle(&my, -1); - pthread_mutex_lock(&my.mc_mutex); - while(my.mc_new) - pthread_cond_wait(&my.mc_cond, &my.mc_mutex); - pthread_mutex_unlock(&my.mc_mutex); - THREAD_FINISH(thr); +finish: + if (rc) + my.mc_error = rc; + mdb_env_cthr_toggle(&my, 1 | MDB_EOF); + rc = THREAD_FINISH(thr); mdb_txn_abort(txn); + +done: #ifdef _WIN32 - CloseHandle(my.mc_cond); - CloseHandle(my.mc_mutex); - _aligned_free(my.mc_wbuf[0]); + if (my.mc_wbuf[0]) _aligned_free(my.mc_wbuf[0]); + if (my.mc_cond) CloseHandle(my.mc_cond); + if (my.mc_mutex) CloseHandle(my.mc_mutex); #else - pthread_cond_destroy(&my.mc_cond); - pthread_mutex_destroy(&my.mc_mutex); free(my.mc_wbuf[0]); + pthread_cond_destroy(&my.mc_cond); +done2: + pthread_mutex_destroy(&my.mc_mutex); #endif - return rc; + return rc ? rc : my.mc_error; } /** Copy environment as-is. */ From 14ca16034a52785c2bdcdfac406c6dc9de88a207 Mon Sep 17 00:00:00 2001 From: Hallvard Furuseth Date: Sat, 25 Jun 2016 07:57:04 +0200 Subject: [PATCH 10/39] ITS#8209 MDB_CP_COMPACT: Handle empty or broken DB Preserve DB flags (use metapage#1) when main DB is empty. Fail if metapage root != actual root in output file. --- libraries/liblmdb/lmdb.h | 1 + libraries/liblmdb/mdb.c | 36 +++++++++++++++++++++++------------- libraries/liblmdb/mdb_copy.1 | 1 + 3 files changed, 25 insertions(+), 13 deletions(-) diff --git a/libraries/liblmdb/lmdb.h b/libraries/liblmdb/lmdb.h index bc3a29484c..a356538595 100644 --- a/libraries/liblmdb/lmdb.h +++ b/libraries/liblmdb/lmdb.h @@ -679,6 +679,7 @@ int mdb_env_copyfd(MDB_env *env, mdb_filehandle_t fd); *
  • #MDB_CP_COMPACT - Perform compaction while copying: omit free * pages and sequentially renumber all pages in output. This option * consumes more CPU and runs more slowly than the default. + * Currently it fails if the environment has suffered a page leak. * * @return A non-zero error value on failure and 0 on success. */ diff --git a/libraries/liblmdb/mdb.c b/libraries/liblmdb/mdb.c index 255e216724..28ccb73e66 100644 --- a/libraries/liblmdb/mdb.c +++ b/libraries/liblmdb/mdb.c @@ -9093,6 +9093,7 @@ mdb_env_copyfd1(MDB_env *env, HANDLE fd) mdb_copy my = {0}; MDB_txn *txn = NULL; pthread_t thr; + pgno_t root, new_root; int rc = MDB_SUCCESS; #ifdef _WIN32 @@ -9154,10 +9155,12 @@ mdb_env_copyfd1(MDB_env *env, HANDLE fd) *(MDB_meta *)METADATA(mp) = *mm; mm = (MDB_meta *)METADATA(mp); - /* Count the number of free pages, subtract from lastpg to find - * number of active pages - */ - { + /* Set metapage 1 with current main DB */ + root = new_root = txn->mt_dbs[MAIN_DBI].md_root; + if (root != P_INVALID) { + /* Count free pages + freeDB pages. Subtract from last_pg + * to find the new last_pg, which also becomes the new root. + */ MDB_ID freecount = 0; MDB_cursor mc; MDB_val key, data; @@ -9170,19 +9173,26 @@ mdb_env_copyfd1(MDB_env *env, HANDLE fd) txn->mt_dbs[FREE_DBI].md_leaf_pages + txn->mt_dbs[FREE_DBI].md_overflow_pages; - /* Set metapage 1 */ - mm->mm_last_pg = txn->mt_next_pgno - freecount - 1; + new_root = txn->mt_next_pgno - 1 - freecount; + mm->mm_last_pg = new_root; mm->mm_dbs[MAIN_DBI] = txn->mt_dbs[MAIN_DBI]; - if (mm->mm_last_pg > NUM_METAS-1) { - mm->mm_dbs[MAIN_DBI].md_root = mm->mm_last_pg; - mm->mm_txnid = 1; - } else { - mm->mm_dbs[MAIN_DBI].md_root = P_INVALID; - } + mm->mm_dbs[MAIN_DBI].md_root = new_root; + } else { + /* When the DB is empty, handle it specially to + * fix any breakage like page leaks from ITS#8174. + */ + mm->mm_dbs[MAIN_DBI].md_flags = txn->mt_dbs[MAIN_DBI].md_flags; } + if (root != P_INVALID || mm->mm_dbs[MAIN_DBI].md_flags) { + mm->mm_txnid = 1; /* use metapage 1 */ + } + my.mc_wlen[0] = env->me_psize * NUM_METAS; my.mc_txn = txn; - rc = mdb_env_cwalk(&my, &txn->mt_dbs[MAIN_DBI].md_root, 0); + rc = mdb_env_cwalk(&my, &root, 0); + if (rc == MDB_SUCCESS && root != new_root) { + rc = MDB_INCOMPATIBLE; /* page leak or corrupt DB */ + } finish: if (rc) diff --git a/libraries/liblmdb/mdb_copy.1 b/libraries/liblmdb/mdb_copy.1 index 258affbfb4..4387ac3a14 100644 --- a/libraries/liblmdb/mdb_copy.1 +++ b/libraries/liblmdb/mdb_copy.1 @@ -36,6 +36,7 @@ Write the library version number to the standard output, and exit. Compact while copying. Only current data pages will be copied; freed or unused pages will be omitted from the copy. This option will slow down the backup process as it is more CPU-intensive. +Currently it fails if the environment has suffered a page leak. .TP .BR \-n Open LDMB environment(s) which do not use subdirectories. From b922a5a086bc5f615e69aa82de5dff46b6ea8895 Mon Sep 17 00:00:00 2001 From: Hallvard Furuseth Date: Sat, 3 Sep 2016 09:41:30 +0200 Subject: [PATCH 11/39] Silence warning for initializer "mdb_copy my = {0}" 1st struct member was not a scalar. --- libraries/liblmdb/mdb.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/libraries/liblmdb/mdb.c b/libraries/liblmdb/mdb.c index 28ccb73e66..3f78d3c483 100644 --- a/libraries/liblmdb/mdb.c +++ b/libraries/liblmdb/mdb.c @@ -8824,12 +8824,12 @@ mdb_put(MDB_txn *txn, MDB_dbi dbi, /** State needed for a double-buffering compacting copy. */ typedef struct mdb_copy { + MDB_env *mc_env; + MDB_txn *mc_txn; pthread_mutex_t mc_mutex; pthread_cond_t mc_cond; /**< Condition variable for #mc_new */ char *mc_wbuf[2]; char *mc_over[2]; - MDB_env *mc_env; - MDB_txn *mc_txn; int mc_wlen[2]; int mc_olen[2]; pgno_t mc_next_pgno; From b92a4816bf67db1fbf6aad0282e485abd3cfd9ef Mon Sep 17 00:00:00 2001 From: Hallvard Furuseth Date: Tue, 2 Aug 2016 22:16:09 +0200 Subject: [PATCH 12/39] Doc fixes: VALID_FLAGS, mm_last_pg, mt_loose_count --- libraries/liblmdb/mdb.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/libraries/liblmdb/mdb.c b/libraries/liblmdb/mdb.c index 3f78d3c483..fe58cab503 100644 --- a/libraries/liblmdb/mdb.c +++ b/libraries/liblmdb/mdb.c @@ -974,9 +974,9 @@ typedef struct MDB_db { pgno_t md_root; /**< the root page of this tree */ } MDB_db; - /** mdb_dbi_open flags */ #define MDB_VALID 0x8000 /**< DB handle is valid, for me_dbflags */ #define PERSISTENT_FLAGS (0xffff & ~(MDB_VALID)) + /** #mdb_dbi_open() flags */ #define VALID_FLAGS (MDB_REVERSEKEY|MDB_DUPSORT|MDB_INTEGERKEY|MDB_DUPFIXED|\ MDB_INTEGERDUP|MDB_REVERSEDUP|MDB_CREATE) @@ -1007,7 +1007,10 @@ typedef struct MDB_meta { #define mm_psize mm_dbs[FREE_DBI].md_pad /** Any persistent environment flags. @ref mdb_env */ #define mm_flags mm_dbs[FREE_DBI].md_flags - pgno_t mm_last_pg; /**< last used page in file */ + /** Last used page in the datafile. + * Actually the file may be shorter if the freeDB lists the final pages. + */ + pgno_t mm_last_pg; volatile txnid_t mm_txnid; /**< txnid that committed this page */ } MDB_meta; @@ -1057,7 +1060,7 @@ struct MDB_txn { * in this transaction, linked through #NEXT_LOOSE_PAGE(page). */ MDB_page *mt_loose_pgs; - /* #Number of loose pages (#mt_loose_pgs) */ + /** Number of loose pages (#mt_loose_pgs) */ int mt_loose_count; /** The sorted list of dirty pages we temporarily wrote to disk * because the dirty list was full. page numbers in here are From 7dab84978ca82225ded004221196d0942fb7f200 Mon Sep 17 00:00:00 2001 From: Hallvard Furuseth Date: Tue, 2 Aug 2016 22:17:54 +0200 Subject: [PATCH 13/39] Comment MDB_page --- libraries/liblmdb/mdb.c | 26 +++++++++++++++++++++----- 1 file changed, 21 insertions(+), 5 deletions(-) diff --git a/libraries/liblmdb/mdb.c b/libraries/liblmdb/mdb.c index fe58cab503..4adbf5783f 100644 --- a/libraries/liblmdb/mdb.c +++ b/libraries/liblmdb/mdb.c @@ -768,9 +768,23 @@ typedef struct MDB_txninfo { + (((MDB_PIDLOCK) != 0) << 16))) /** @} */ -/** Common header for all page types. - * Overflow records occupy a number of contiguous pages with no - * headers on any page after the first. +/** Common header for all page types. The page type depends on #mp_flags. + * + * #P_BRANCH and #P_LEAF pages have unsorted '#MDB_node's at the end, with + * sorted #mp_ptrs[] entries referring to them. Exception: #P_LEAF2 pages + * omit mp_ptrs and pack sorted #MDB_DUPFIXED values after the page header. + * + * #P_OVERFLOW records occupy one or more contiguous pages where only the + * first has a page header. They hold the real data of #F_BIGDATA nodes. + * + * #P_SUBP sub-pages are small leaf "pages" with duplicate data. + * A node with flag #F_DUPDATA but not #F_SUBDATA contains a sub-page. + * (Duplicate data can also go in sub-databases, which use normal pages.) + * + * #P_META pages contain #MDB_meta, the start point of an LMDB snapshot. + * + * Each non-metapage up to #MDB_meta.%mm_last_pg is reachable exactly once + * in the snapshot: Either used by a database or listed in a freeDB record. */ typedef struct MDB_page { #define mp_pgno mp_p.p_pgno @@ -779,7 +793,7 @@ typedef struct MDB_page { pgno_t p_pgno; /**< page number */ struct MDB_page *p_next; /**< for in-memory list of freed pages */ } mp_p; - uint16_t mp_pad; + uint16_t mp_pad; /**< key size if this is a LEAF2 page */ /** @defgroup mdb_page Page Flags * @ingroup internal * Flags for the page headers. @@ -846,7 +860,9 @@ typedef struct MDB_page { /** The number of overflow pages needed to store the given size. */ #define OVPAGES(size, psize) ((PAGEHDRSZ-1 + (size)) / (psize) + 1) - /** Link in #MDB_txn.%mt_loose_pgs list */ + /** Link in #MDB_txn.%mt_loose_pgs list. + * Kept outside the page header, which is needed when reusing the page. + */ #define NEXT_LOOSE_PAGE(p) (*(MDB_page **)((p) + 2)) /** Header for a single key/data pair within a page. From 831e70565e9e2074e462780c7b44b60dad230e56 Mon Sep 17 00:00:00 2001 From: Howard Chu Date: Fri, 19 Aug 2016 17:24:25 +0100 Subject: [PATCH 14/39] ITS#8481 make shared lib suffix overridable --- libraries/liblmdb/Makefile | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/libraries/liblmdb/Makefile b/libraries/liblmdb/Makefile index 0940c496ea..f254511f1d 100644 --- a/libraries/liblmdb/Makefile +++ b/libraries/liblmdb/Makefile @@ -26,6 +26,7 @@ OPT = -O2 -g CFLAGS = $(THREADS) $(OPT) $(W) $(XCFLAGS) LDLIBS = SOLIBS = +SOEXT = .so prefix = /usr/local exec_prefix = $(prefix) bindir = $(exec_prefix)/bin @@ -37,7 +38,7 @@ mandir = $(datarootdir)/man ######################################################################## IHDRS = lmdb.h -ILIBS = liblmdb.a liblmdb.so +ILIBS = liblmdb.a liblmdb$(SOEXT) IPROGS = mdb_stat mdb_copy mdb_dump mdb_load IDOCS = mdb_stat.1 mdb_copy.1 mdb_dump.1 mdb_load.1 PROGS = $(IPROGS) mtest mtest2 mtest3 mtest4 mtest5 @@ -63,7 +64,7 @@ test: all liblmdb.a: mdb.o midl.o $(AR) rs $@ mdb.o midl.o -liblmdb.so: mdb.lo midl.lo +liblmdb$(SOEXT): mdb.lo midl.lo # $(CC) $(LDFLAGS) -pthread -shared -Wl,-Bsymbolic -o $@ mdb.o midl.o $(SOLIBS) $(CC) $(LDFLAGS) -pthread -shared -o $@ mdb.lo midl.lo $(SOLIBS) From b95a2dc33328f12d18911ae8397349457f0c780c Mon Sep 17 00:00:00 2001 From: Howard Chu Date: Sat, 19 Dec 2015 23:51:31 +0000 Subject: [PATCH 15/39] Fix mdb_audit() printf format --- libraries/liblmdb/mdb.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/libraries/liblmdb/mdb.c b/libraries/liblmdb/mdb.c index 4adbf5783f..1b9e819371 100644 --- a/libraries/liblmdb/mdb.c +++ b/libraries/liblmdb/mdb.c @@ -1715,7 +1715,7 @@ static void mdb_audit(MDB_txn *txn) } } if (freecount + count + NUM_METAS != txn->mt_next_pgno) { - fprintf(stderr, "audit: %lu freecount: %lu count: %lu total: %lu next_pgno: %lu\n", + fprintf(stderr, "audit: %"Z"u freecount: %"Z"u count: %"Z"u total: %"Z"u next_pgno: %"Z"u\n", txn->mt_txnid, freecount, count+NUM_METAS, freecount+count+NUM_METAS, txn->mt_next_pgno); } From 590792f31bf201a92ab8b457a11bde31b3e75232 Mon Sep 17 00:00:00 2001 From: Hallvard Furuseth Date: Sun, 21 Aug 2016 23:20:22 +0200 Subject: [PATCH 16/39] Fix mdb_page_list() message --- libraries/liblmdb/mdb.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/libraries/liblmdb/mdb.c b/libraries/liblmdb/mdb.c index 1b9e819371..fca90c9eff 100644 --- a/libraries/liblmdb/mdb.c +++ b/libraries/liblmdb/mdb.c @@ -1599,7 +1599,7 @@ mdb_page_list(MDB_page *mp) pgno, ((MDB_meta *)METADATA(mp))->mm_txnid); return; default: - fprintf(stderr, "Bad page %"Z"u flags 0x%u\n", pgno, mp->mp_flags); + fprintf(stderr, "Bad page %"Z"u flags 0x%X\n", pgno, mp->mp_flags); return; } From 38564c92dac6420ec20ec3d8d94a3c6f7f2bff54 Mon Sep 17 00:00:00 2001 From: Hallvard Furuseth Date: Sun, 21 Aug 2016 23:22:18 +0200 Subject: [PATCH 17/39] Clean up MDB_USE_ROBUST #defines Make explicit and default nonzero equivalent. Parenthesize. --- libraries/liblmdb/mdb.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/libraries/liblmdb/mdb.c b/libraries/liblmdb/mdb.c index fca90c9eff..246c32eb2f 100644 --- a/libraries/liblmdb/mdb.c +++ b/libraries/liblmdb/mdb.c @@ -258,6 +258,10 @@ typedef SSIZE_T ssize_t; # define MDB_USE_ROBUST 0 # else # define MDB_USE_ROBUST 1 +# endif +#endif /* !MDB_USE_ROBUST */ + +#if defined(MDB_USE_POSIX_MUTEX) && (MDB_USE_ROBUST) /* glibc < 2.12 only provided _np API */ # if (defined(__GLIBC__) && GLIBC_VER < 0x02000c) || \ (defined(PTHREAD_MUTEX_ROBUST_NP) && !defined(PTHREAD_MUTEX_ROBUST)) @@ -265,10 +269,9 @@ typedef SSIZE_T ssize_t; # define pthread_mutexattr_setrobust(attr, flag) pthread_mutexattr_setrobust_np(attr, flag) # define pthread_mutex_consistent(mutex) pthread_mutex_consistent_np(mutex) # endif -# endif -#endif /* MDB_USE_ROBUST */ +#endif /* MDB_USE_POSIX_MUTEX && MDB_USE_ROBUST */ -#if defined(MDB_OWNERDEAD) && MDB_USE_ROBUST +#if defined(MDB_OWNERDEAD) && (MDB_USE_ROBUST) #define MDB_ROBUST_SUPPORTED 1 #endif From 56b81e6d576b82bd8992b1796e4c9d5a4ec12929 Mon Sep 17 00:00:00 2001 From: Howard Chu Date: Thu, 1 Sep 2016 00:41:35 +0100 Subject: [PATCH 18/39] ITS#8489 reset cursor EOF flag in cursor_set It usually gets done anyway, but one of the fastpath shortcuts bypassed this step. --- libraries/liblmdb/mdb.c | 1 + 1 file changed, 1 insertion(+) diff --git a/libraries/liblmdb/mdb.c b/libraries/liblmdb/mdb.c index 246c32eb2f..da3593b27d 100644 --- a/libraries/liblmdb/mdb.c +++ b/libraries/liblmdb/mdb.c @@ -5928,6 +5928,7 @@ mdb_cursor_set(MDB_cursor *mc, MDB_val *key, MDB_val *data, } } rc = 0; + mc->mc_flags &= ~C_EOF; goto set2; } } From 9036e261c4c1cb107029cb5ede784b9327fc8751 Mon Sep 17 00:00:00 2001 From: Hallvard Furuseth Date: Tue, 6 Sep 2016 17:41:02 +0200 Subject: [PATCH 19/39] ITS#7682 F_NOCACHE: Allow error, skip any O_DIRECT We can run without F_NOCACHE if it fails. And we do not know what combining it with O_DIRECT means, if a system has both. --- libraries/liblmdb/mdb.c | 11 +++-------- 1 file changed, 3 insertions(+), 8 deletions(-) diff --git a/libraries/liblmdb/mdb.c b/libraries/liblmdb/mdb.c index da3593b27d..0575958486 100644 --- a/libraries/liblmdb/mdb.c +++ b/libraries/liblmdb/mdb.c @@ -9394,17 +9394,12 @@ mdb_env_copy2(MDB_env *env, const char *path, unsigned int flags) } if (env->me_psize >= env->me_os_psize) { -#ifdef O_DIRECT +#ifdef F_NOCACHE /* __APPLE__ */ + (void) fcntl(newfd, F_NOCACHE, 1); +#elif defined O_DIRECT /* Set O_DIRECT if the file system supports it */ if ((rc = fcntl(newfd, F_GETFL)) != -1) (void) fcntl(newfd, F_SETFL, rc | O_DIRECT); -#endif -#ifdef F_NOCACHE /* __APPLE__ */ - rc = fcntl(newfd, F_NOCACHE, 1); - if (rc) { - rc = ErrCode(); - goto leave; - } #endif } From bffe2ebcba56e0475d50c5d2aae86f2b7b0d3090 Mon Sep 17 00:00:00 2001 From: Hallvard Furuseth Date: Tue, 6 Sep 2016 17:48:31 +0200 Subject: [PATCH 20/39] Drop spurious Errcode() call --- libraries/liblmdb/mdb.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/libraries/liblmdb/mdb.c b/libraries/liblmdb/mdb.c index 0575958486..9c85230a6b 100644 --- a/libraries/liblmdb/mdb.c +++ b/libraries/liblmdb/mdb.c @@ -4523,7 +4523,7 @@ mdb_env_setup_locks(MDB_env *env, char *lpath, int mode, int *excl) if (rc == MDB_ERRCODE_ROFS && (env->me_flags & MDB_RDONLY)) { return MDB_SUCCESS; } - goto fail_errno; + goto fail; } #if ! ((MDB_CLOEXEC) || defined(_WIN32)) /* Lose record locks when exec*() */ From e674f8241c5c9b9005688a94ce62f60556942b30 Mon Sep 17 00:00:00 2001 From: Hallvard Furuseth Date: Tue, 6 Sep 2016 17:56:13 +0200 Subject: [PATCH 21/39] Clean up strange fcntl result check ...and check !MDB_CLOEXEC in an 'if' rather than '#if' to match its non-zero usage. --- libraries/liblmdb/mdb.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/libraries/liblmdb/mdb.c b/libraries/liblmdb/mdb.c index 9c85230a6b..36d50b82bb 100644 --- a/libraries/liblmdb/mdb.c +++ b/libraries/liblmdb/mdb.c @@ -4499,9 +4499,9 @@ mdb_env_setup_locks(MDB_env *env, char *lpath, int mode, int *excl) #ifdef O_CLOEXEC /* Linux: Open file and set FD_CLOEXEC atomically */ # define MDB_CLOEXEC O_CLOEXEC #else - int fdflags; # define MDB_CLOEXEC 0 #endif + int fdflags; #endif int rc; off_t size, rsize; @@ -4525,10 +4525,10 @@ mdb_env_setup_locks(MDB_env *env, char *lpath, int mode, int *excl) } goto fail; } -#if ! ((MDB_CLOEXEC) || defined(_WIN32)) +#ifndef _WIN32 /* Lose record locks when exec*() */ - if ((fdflags = fcntl(env->me_lfd, F_GETFD) | FD_CLOEXEC) >= 0) - fcntl(env->me_lfd, F_SETFD, fdflags); + if (!(MDB_CLOEXEC) && (fdflags = fcntl(env->me_lfd, F_GETFD)) != -1) + fcntl(env->me_lfd, F_SETFD, fdflags | FD_CLOEXEC); #endif if (!(env->me_flags & MDB_NOTLS)) { From f2ecddbcf76dec209de2e4d15f2a2d7eb41ce20f Mon Sep 17 00:00:00 2001 From: Hallvard Furuseth Date: Tue, 6 Sep 2016 18:12:01 +0200 Subject: [PATCH 22/39] ITS#7992 Tighter utf8_to_utf16(), fix errcodes The 0xFFFD check seems due to misleading MultiByteToWideChar() doc. Bad UTF-8 gives 0xFFFD in the output string, not the return value. --- libraries/liblmdb/mdb.c | 40 +++++++++++++++++++++++----------------- 1 file changed, 23 insertions(+), 17 deletions(-) diff --git a/libraries/liblmdb/mdb.c b/libraries/liblmdb/mdb.c index 36d50b82bb..4e4385c04c 100644 --- a/libraries/liblmdb/mdb.c +++ b/libraries/liblmdb/mdb.c @@ -10104,25 +10104,31 @@ mdb_mutex_failed(MDB_env *env, mdb_mutexref_t mutex, int rc) return rc; } #endif /* MDB_ROBUST_SUPPORTED */ -/** @} */ #if defined(_WIN32) -static int utf8_to_utf16(const char *src, int srcsize, wchar_t **dst, int *dstsize) +static int ESECT +utf8_to_utf16(const char *src, int srcsize, wchar_t **dst, int *dstsize) { - int need; - wchar_t *result; - need = MultiByteToWideChar(CP_UTF8, 0, src, srcsize, NULL, 0); - if (need == 0xFFFD) - return EILSEQ; - if (need == 0) - return EINVAL; - result = malloc(sizeof(wchar_t) * need); - if (!result) - return ENOMEM; - MultiByteToWideChar(CP_UTF8, 0, src, srcsize, result, need); - if (dstsize) - *dstsize = need; - *dst = result; - return 0; + int rc, need = 0; + wchar_t *result = NULL; + for (;;) { /* malloc result, then fill it in */ + need = MultiByteToWideChar(CP_UTF8, 0, src, srcsize, result, need); + if (!need) { + rc = ErrCode(); + free(result); + return rc; + } + if (!result) { + result = malloc(sizeof(wchar_t) * need); + if (!result) + return ENOMEM; + continue; + } + if (dstsize) + *dstsize = need; + *dst = result; + return MDB_SUCCESS; + } } #endif /* defined(_WIN32) */ +/** @} */ From 3faef632a027681bf4682d94a5a0e7114d8c1081 Mon Sep 17 00:00:00 2001 From: Hallvard Furuseth Date: Sat, 17 Sep 2016 21:31:04 +0200 Subject: [PATCH 23/39] Factor filename handling out to mdb_fname_*() No change in functionality, except needs less mallocing. --- libraries/liblmdb/mdb.c | 197 +++++++++++++++++++++++----------------- 1 file changed, 113 insertions(+), 84 deletions(-) diff --git a/libraries/liblmdb/mdb.c b/libraries/liblmdb/mdb.c index 4e4385c04c..af0e9364a0 100644 --- a/libraries/liblmdb/mdb.c +++ b/libraries/liblmdb/mdb.c @@ -41,6 +41,8 @@ #ifdef _WIN32 #include #include +#include /* get wcscpy() */ + /** getpid() returns int; MinGW defines pid_t but MinGW64 typedefs it * as int64 which is wrong. MSVC doesn't define it at all, so just * don't use it. @@ -1415,7 +1417,8 @@ static SECURITY_DESCRIPTOR mdb_null_sd; static SECURITY_ATTRIBUTES mdb_all_sa; static int mdb_sec_inited; -static int utf8_to_utf16(const char *src, int srcsize, wchar_t **dst, int *dstsize); +struct MDB_name; +static int utf8_to_utf16(const char *src, struct MDB_name *dst, int xtra); #endif /** Return the library version info. */ @@ -4079,6 +4082,79 @@ mdb_fsize(HANDLE fd, size_t *size) return MDB_SUCCESS; } + +#ifdef _WIN32 +typedef wchar_t mdb_nchar_t; +# define MDB_NAME(str) L##str +# define mdb_name_cpy wcscpy +#else +/** Character type for file names: char on Unix, wchar_t on Windows */ +typedef char mdb_nchar_t; +# define MDB_NAME(str) str /**< #mdb_nchar_t[] string literal */ +# define mdb_name_cpy strcpy /**< Copy name (#mdb_nchar_t string) */ +#endif + +/** Filename - string of #mdb_nchar_t[] */ +typedef struct MDB_name { + int mn_len; /**< Length */ + int mn_alloced; /**< True if #mn_val was malloced */ + mdb_nchar_t *mn_val; /**< Contents */ +} MDB_name; + +/** Filename suffixes [datafile,lockfile][without,with MDB_NOSUBDIR] */ +static const mdb_nchar_t *const mdb_suffixes[2][2] = { + { MDB_NAME("/data.mdb"), MDB_NAME("") }, + { MDB_NAME("/lock.mdb"), MDB_NAME("-lock") } +}; + +#define MDB_SUFFLEN 9 /**< Max string length in #mdb_suffixes[] */ + +/** Set up filename + scratch area for filename suffix, for opening files. + * It should be freed with #mdb_fname_destroy(). + * On Windows, paths are converted from char *UTF-8 to wchar_t *UTF-16. + * + * @param[in] path Pathname for #mdb_env_open(). + * @param[in] envflags Whether a subdir and/or lockfile will be used. + * @param[out] fname Resulting filename, with room for a suffix if necessary. + */ +static int ESECT +mdb_fname_init(const char *path, unsigned envflags, MDB_name *fname) +{ + int no_suffix = F_ISSET(envflags, MDB_NOSUBDIR|MDB_NOLOCK); + fname->mn_alloced = 0; +#ifdef _WIN32 + return utf8_to_utf16(path, fname, no_suffix ? 0 : MDB_SUFFLEN); +#else + fname->mn_len = strlen(path); + if (no_suffix) + fname->mn_val = (char *) path; + else if ((fname->mn_val = malloc(fname->mn_len + MDB_SUFFLEN+1)) != NULL) { + fname->mn_alloced = 1; + strcpy(fname->mn_val, path); + } + else + return ENOMEM; + return MDB_SUCCESS; +#endif +} + +/** Destroy \b fname from #mdb_fname_init() */ +#define mdb_fname_destroy(fname) \ + do { if ((fname).mn_alloced) free((fname).mn_val); } while (0) + +enum mdb_fopen_type { + MDB_O_DATA, MDB_O_LOCKS +}; + +static void ESECT +mdb_fname_suffix_set(MDB_env *env, MDB_name *fname, enum mdb_fopen_type which) +{ + if (fname->mn_alloced) /* modifiable copy */ + mdb_name_cpy(fname->mn_val + fname->mn_len, + mdb_suffixes[which==MDB_O_LOCKS][F_ISSET(env->me_flags, MDB_NOSUBDIR)]); +} + + #ifdef BROKEN_FDATASYNC #include #include @@ -4484,13 +4560,13 @@ mdb_hash_enc(MDB_val *val, char *encbuf) /** Open and/or initialize the lock region for the environment. * @param[in] env The LMDB environment. - * @param[in] lpath The pathname of the file used for the lock region. + * @param[in] fname Filename + scratch area, from #mdb_fname_init(). * @param[in] mode The Unix permissions for the file, if we create it. * @param[in,out] excl In -1, out lock type: -1 none, 0 shared, 1 exclusive * @return 0 on success, non-zero on failure. */ static int ESECT -mdb_env_setup_locks(MDB_env *env, char *lpath, int mode, int *excl) +mdb_env_setup_locks(MDB_env *env, MDB_name *fname, int mode, int *excl) { #ifdef _WIN32 # define MDB_ERRCODE_ROFS ERROR_WRITE_PROTECT @@ -4506,17 +4582,13 @@ mdb_env_setup_locks(MDB_env *env, char *lpath, int mode, int *excl) int rc; off_t size, rsize; + mdb_fname_suffix_set(env, fname, MDB_O_LOCKS); #ifdef _WIN32 - wchar_t *wlpath; - rc = utf8_to_utf16(lpath, -1, &wlpath, NULL); - if (rc) - return rc; - env->me_lfd = CreateFileW(wlpath, GENERIC_READ|GENERIC_WRITE, + env->me_lfd = CreateFileW(fname->mn_val, GENERIC_READ|GENERIC_WRITE, FILE_SHARE_READ|FILE_SHARE_WRITE, NULL, OPEN_ALWAYS, FILE_ATTRIBUTE_NORMAL, NULL); - free(wlpath); #else - env->me_lfd = open(lpath, O_RDWR|O_CREAT|MDB_CLOEXEC, mode); + env->me_lfd = open(fname->mn_val, O_RDWR|O_CREAT|MDB_CLOEXEC, mode); #endif if (env->me_lfd == INVALID_HANDLE_VALUE) { rc = ErrCode(); @@ -4719,12 +4791,6 @@ fail: return rc; } - /** The name of the lock file in the DB environment */ -#define LOCKNAME "/lock.mdb" - /** The name of the data file in the DB environment */ -#define DATANAME "/data.mdb" - /** The suffix of the lock file when no subdir is used */ -#define LOCKSUFF "-lock" /** Only a subset of the @ref mdb_env flags can be changed * at runtime. Changing other flags requires closing the * environment and re-opening it with the new flags. @@ -4741,35 +4807,17 @@ int ESECT mdb_env_open(MDB_env *env, const char *path, unsigned int flags, mdb_mode_t mode) { int oflags, rc, len, excl = -1; - char *lpath, *dpath; -#ifdef _WIN32 - wchar_t *wpath; -#endif + MDB_name fname; if (env->me_fd!=INVALID_HANDLE_VALUE || (flags & ~(CHANGEABLE|CHANGELESS))) return EINVAL; - len = strlen(path); - if (flags & MDB_NOSUBDIR) { - rc = len + sizeof(LOCKSUFF) + len + 1; - } else { - rc = len + sizeof(LOCKNAME) + len + sizeof(DATANAME); - } - lpath = malloc(rc); - if (!lpath) - return ENOMEM; - if (flags & MDB_NOSUBDIR) { - dpath = lpath + len + sizeof(LOCKSUFF); - sprintf(lpath, "%s" LOCKSUFF, path); - strcpy(dpath, path); - } else { - dpath = lpath + len + sizeof(LOCKNAME); - sprintf(lpath, "%s" LOCKNAME, path); - sprintf(dpath, "%s" DATANAME, path); - } - - rc = MDB_SUCCESS; flags |= env->me_flags; + + rc = mdb_fname_init(path, flags, &fname); + if (rc) + return rc; + if (flags & MDB_RDONLY) { /* silently ignore WRITEMAP when we're only getting read access */ flags &= ~MDB_WRITEMAP; @@ -4794,11 +4842,12 @@ mdb_env_open(MDB_env *env, const char *path, unsigned int flags, mdb_mode_t mode /* For RDONLY, get lockfile after we know datafile exists */ if (!(flags & (MDB_RDONLY|MDB_NOLOCK))) { - rc = mdb_env_setup_locks(env, lpath, mode, &excl); + rc = mdb_env_setup_locks(env, &fname, mode, &excl); if (rc) goto leave; } + mdb_fname_suffix_set(env, &fname, MDB_O_DATA); #ifdef _WIN32 if (F_ISSET(flags, MDB_RDONLY)) { oflags = GENERIC_READ; @@ -4808,19 +4857,15 @@ mdb_env_open(MDB_env *env, const char *path, unsigned int flags, mdb_mode_t mode len = OPEN_ALWAYS; } mode = FILE_ATTRIBUTE_NORMAL; - rc = utf8_to_utf16(dpath, -1, &wpath, NULL); - if (rc) - goto leave; - env->me_fd = CreateFileW(wpath, oflags, FILE_SHARE_READ|FILE_SHARE_WRITE, + env->me_fd = CreateFileW(fname.mn_val, oflags, FILE_SHARE_READ|FILE_SHARE_WRITE, NULL, len, mode, NULL); - free(wpath); #else if (F_ISSET(flags, MDB_RDONLY)) oflags = O_RDONLY; else oflags = O_RDWR | O_CREAT; - env->me_fd = open(dpath, oflags, mode); + env->me_fd = open(fname.mn_val, oflags, mode); #endif if (env->me_fd == INVALID_HANDLE_VALUE) { rc = ErrCode(); @@ -4828,7 +4873,7 @@ mdb_env_open(MDB_env *env, const char *path, unsigned int flags, mdb_mode_t mode } if ((flags & (MDB_RDONLY|MDB_NOLOCK)) == MDB_RDONLY) { - rc = mdb_env_setup_locks(env, lpath, mode, &excl); + rc = mdb_env_setup_locks(env, &fname, mode, &excl); if (rc) goto leave; } @@ -4840,18 +4885,15 @@ mdb_env_open(MDB_env *env, const char *path, unsigned int flags, mdb_mode_t mode /* Synchronous fd for meta writes. Needed even with * MDB_NOSYNC/MDB_NOMETASYNC, in case these get reset. */ + mdb_fname_suffix_set(env, &fname, MDB_O_DATA); #ifdef _WIN32 len = OPEN_EXISTING; - rc = utf8_to_utf16(dpath, -1, &wpath, NULL); - if (rc) - goto leave; - env->me_mfd = CreateFileW(wpath, oflags, + env->me_mfd = CreateFileW(fname.mn_val, oflags, FILE_SHARE_READ|FILE_SHARE_WRITE, NULL, len, mode | FILE_FLAG_WRITE_THROUGH, NULL); - free(wpath); #else oflags &= ~O_CREAT; - env->me_mfd = open(dpath, oflags | MDB_DSYNC, mode); + env->me_mfd = open(fname.mn_val, oflags | MDB_DSYNC, mode); #endif if (env->me_mfd == INVALID_HANDLE_VALUE) { rc = ErrCode(); @@ -4889,7 +4931,7 @@ leave: if (rc) { mdb_env_close0(env, excl); } - free(lpath); + mdb_fname_destroy(fname); return rc; } @@ -9356,37 +9398,24 @@ mdb_env_copyfd(MDB_env *env, HANDLE fd) int ESECT mdb_env_copy2(MDB_env *env, const char *path, unsigned int flags) { - int rc, len; - char *lpath; + int rc; + MDB_name fname; HANDLE newfd = INVALID_HANDLE_VALUE; -#ifdef _WIN32 - wchar_t *wpath; -#endif - if (env->me_flags & MDB_NOSUBDIR) { - lpath = (char *)path; - } else { - len = strlen(path); - len += sizeof(DATANAME); - lpath = malloc(len); - if (!lpath) - return ENOMEM; - sprintf(lpath, "%s" DATANAME, path); - } + rc = mdb_fname_init(path, env->me_flags | MDB_NOLOCK, &fname); + if (rc) + return rc; + mdb_fname_suffix_set(env, &fname, MDB_O_DATA); /* The destination path must exist, but the destination file must not. * We don't want the OS to cache the writes, since the source data is * already in the OS cache. */ #ifdef _WIN32 - rc = utf8_to_utf16(lpath, -1, &wpath, NULL); - if (rc) - goto leave; - newfd = CreateFileW(wpath, GENERIC_WRITE, 0, NULL, CREATE_NEW, + newfd = CreateFileW(fname.mn_val, GENERIC_WRITE, 0, NULL, CREATE_NEW, FILE_FLAG_NO_BUFFERING|FILE_FLAG_WRITE_THROUGH, NULL); - free(wpath); #else - newfd = open(lpath, O_WRONLY|O_CREAT|O_EXCL, 0666); + newfd = open(fname.mn_val, O_WRONLY|O_CREAT|O_EXCL, 0666); #endif if (newfd == INVALID_HANDLE_VALUE) { rc = ErrCode(); @@ -9406,8 +9435,7 @@ mdb_env_copy2(MDB_env *env, const char *path, unsigned int flags) rc = mdb_env_copyfd2(env, newfd, flags); leave: - if (!(env->me_flags & MDB_NOSUBDIR)) - free(lpath); + mdb_fname_destroy(fname); if (newfd != INVALID_HANDLE_VALUE) if (close(newfd) < 0 && rc == MDB_SUCCESS) rc = ErrCode(); @@ -10106,27 +10134,28 @@ mdb_mutex_failed(MDB_env *env, mdb_mutexref_t mutex, int rc) #endif /* MDB_ROBUST_SUPPORTED */ #if defined(_WIN32) +/** Convert \b src to new wchar_t[] string with room for \b xtra extra chars */ static int ESECT -utf8_to_utf16(const char *src, int srcsize, wchar_t **dst, int *dstsize) +utf8_to_utf16(const char *src, MDB_name *dst, int xtra) { int rc, need = 0; wchar_t *result = NULL; for (;;) { /* malloc result, then fill it in */ - need = MultiByteToWideChar(CP_UTF8, 0, src, srcsize, result, need); + need = MultiByteToWideChar(CP_UTF8, 0, src, -1, result, need); if (!need) { rc = ErrCode(); free(result); return rc; } if (!result) { - result = malloc(sizeof(wchar_t) * need); + result = malloc(sizeof(wchar_t) * (need + xtra)); if (!result) return ENOMEM; continue; } - if (dstsize) - *dstsize = need; - *dst = result; + dst->mn_alloced = 1; + dst->mn_len = need - 1; + dst->mn_val = result; return MDB_SUCCESS; } } From 9c76e95dbecdfb5440a16b7770ecc0525db3236c Mon Sep 17 00:00:00 2001 From: Hallvard Furuseth Date: Tue, 27 Sep 2016 07:03:34 +0200 Subject: [PATCH 24/39] Move opening files to mdb_fopen() No change in functionality. --- libraries/liblmdb/mdb.c | 220 +++++++++++++++++++++++----------------- 1 file changed, 125 insertions(+), 95 deletions(-) diff --git a/libraries/liblmdb/mdb.c b/libraries/liblmdb/mdb.c index af0e9364a0..9516252cc5 100644 --- a/libraries/liblmdb/mdb.c +++ b/libraries/liblmdb/mdb.c @@ -4142,16 +4142,122 @@ mdb_fname_init(const char *path, unsigned envflags, MDB_name *fname) #define mdb_fname_destroy(fname) \ do { if ((fname).mn_alloced) free((fname).mn_val); } while (0) +#ifdef O_CLOEXEC /* POSIX.1-2008: Set FD_CLOEXEC atomically at open() */ +# define MDB_CLOEXEC O_CLOEXEC +#else +# define MDB_CLOEXEC 0 +#endif + +/** File type, access mode etc. for #mdb_fopen() */ enum mdb_fopen_type { - MDB_O_DATA, MDB_O_LOCKS +#ifdef _WIN32 + MDB_O_RDONLY, MDB_O_RDWR, MDB_O_META, MDB_O_COPY, MDB_O_LOCKS +#else + /* A comment in mdb_fopen() explains some O_* flag choices. */ + MDB_O_RDONLY= O_RDONLY, /**< for RDONLY me_fd */ + MDB_O_RDWR = O_RDWR |O_CREAT, /**< for me_fd */ + MDB_O_META = O_RDWR |MDB_DSYNC, /**< for me_mfd */ + MDB_O_COPY = O_WRONLY|O_CREAT|O_EXCL, /**< for #mdb_env_copy() */ + /** Bitmask for open() flags in enum #mdb_fopen_type. The other bits + * distinguish otherwise-equal MDB_O_* constants from each other. + */ + MDB_O_MASK = MDB_O_RDWR|MDB_CLOEXEC | MDB_O_RDONLY|MDB_O_META|MDB_O_COPY, + MDB_O_LOCKS = MDB_O_RDWR|MDB_CLOEXEC | ((MDB_O_MASK+1) & ~MDB_O_MASK) /**< for me_lfd */ +#endif }; -static void ESECT -mdb_fname_suffix_set(MDB_env *env, MDB_name *fname, enum mdb_fopen_type which) +/** Open an LMDB file. + * @param[in] env The LMDB environment. + * @param[in,out] fname Path from from #mdb_fname_init(). A suffix is + * appended if necessary to create the filename, without changing mn_len. + * @param[in] which Determines file type, access mode, etc. + * @param[in] mode The Unix permissions for the file, if we create it. + * @param[out] res Resulting file handle. + * @return 0 on success, non-zero on failure. + */ +static int ESECT +mdb_fopen(const MDB_env *env, MDB_name *fname, + enum mdb_fopen_type which, mdb_mode_t mode, + HANDLE *res) { + int rc = MDB_SUCCESS; + HANDLE fd; +#ifdef _WIN32 + DWORD acc, share, disp, attrs; +#else + int flags; +#endif + if (fname->mn_alloced) /* modifiable copy */ mdb_name_cpy(fname->mn_val + fname->mn_len, mdb_suffixes[which==MDB_O_LOCKS][F_ISSET(env->me_flags, MDB_NOSUBDIR)]); + + /* The directory must already exist. Usually the file need not. + * MDB_O_META requires the file because we already created it using + * MDB_O_RDWR. MDB_O_COPY must not overwrite an existing file. + * + * With MDB_O_COPY we do not want the OS to cache the writes, since + * the source data is already in the OS cache. + * + * The lockfile needs FD_CLOEXEC (close file descriptor on exec*()) + * to avoid the flock() issues noted under Caveats in lmdb.h. + */ + +#ifdef _WIN32 + acc = GENERIC_READ|GENERIC_WRITE; + share = FILE_SHARE_READ|FILE_SHARE_WRITE; + disp = OPEN_ALWAYS; + attrs = FILE_ATTRIBUTE_NORMAL; + switch (which) { + case MDB_O_RDONLY: /* read-only datafile */ + acc = GENERIC_READ; + disp = OPEN_EXISTING; + break; + case MDB_O_META: /* for writing metapages */ + disp = OPEN_EXISTING; + attrs = FILE_ATTRIBUTE_NORMAL|FILE_FLAG_WRITE_THROUGH; + break; + case MDB_O_COPY: /* mdb_env_copy() & co */ + acc = GENERIC_WRITE; + share = 0; + disp = CREATE_NEW; + attrs = FILE_FLAG_NO_BUFFERING|FILE_FLAG_WRITE_THROUGH; + break; + default: break; /* silence gcc -Wswitch (not all enum values handled) */ + } + fd = CreateFileW(fname->mn_val, acc, share, NULL, disp, attrs, NULL); +#else + fd = open(fname->mn_val, which & MDB_O_MASK, mode); +#endif + + if (fd == INVALID_HANDLE_VALUE) + rc = ErrCode(); +#ifndef _WIN32 + else { + if (which == MDB_O_LOCKS) { + /* Set CLOEXEC if we could not pass it to open() */ + if (!MDB_CLOEXEC && (flags = fcntl(fd, F_GETFD)) != -1) + (void) fcntl(fd, F_SETFD, flags | FD_CLOEXEC); + } + if (which == MDB_O_COPY && env->me_psize >= env->me_os_psize) { + /* This may require buffer alignment. There is no portable + * way to ask how much, so we require OS pagesize alignment. + */ +# ifdef F_NOCACHE /* __APPLE__ */ + (void) fcntl(fd, F_NOCACHE, 1); +# elif defined O_DIRECT + /* open(...O_DIRECT...) would break on filesystems without + * O_DIRECT support (ITS#7682). Try to set it here instead. + */ + if ((flags = fcntl(fd, F_GETFL)) != -1) + (void) fcntl(fd, F_SETFL, flags | O_DIRECT); +# endif + } + } +#endif /* !_WIN32 */ + + *res = fd; + return rc; } @@ -4572,36 +4678,18 @@ mdb_env_setup_locks(MDB_env *env, MDB_name *fname, int mode, int *excl) # define MDB_ERRCODE_ROFS ERROR_WRITE_PROTECT #else # define MDB_ERRCODE_ROFS EROFS -#ifdef O_CLOEXEC /* Linux: Open file and set FD_CLOEXEC atomically */ -# define MDB_CLOEXEC O_CLOEXEC -#else -# define MDB_CLOEXEC 0 -#endif - int fdflags; #endif int rc; off_t size, rsize; - mdb_fname_suffix_set(env, fname, MDB_O_LOCKS); -#ifdef _WIN32 - env->me_lfd = CreateFileW(fname->mn_val, GENERIC_READ|GENERIC_WRITE, - FILE_SHARE_READ|FILE_SHARE_WRITE, NULL, OPEN_ALWAYS, - FILE_ATTRIBUTE_NORMAL, NULL); -#else - env->me_lfd = open(fname->mn_val, O_RDWR|O_CREAT|MDB_CLOEXEC, mode); -#endif - if (env->me_lfd == INVALID_HANDLE_VALUE) { - rc = ErrCode(); + rc = mdb_fopen(env, fname, MDB_O_LOCKS, mode, &env->me_lfd); + if (rc) { + /* Omit lockfile if read-only env on read-only filesystem */ if (rc == MDB_ERRCODE_ROFS && (env->me_flags & MDB_RDONLY)) { return MDB_SUCCESS; } goto fail; } -#ifndef _WIN32 - /* Lose record locks when exec*() */ - if (!(MDB_CLOEXEC) && (fdflags = fcntl(env->me_lfd, F_GETFD)) != -1) - fcntl(env->me_lfd, F_SETFD, fdflags | FD_CLOEXEC); -#endif if (!(env->me_flags & MDB_NOTLS)) { rc = pthread_key_create(&env->me_txkey, mdb_env_reader_dest); @@ -4806,7 +4894,7 @@ fail: int ESECT mdb_env_open(MDB_env *env, const char *path, unsigned int flags, mdb_mode_t mode) { - int oflags, rc, len, excl = -1; + int rc, excl = -1; MDB_name fname; if (env->me_fd!=INVALID_HANDLE_VALUE || (flags & ~(CHANGEABLE|CHANGELESS))) @@ -4847,30 +4935,11 @@ mdb_env_open(MDB_env *env, const char *path, unsigned int flags, mdb_mode_t mode goto leave; } - mdb_fname_suffix_set(env, &fname, MDB_O_DATA); -#ifdef _WIN32 - if (F_ISSET(flags, MDB_RDONLY)) { - oflags = GENERIC_READ; - len = OPEN_EXISTING; - } else { - oflags = GENERIC_READ|GENERIC_WRITE; - len = OPEN_ALWAYS; - } - mode = FILE_ATTRIBUTE_NORMAL; - env->me_fd = CreateFileW(fname.mn_val, oflags, FILE_SHARE_READ|FILE_SHARE_WRITE, - NULL, len, mode, NULL); -#else - if (F_ISSET(flags, MDB_RDONLY)) - oflags = O_RDONLY; - else - oflags = O_RDWR | O_CREAT; - - env->me_fd = open(fname.mn_val, oflags, mode); -#endif - if (env->me_fd == INVALID_HANDLE_VALUE) { - rc = ErrCode(); + rc = mdb_fopen(env, &fname, + (flags & MDB_RDONLY) ? MDB_O_RDONLY : MDB_O_RDWR, + mode, &env->me_fd); + if (rc) goto leave; - } if ((flags & (MDB_RDONLY|MDB_NOLOCK)) == MDB_RDONLY) { rc = mdb_env_setup_locks(env, &fname, mode, &excl); @@ -4885,20 +4954,9 @@ mdb_env_open(MDB_env *env, const char *path, unsigned int flags, mdb_mode_t mode /* Synchronous fd for meta writes. Needed even with * MDB_NOSYNC/MDB_NOMETASYNC, in case these get reset. */ - mdb_fname_suffix_set(env, &fname, MDB_O_DATA); -#ifdef _WIN32 - len = OPEN_EXISTING; - env->me_mfd = CreateFileW(fname.mn_val, oflags, - FILE_SHARE_READ|FILE_SHARE_WRITE, NULL, len, - mode | FILE_FLAG_WRITE_THROUGH, NULL); -#else - oflags &= ~O_CREAT; - env->me_mfd = open(fname.mn_val, oflags | MDB_DSYNC, mode); -#endif - if (env->me_mfd == INVALID_HANDLE_VALUE) { - rc = ErrCode(); + rc = mdb_fopen(env, &fname, MDB_O_META, mode, &env->me_mfd); + if (rc) goto leave; - } } DPRINTF(("opened dbenv %p", (void *) env)); if (excl > 0) { @@ -9403,43 +9461,15 @@ mdb_env_copy2(MDB_env *env, const char *path, unsigned int flags) HANDLE newfd = INVALID_HANDLE_VALUE; rc = mdb_fname_init(path, env->me_flags | MDB_NOLOCK, &fname); - if (rc) - return rc; - mdb_fname_suffix_set(env, &fname, MDB_O_DATA); - - /* The destination path must exist, but the destination file must not. - * We don't want the OS to cache the writes, since the source data is - * already in the OS cache. - */ -#ifdef _WIN32 - newfd = CreateFileW(fname.mn_val, GENERIC_WRITE, 0, NULL, CREATE_NEW, - FILE_FLAG_NO_BUFFERING|FILE_FLAG_WRITE_THROUGH, NULL); -#else - newfd = open(fname.mn_val, O_WRONLY|O_CREAT|O_EXCL, 0666); -#endif - if (newfd == INVALID_HANDLE_VALUE) { - rc = ErrCode(); - goto leave; + if (rc == MDB_SUCCESS) { + rc = mdb_fopen(env, &fname, MDB_O_COPY, 0666, &newfd); + mdb_fname_destroy(fname); } - - if (env->me_psize >= env->me_os_psize) { -#ifdef F_NOCACHE /* __APPLE__ */ - (void) fcntl(newfd, F_NOCACHE, 1); -#elif defined O_DIRECT - /* Set O_DIRECT if the file system supports it */ - if ((rc = fcntl(newfd, F_GETFL)) != -1) - (void) fcntl(newfd, F_SETFL, rc | O_DIRECT); -#endif - } - - rc = mdb_env_copyfd2(env, newfd, flags); - -leave: - mdb_fname_destroy(fname); - if (newfd != INVALID_HANDLE_VALUE) + if (rc == MDB_SUCCESS) { + rc = mdb_env_copyfd2(env, newfd, flags); if (close(newfd) < 0 && rc == MDB_SUCCESS) rc = ErrCode(); - + } return rc; } From eb1a307e4a5427a38d2fa0c0e329ca0ee7593479 Mon Sep 17 00:00:00 2001 From: Hallvard Furuseth Date: Tue, 27 Sep 2016 07:03:38 +0200 Subject: [PATCH 25/39] ITS#8505 Set FD_CLOEXEC for me_mfd,env_copy as well --- libraries/liblmdb/mdb.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/libraries/liblmdb/mdb.c b/libraries/liblmdb/mdb.c index 9516252cc5..36094bf4f3 100644 --- a/libraries/liblmdb/mdb.c +++ b/libraries/liblmdb/mdb.c @@ -4156,8 +4156,8 @@ enum mdb_fopen_type { /* A comment in mdb_fopen() explains some O_* flag choices. */ MDB_O_RDONLY= O_RDONLY, /**< for RDONLY me_fd */ MDB_O_RDWR = O_RDWR |O_CREAT, /**< for me_fd */ - MDB_O_META = O_RDWR |MDB_DSYNC, /**< for me_mfd */ - MDB_O_COPY = O_WRONLY|O_CREAT|O_EXCL, /**< for #mdb_env_copy() */ + MDB_O_META = O_RDWR |MDB_DSYNC |MDB_CLOEXEC, /**< for me_mfd */ + MDB_O_COPY = O_WRONLY|O_CREAT|O_EXCL|MDB_CLOEXEC, /**< for #mdb_env_copy() */ /** Bitmask for open() flags in enum #mdb_fopen_type. The other bits * distinguish otherwise-equal MDB_O_* constants from each other. */ @@ -4201,6 +4201,9 @@ mdb_fopen(const MDB_env *env, MDB_name *fname, * * The lockfile needs FD_CLOEXEC (close file descriptor on exec*()) * to avoid the flock() issues noted under Caveats in lmdb.h. + * Also set it for other filehandles which the user cannot get at + * and close himself, which he may need after fork(). I.e. all but + * me_fd, which programs do use via mdb_env_get_fd(). */ #ifdef _WIN32 @@ -4234,7 +4237,7 @@ mdb_fopen(const MDB_env *env, MDB_name *fname, rc = ErrCode(); #ifndef _WIN32 else { - if (which == MDB_O_LOCKS) { + if (which != MDB_O_RDONLY && which != MDB_O_RDWR) { /* Set CLOEXEC if we could not pass it to open() */ if (!MDB_CLOEXEC && (flags = fcntl(fd, F_GETFD)) != -1) (void) fcntl(fd, F_SETFD, flags | FD_CLOEXEC); From 6e81d4071b441840fc09855643f35ea992784002 Mon Sep 17 00:00:00 2001 From: Hallvard Furuseth Date: Tue, 27 Sep 2016 07:03:40 +0200 Subject: [PATCH 26/39] ITS#8505 Protect parent from fork()-pthread_exit() --- libraries/liblmdb/mdb.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/libraries/liblmdb/mdb.c b/libraries/liblmdb/mdb.c index 36094bf4f3..e2f1bbca1d 100644 --- a/libraries/liblmdb/mdb.c +++ b/libraries/liblmdb/mdb.c @@ -4429,7 +4429,11 @@ mdb_env_reader_dest(void *ptr) { MDB_reader *reader = ptr; - reader->mr_pid = 0; +#ifndef _WIN32 + if (reader->mr_pid == getpid()) /* catch pthread_exit() in child process */ +#endif + /* We omit the mutex, so do this atomically (i.e. skip mr_txnid) */ + reader->mr_pid = 0; } #ifdef _WIN32 From e911ad64b905dc6532270b2de5f7cb46e80167b4 Mon Sep 17 00:00:00 2001 From: Hallvard Furuseth Date: Tue, 27 Sep 2016 07:03:42 +0200 Subject: [PATCH 27/39] ITS#8505 Clarify fork() caveat, mdb_env_get_fd(), flock->fcntl. --- libraries/liblmdb/lmdb.h | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/libraries/liblmdb/lmdb.h b/libraries/liblmdb/lmdb.h index a356538595..ba74a220bf 100644 --- a/libraries/liblmdb/lmdb.h +++ b/libraries/liblmdb/lmdb.h @@ -96,11 +96,12 @@ * transactions. Each transaction belongs to one thread. See below. * The #MDB_NOTLS flag changes this for read-only transactions. * - * - Use an MDB_env* in the process which opened it, without fork()ing. + * - Use an MDB_env* in the process which opened it, not after fork(). * * - Do not have open an LMDB database twice in the same process at * the same time. Not even from a plain open() call - close()ing it - * breaks flock() advisory locking. + * breaks fcntl() advisory locking. (It is OK to reopen it after + * fork() - exec*(), since the lockfile has FD_CLOEXEC set.) * * - Avoid long-lived transactions. Read transactions prevent * reuse of pages freed by newer write transactions, thus the @@ -794,6 +795,10 @@ int mdb_env_get_flags(MDB_env *env, unsigned int *flags); int mdb_env_get_path(MDB_env *env, const char **path); /** @brief Return the filedescriptor for the given environment. + * + * This function may be called after fork(), so the descriptor can be + * closed before exec*(). Other LMDB file descriptors have FD_CLOEXEC. + * (Until LMDB 0.9.18, only the lockfile had that.) * * @param[in] env An environment handle returned by #mdb_env_create() * @param[out] fd Address of a mdb_filehandle_t to contain the descriptor. From 8de0788b9c7b2dce693b85801e9564576b9d9a21 Mon Sep 17 00:00:00 2001 From: Hallvard Furuseth Date: Tue, 27 Sep 2016 07:03:45 +0200 Subject: [PATCH 28/39] Only set me_mfd if needed. Drop unused read access. --- libraries/liblmdb/mdb.c | 16 +++++++++------- 1 file changed, 9 insertions(+), 7 deletions(-) diff --git a/libraries/liblmdb/mdb.c b/libraries/liblmdb/mdb.c index e2f1bbca1d..fe7f6888e8 100644 --- a/libraries/liblmdb/mdb.c +++ b/libraries/liblmdb/mdb.c @@ -1239,7 +1239,7 @@ typedef struct MDB_pgstate { struct MDB_env { HANDLE me_fd; /**< The main data file */ HANDLE me_lfd; /**< The lock file */ - HANDLE me_mfd; /**< just for writing the meta pages */ + HANDLE me_mfd; /**< For writing and syncing the meta pages */ /** Failed to update the meta page. Probably an I/O error. */ #define MDB_FATAL_ERROR 0x80000000U /** Some fields are initialized. */ @@ -3827,7 +3827,10 @@ mdb_env_write_meta(MDB_txn *txn) len = sizeof(MDB_meta) - off; off += (char *)mp - env->me_map; - /* Write to the SYNC fd */ + /* Write to the SYNC fd unless MDB_NOSYNC/MDB_NOMETASYNC. + * (me_mfd goes to the same file as me_fd, but writing to it + * also syncs to disk. Avoids a separate fdatasync() call.) + */ mfd = (flags & (MDB_NOSYNC|MDB_NOMETASYNC)) ? env->me_fd : env->me_mfd; #ifdef _WIN32 { @@ -4156,7 +4159,7 @@ enum mdb_fopen_type { /* A comment in mdb_fopen() explains some O_* flag choices. */ MDB_O_RDONLY= O_RDONLY, /**< for RDONLY me_fd */ MDB_O_RDWR = O_RDWR |O_CREAT, /**< for me_fd */ - MDB_O_META = O_RDWR |MDB_DSYNC |MDB_CLOEXEC, /**< for me_mfd */ + MDB_O_META = O_WRONLY|MDB_DSYNC |MDB_CLOEXEC, /**< for me_mfd */ MDB_O_COPY = O_WRONLY|O_CREAT|O_EXCL|MDB_CLOEXEC, /**< for #mdb_env_copy() */ /** Bitmask for open() flags in enum #mdb_fopen_type. The other bits * distinguish otherwise-equal MDB_O_* constants from each other. @@ -4217,6 +4220,7 @@ mdb_fopen(const MDB_env *env, MDB_name *fname, disp = OPEN_EXISTING; break; case MDB_O_META: /* for writing metapages */ + acc = GENERIC_WRITE; disp = OPEN_EXISTING; attrs = FILE_ATTRIBUTE_NORMAL|FILE_FLAG_WRITE_THROUGH; break; @@ -4955,9 +4959,7 @@ mdb_env_open(MDB_env *env, const char *path, unsigned int flags, mdb_mode_t mode } if ((rc = mdb_env_open2(env)) == MDB_SUCCESS) { - if (flags & (MDB_RDONLY|MDB_WRITEMAP)) { - env->me_mfd = env->me_fd; - } else { + if (!(flags & (MDB_RDONLY|MDB_WRITEMAP))) { /* Synchronous fd for meta writes. Needed even with * MDB_NOSYNC/MDB_NOMETASYNC, in case these get reset. */ @@ -5040,7 +5042,7 @@ mdb_env_close0(MDB_env *env, int excl) if (env->me_map) { munmap(env->me_map, env->me_mapsize); } - if (env->me_mfd != env->me_fd && env->me_mfd != INVALID_HANDLE_VALUE) + if (env->me_mfd != INVALID_HANDLE_VALUE) (void) close(env->me_mfd); if (env->me_fd != INVALID_HANDLE_VALUE) (void) close(env->me_fd); From 2c3b019e5a5018e8f6c4090707c2739df7110b8a Mon Sep 17 00:00:00 2001 From: Hallvard Furuseth Date: Wed, 7 Dec 2016 18:55:21 +0100 Subject: [PATCH 29/39] Note functions which must set MDB_TXN_ERROR on failure Other functions depend on them to do so. For mdb_node_read(), instead remove such a dependence. --- libraries/liblmdb/mdb.c | 15 ++++++++++++--- 1 file changed, 12 insertions(+), 3 deletions(-) diff --git a/libraries/liblmdb/mdb.c b/libraries/liblmdb/mdb.c index fe7f6888e8..5c3989c6b5 100644 --- a/libraries/liblmdb/mdb.c +++ b/libraries/liblmdb/mdb.c @@ -1747,6 +1747,7 @@ mdb_dcmp(MDB_txn *txn, MDB_dbi dbi, const MDB_val *a, const MDB_val *b) /** Allocate memory for a page. * Re-use old malloc'd pages first for singletons, otherwise just malloc. + * Set #MDB_TXN_ERROR on failure. */ static MDB_page * mdb_page_malloc(MDB_txn *txn, unsigned num) @@ -2114,7 +2115,7 @@ mdb_page_dirty(MDB_txn *txn, MDB_page *mp) } /** Allocate page numbers and memory for writing. Maintain me_pglast, - * me_pghead and mt_next_pgno. + * me_pghead and mt_next_pgno. Set #MDB_TXN_ERROR on failure. * * If there are free pages available from older transactions, they * are re-used first. Otherwise allocate a new page at mt_next_pgno. @@ -2237,7 +2238,7 @@ mdb_page_alloc(MDB_cursor *mc, int num, MDB_page **mp) np = m2.mc_pg[m2.mc_top]; leaf = NODEPTR(np, m2.mc_ki[m2.mc_top]); if ((rc = mdb_node_read(&m2, leaf, &data)) != MDB_SUCCESS) - return rc; + goto fail; idl = (MDB_ID *) data.mv_data; i = idl[0]; @@ -2387,6 +2388,7 @@ mdb_page_unspill(MDB_txn *txn, MDB_page *mp, MDB_page **ret) } /** Touch a page: make it dirty and re-insert into tree with updated pgno. + * Set #MDB_TXN_ERROR on failure. * @param[in] mc cursor pointing to the page to be touched * @return 0 on success, non-zero on failure. */ @@ -5342,7 +5344,9 @@ mdb_cursor_pop(MDB_cursor *mc) } } -/** Push a page onto the top of the cursor's stack. */ +/** Push a page onto the top of the cursor's stack. + * Set #MDB_TXN_ERROR on failure. + */ static int mdb_cursor_push(MDB_cursor *mc, MDB_page *mp) { @@ -5362,6 +5366,7 @@ mdb_cursor_push(MDB_cursor *mc, MDB_page *mp) } /** Find the address of the page corresponding to a given page number. + * Set #MDB_TXN_ERROR on failure. * @param[in] mc the cursor accessing the page. * @param[in] pgno the page number for the page to retrieve. * @param[out] ret address of a pointer where the page's address will be stored. @@ -7100,6 +7105,7 @@ fail: } /** Allocate and initialize new pages for a database. + * Set #MDB_TXN_ERROR on failure. * @param[in] mc a cursor on the database being added to. * @param[in] flags flags defining what type of page is being allocated. * @param[in] num the number of pages to allocate. This is usually 1, @@ -7185,6 +7191,7 @@ mdb_branch_size(MDB_env *env, MDB_val *key) } /** Add a node to the page pointed to by the cursor. + * Set #MDB_TXN_ERROR on failure. * @param[in] mc The cursor for this operation. * @param[in] indx The index on the page where the new node should be added. * @param[in] key The key for the new node. @@ -7673,6 +7680,7 @@ mdb_cursor_dbi(MDB_cursor *mc) } /** Replace the key for a branch node with a new key. + * Set #MDB_TXN_ERROR on failure. * @param[in] mc Cursor pointing to the node to operate on. * @param[in] key The new key to use. * @return 0 on success, non-zero on failure. @@ -8501,6 +8509,7 @@ mdb_del0(MDB_txn *txn, MDB_dbi dbi, } /** Split a page and insert a new node. + * Set #MDB_TXN_ERROR on failure. * @param[in,out] mc Cursor pointing to the page and desired insertion index. * The cursor will be updated to point to the actual page and index where * the node got inserted after the split. From 47393f4ea2dd6d5af40d72a587ca486314567a90 Mon Sep 17 00:00:00 2001 From: Hallvard Furuseth Date: Wed, 7 Dec 2016 19:04:19 +0100 Subject: [PATCH 30/39] doxygen cleanup --- libraries/liblmdb/mdb.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/libraries/liblmdb/mdb.c b/libraries/liblmdb/mdb.c index 5c3989c6b5..f059fd6f5d 100644 --- a/libraries/liblmdb/mdb.c +++ b/libraries/liblmdb/mdb.c @@ -10077,7 +10077,7 @@ mdb_reader_check(MDB_env *env, int *dead) return env->me_txns ? mdb_reader_check0(env, 0, dead) : MDB_SUCCESS; } -/** As #mdb_reader_check(). rlocked = . */ +/** As #mdb_reader_check(). \b rlocked is set if caller locked #me_rmutex. */ static int ESECT mdb_reader_check0(MDB_env *env, int rlocked, int *dead) { From e8e8293359834e9243f440030e368f287f12db33 Mon Sep 17 00:00:00 2001 From: Hallvard Furuseth Date: Wed, 14 Dec 2016 00:23:01 +0100 Subject: [PATCH 31/39] ITS#8542 mdb_dbi_open(): Protect mainDB cursors --- libraries/liblmdb/mdb.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/libraries/liblmdb/mdb.c b/libraries/liblmdb/mdb.c index f059fd6f5d..32349dffe9 100644 --- a/libraries/liblmdb/mdb.c +++ b/libraries/liblmdb/mdb.c @@ -9724,7 +9724,8 @@ int mdb_dbi_open(MDB_txn *txn, const char *name, unsigned int flags, MDB_dbi *db memset(&dummy, 0, sizeof(dummy)); dummy.md_root = P_INVALID; dummy.md_flags = flags & PERSISTENT_FLAGS; - rc = mdb_cursor_put(&mc, &key, &data, F_SUBDATA); + WITH_CURSOR_TRACKING(mc, + rc = mdb_cursor_put(&mc, &key, &data, F_SUBDATA)); dbflag |= DB_DIRTY; } From 894e88bf895529b2f4682f74c7a8c69a0c61cc00 Mon Sep 17 00:00:00 2001 From: Lorenz Bauer Date: Thu, 20 Oct 2016 09:51:22 +0200 Subject: [PATCH 32/39] ITS#8504 mdb_env_copyfd2(): Don't abort on SIGPIPE Return EPIPE instead. --- libraries/liblmdb/mdb.c | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/libraries/liblmdb/mdb.c b/libraries/liblmdb/mdb.c index 32349dffe9..33c699883c 100644 --- a/libraries/liblmdb/mdb.c +++ b/libraries/liblmdb/mdb.c @@ -129,6 +129,7 @@ typedef SSIZE_T ssize_t; #ifndef _WIN32 #include +#include #ifdef MDB_USE_POSIX_SEM # define MDB_USE_HASH 1 #include @@ -8995,10 +8996,17 @@ mdb_env_copythr(void *arg) #define DO_WRITE(rc, fd, ptr, w2, len) rc = WriteFile(fd, ptr, w2, &len, NULL) #else int len; + sigset_t set; #define DO_WRITE(rc, fd, ptr, w2, len) len = write(fd, ptr, w2); rc = (len >= 0) + + sigemptyset(&set); + sigaddset(&set, SIGPIPE); #endif pthread_mutex_lock(&my->mc_mutex); +#ifndef _WIN32 + my->mc_error = pthread_sigmask(SIG_BLOCK, &set, NULL); +#endif for(;;) { while (!my->mc_new) pthread_cond_wait(&my->mc_cond, &my->mc_mutex); @@ -9012,6 +9020,12 @@ again: DO_WRITE(rc, my->mc_fd, ptr, wsize, len); if (!rc) { rc = ErrCode(); +#ifndef _WIN32 + if (rc == EPIPE) { + int tmp; + sigwait(&set, &tmp); + } +#endif break; } else if (len > 0) { rc = MDB_SUCCESS; From bb08f63833536cf8dc30226b9b2271d87f8c0876 Mon Sep 17 00:00:00 2001 From: Hallvard Furuseth Date: Thu, 20 Oct 2016 09:51:22 +0200 Subject: [PATCH 33/39] ITS#8504 Fix prev commit: mc_error, #ifdef SIGPIPE Never clear mc_error, we could lose a failure in the other thread. --- libraries/liblmdb/mdb.c | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) diff --git a/libraries/liblmdb/mdb.c b/libraries/liblmdb/mdb.c index 33c699883c..7a064f753d 100644 --- a/libraries/liblmdb/mdb.c +++ b/libraries/liblmdb/mdb.c @@ -8996,17 +8996,17 @@ mdb_env_copythr(void *arg) #define DO_WRITE(rc, fd, ptr, w2, len) rc = WriteFile(fd, ptr, w2, &len, NULL) #else int len; - sigset_t set; #define DO_WRITE(rc, fd, ptr, w2, len) len = write(fd, ptr, w2); rc = (len >= 0) - +#ifdef SIGPIPE + sigset_t set; sigemptyset(&set); sigaddset(&set, SIGPIPE); + if ((rc = pthread_sigmask(SIG_BLOCK, &set, NULL)) != 0) + my->mc_error = rc; +#endif #endif pthread_mutex_lock(&my->mc_mutex); -#ifndef _WIN32 - my->mc_error = pthread_sigmask(SIG_BLOCK, &set, NULL); -#endif for(;;) { while (!my->mc_new) pthread_cond_wait(&my->mc_cond, &my->mc_mutex); @@ -9020,8 +9020,11 @@ again: DO_WRITE(rc, my->mc_fd, ptr, wsize, len); if (!rc) { rc = ErrCode(); -#ifndef _WIN32 +#if defined(SIGPIPE) && !defined(_WIN32) if (rc == EPIPE) { + /* Collect the pending SIGPIPE, otherwise at least OS X + * gives it to the process on thread-exit (ITS#8504). + */ int tmp; sigwait(&set, &tmp); } From 7895f462298da64902bba7d5ff00ac28295b1ed8 Mon Sep 17 00:00:00 2001 From: Howard Chu Date: Wed, 21 Dec 2016 13:02:00 +0000 Subject: [PATCH 34/39] More 0.9.19 updates --- libraries/liblmdb/CHANGES | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/libraries/liblmdb/CHANGES b/libraries/liblmdb/CHANGES index 454e6247b5..ddd341588a 100644 --- a/libraries/liblmdb/CHANGES +++ b/libraries/liblmdb/CHANGES @@ -8,6 +8,19 @@ LMDB 0.9.19 Release Engineering Optimize mdb_drop Fix xcursors after mdb_cursor_del (ITS#8406) Fix MDB_NEXT_DUP after mdb_cursor_del (ITS#8412) + Fix mdb_cursor_put resetting C_EOF (ITS#8489) + Fix mdb_env_copyfd2 to return EPIPE on SIGPIPE (ITS#8504) + Fix mdb_env_copy with empty DB (ITS#8209) + Fix behaviors with fork (ITS#8505) + Fix mdb_dbi_open with mainDB cursors (ITS#8542) + Fix utf8_to_utf16 error checks (ITS#7992) + Fix F_NOCACHE on MacOS, error is non-fatal (ITS#7682) + Build + Make shared lib suffix overridable (ITS#8481) + Documentation + Cleanup doxygen nits + Note reserved vs actual mem/disk usage + LMDB 0.9.18 Release (2016/02/05) Fix robust mutex detection on glibc 2.10-11 (ITS#8330) From a4976c12e51de98660c9876edabf70080b9bbc85 Mon Sep 17 00:00:00 2001 From: Hallvard Furuseth Date: Wed, 21 Dec 2016 16:33:47 +0100 Subject: [PATCH 35/39] Doxygen fixes. Use DISTRIBUTE_GROUP_DOC. - DISTRIBUTE_GROUP_DOC makes doxygen give several fields the same doc: mn_hi + mn_lo in MDB_node. - With mdb_mutex_t + mdb_mutexref_t, instead split them up. --- libraries/liblmdb/Doxyfile | 2 +- libraries/liblmdb/mdb.c | 30 ++++++++++++++++-------------- 2 files changed, 17 insertions(+), 15 deletions(-) diff --git a/libraries/liblmdb/Doxyfile b/libraries/liblmdb/Doxyfile index 5047c0bb1f..5ca2cfe8f6 100644 --- a/libraries/liblmdb/Doxyfile +++ b/libraries/liblmdb/Doxyfile @@ -253,7 +253,7 @@ IDL_PROPERTY_SUPPORT = YES # member in the group (if any) for the other members of the group. By default # all members of a group must be documented explicitly. -DISTRIBUTE_GROUP_DOC = NO +DISTRIBUTE_GROUP_DOC = YES # Set the SUBGROUPING tag to YES (the default) to allow class member groups of # the same type (for instance a group of public functions) to be put as a diff --git a/libraries/liblmdb/mdb.c b/libraries/liblmdb/mdb.c index 7a064f753d..57c6e4cde1 100644 --- a/libraries/liblmdb/mdb.c +++ b/libraries/liblmdb/mdb.c @@ -341,15 +341,15 @@ mdb_sem_wait(sem_t *sem) } #else /* MDB_USE_POSIX_MUTEX: */ - /** Shared mutex/semaphore as it is stored (mdb_mutex_t), and as - * local variables keep it (mdb_mutexref_t). + /** Shared mutex/semaphore as the original is stored. * - * When #mdb_mutexref_t is a pointer declaration and #mdb_mutex_t is - * not, then it is array[size 1] so it can be assigned to a pointer. - * @{ + * Not for copies. Instead it can be assigned to an #mdb_mutexref_t. + * When mdb_mutexref_t is a pointer and mdb_mutex_t is not, then it + * is array[size 1] so it can be assigned to the pointer. */ -typedef pthread_mutex_t mdb_mutex_t[1], *mdb_mutexref_t; - /* @} */ +typedef pthread_mutex_t mdb_mutex_t[1]; + /** Reference to an #mdb_mutex_t */ +typedef pthread_mutex_t *mdb_mutexref_t; /** Lock the reader or writer mutex. * Returns 0 or a code to give #mdb_mutex_failed(), as in #LOCK_MUTEX(). */ @@ -874,19 +874,21 @@ typedef struct MDB_page { /** Header for a single key/data pair within a page. * Used in pages of type #P_BRANCH and #P_LEAF without #P_LEAF2. * We guarantee 2-byte alignment for 'MDB_node's. + * + * #mn_lo and #mn_hi are used for data size on leaf nodes, and for child + * pgno on branch nodes. On 64 bit platforms, #mn_flags is also used + * for pgno. (Branch nodes have no flags). Lo and hi are in host byte + * order in case some accesses can be optimized to 32-bit word access. */ typedef struct MDB_node { - /** lo and hi are used for data size on leaf nodes and for - * child pgno on branch nodes. On 64 bit platforms, flags - * is also used for pgno. (Branch nodes have no flags). - * They are in host byte order in case that lets some - * accesses be optimized into a 32-bit word access. - */ + /** part of data size or pgno + * @{ */ #if BYTE_ORDER == LITTLE_ENDIAN - unsigned short mn_lo, mn_hi; /**< part of data size or pgno */ + unsigned short mn_lo, mn_hi; #else unsigned short mn_hi, mn_lo; #endif + /** @} */ /** @defgroup mdb_node Node Flags * @ingroup internal * Flags for node headers. From 9db2b6e128a821b3457cc555f61328fd53d11676 Mon Sep 17 00:00:00 2001 From: Hallvard Furuseth Date: Wed, 21 Dec 2016 21:40:14 +0100 Subject: [PATCH 36/39] More MDB_node doc --- libraries/liblmdb/mdb.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/libraries/liblmdb/mdb.c b/libraries/liblmdb/mdb.c index 57c6e4cde1..7a181d45f6 100644 --- a/libraries/liblmdb/mdb.c +++ b/libraries/liblmdb/mdb.c @@ -879,6 +879,11 @@ typedef struct MDB_page { * pgno on branch nodes. On 64 bit platforms, #mn_flags is also used * for pgno. (Branch nodes have no flags). Lo and hi are in host byte * order in case some accesses can be optimized to 32-bit word access. + * + * Leaf node flags describe node contents. #F_BIGDATA says the node's + * data part is the page number of an overflow page with actual data. + * #F_DUPDATA and #F_SUBDATA can be combined giving duplicate data in + * a sub-page/sub-database, and named databases (just #F_SUBDATA). */ typedef struct MDB_node { /** part of data size or pgno From 74d64d0eb245ce07383abcd444c23e7438e8a083 Mon Sep 17 00:00:00 2001 From: Howard Chu Date: Wed, 28 Dec 2016 18:32:14 +0000 Subject: [PATCH 37/39] ITS#8554 kFreeBSD is like BSD Doesn't have POSIX robust mutexes - GNU userland on BSD kernel --- libraries/liblmdb/mdb.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/libraries/liblmdb/mdb.c b/libraries/liblmdb/mdb.c index 7a181d45f6..23c1f009b0 100644 --- a/libraries/liblmdb/mdb.c +++ b/libraries/liblmdb/mdb.c @@ -120,7 +120,7 @@ typedef SSIZE_T ssize_t; #include /* defines BYTE_ORDER on HPUX and Solaris */ #endif -#if defined(__APPLE__) || defined (BSD) +#if defined(__APPLE__) || defined (BSD) || defined(__FreeBSD_kernel__) # define MDB_USE_POSIX_SEM 1 # define MDB_FDATASYNC fsync #elif defined(ANDROID) From 6cf793c286c404e77fb5a0fa905b5a213154dd5a Mon Sep 17 00:00:00 2001 From: Howard Chu Date: Wed, 28 Dec 2016 18:33:10 +0000 Subject: [PATCH 38/39] ITS#8554 --- libraries/liblmdb/CHANGES | 1 + 1 file changed, 1 insertion(+) diff --git a/libraries/liblmdb/CHANGES b/libraries/liblmdb/CHANGES index ddd341588a..47fb0616b2 100644 --- a/libraries/liblmdb/CHANGES +++ b/libraries/liblmdb/CHANGES @@ -13,6 +13,7 @@ LMDB 0.9.19 Release Engineering Fix mdb_env_copy with empty DB (ITS#8209) Fix behaviors with fork (ITS#8505) Fix mdb_dbi_open with mainDB cursors (ITS#8542) + Fix robust mutexes on kFreeBSD (ITS#8554) Fix utf8_to_utf16 error checks (ITS#7992) Fix F_NOCACHE on MacOS, error is non-fatal (ITS#7682) Build From 14cff072ec29f48093a9d40cc79934cf5376af4a Mon Sep 17 00:00:00 2001 From: Howard Chu Date: Wed, 28 Dec 2016 18:35:05 +0000 Subject: [PATCH 39/39] Release 0.9.19 --- libraries/liblmdb/CHANGES | 2 +- libraries/liblmdb/lmdb.h | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/libraries/liblmdb/CHANGES b/libraries/liblmdb/CHANGES index 47fb0616b2..fafaa404da 100644 --- a/libraries/liblmdb/CHANGES +++ b/libraries/liblmdb/CHANGES @@ -1,6 +1,6 @@ LMDB 0.9 Change Log -LMDB 0.9.19 Release Engineering +LMDB 0.9.19 Release (2016/12/28) Fix mdb_env_cwalk cursor init (ITS#8424) Fix robust mutexes on Solaris 10/11 (ITS#8339) Tweak Win32 error message buffer diff --git a/libraries/liblmdb/lmdb.h b/libraries/liblmdb/lmdb.h index ba74a220bf..0aeff0d320 100644 --- a/libraries/liblmdb/lmdb.h +++ b/libraries/liblmdb/lmdb.h @@ -200,7 +200,7 @@ typedef int mdb_filehandle_t; /** Library minor version */ #define MDB_VERSION_MINOR 9 /** Library patch version */ -#define MDB_VERSION_PATCH 18 +#define MDB_VERSION_PATCH 19 /** Combine args a,b,c into a single integer for easy version comparisons */ #define MDB_VERINT(a,b,c) (((a) << 24) | ((b) << 16) | (c)) @@ -210,7 +210,7 @@ typedef int mdb_filehandle_t; MDB_VERINT(MDB_VERSION_MAJOR,MDB_VERSION_MINOR,MDB_VERSION_PATCH) /** The release date of this library version */ -#define MDB_VERSION_DATE "February 5, 2016" +#define MDB_VERSION_DATE "December 28, 2016" /** A stringifier for the version info */ #define MDB_VERSTR(a,b,c,d) "LMDB " #a "." #b "." #c ": (" d ")"