From 425525fbec5375220203d94faaf494084b7ee299 Mon Sep 17 00:00:00 2001 From: Howard Chu Date: Mon, 8 Dec 2025 16:07:00 +0000 Subject: [PATCH] ITS#9224 lmdb: add support for PREPARE/2-phase commit --- libraries/liblmdb/lmdb.h | 55 +++++++++++++++++++++++++++++++++++++- libraries/liblmdb/mdb.c | 57 +++++++++++++++++++++++++++++++++++++--- 2 files changed, 107 insertions(+), 5 deletions(-) diff --git a/libraries/liblmdb/lmdb.h b/libraries/liblmdb/lmdb.h index 4ab108116b..aed43a8c66 100644 --- a/libraries/liblmdb/lmdb.h +++ b/libraries/liblmdb/lmdb.h @@ -524,8 +524,12 @@ typedef enum MDB_cursor_op { #define MDB_CRYPTO_FAIL (-30777) /** Environment encryption mismatch */ #define MDB_ENV_ENCRYPTION (-30776) + /** Transaction was already prepared */ +#define MDB_TXN_PENDING (-30775) + /** Environment can't rollback the last transaction */ +#define MDB_CANT_ROLLBACK (-30774) /** The last defined error code */ -#define MDB_LAST_ERRCODE MDB_ENV_ENCRYPTION +#define MDB_LAST_ERRCODE MDB_CANT_ROLLBACK /** @} */ /** @brief Statistics for a database in the environment */ @@ -1128,6 +1132,55 @@ mdb_size_t mdb_txn_id(MDB_txn *txn); */ int mdb_txn_commit(MDB_txn *txn); + /** @brief Prepare to commit all the operations of a transaction into the database. + * + * This function exists to support two-phase commit protocols. + * All writes in the transaction are persisted to storage, but the final + * metapage update is not performed. All cursors on the transaction will be + * closed. Only #mdb_txn_abort() or #mdb_txn_commit() are valid after this + * call. It is assumed that once the regular data pages are successfully written + * by this call, the metapage update from a subsequent commit cannot fail, but + * hardware-level media failures could still break this assumption. + * @param[in] txn A transaction handle returned by #mdb_txn_begin() + * @return A non-zero error value on failure and 0 on success. 
Some possible + * errors are: + * + */ +int mdb_txn_prepare(MDB_txn *txn); + + /** @brief Roll back the last committed transaction in the environment. + * + * This function exists to support two-phase commit protocols. + * The metapage update for the last committed transaction will be zeroed, + * so its changes will be ignored. It should only be used when the local + * phase of a multi-phase transaction has fully committed, but some other + * remote phase which successfully prepared has failed to commit. + * This function may not be called twice in a row. No other operations + * may be performed on the environment, by any processes, between the + * preceding #mdb_txn_commit() and this call. + * @param[in] env An environment handle returned by #mdb_env_create(). + * @param[in] txnid The ID of the transaction to roll back, obtained from + * #mdb_txn_id() on the previous transaction. + * @return A non-zero error value on failure and 0 on success. Some possible + * errors are: + * + */ +int mdb_env_rollback(MDB_env *env, mdb_size_t txnid); + /** @brief Abandon all the operations of the transaction instead of saving them. * * The transaction handle is freed. 
It and its cursors must not be used diff --git a/libraries/liblmdb/mdb.c b/libraries/liblmdb/mdb.c index 626c0c3cf4..bfecbb5b9a 100644 --- a/libraries/liblmdb/mdb.c +++ b/libraries/liblmdb/mdb.c @@ -1500,8 +1500,9 @@ struct MDB_txn { #define MDB_TXN_SPILLS 0x08 /**< txn or a parent has spilled pages */ #define MDB_TXN_HAS_CHILD 0x10 /**< txn has an #MDB_txn.%mt_child */ #define MDB_TXN_DIRTYNUM 0x20 /**< dirty list uses nump list */ +#define MDB_TXN_PREPARE 0x40 /**< prepare txn, don't fully commit */ /** most operations on the txn are currently illegal */ -#define MDB_TXN_BLOCKED (MDB_TXN_FINISHED|MDB_TXN_ERROR|MDB_TXN_HAS_CHILD) +#define MDB_TXN_BLOCKED (MDB_TXN_FINISHED|MDB_TXN_ERROR|MDB_TXN_HAS_CHILD|MDB_TXN_PREPARE) /** @} */ unsigned int mt_flags; /**< @ref mdb_txn */ /** #dirty_list room: Array size - \#dirty pages visible to this txn. @@ -1888,6 +1889,8 @@ static char *const mdb_errstr[] = { "MDB_BAD_CHECKSUM: Page checksum mismatch", "MDB_CRYPTO_FAIL: Page encryption or decryption failed", "MDB_ENV_ENCRYPTION: Environment encryption mismatch", + "MDB_TXN_PENDING: Transaction already prepared, must abort or commit", + "MDB_CANT_ROLLBACK: Environment can't rollback last transaction", }; char * @@ -4263,7 +4266,7 @@ done: static int ESECT mdb_env_share_locks(MDB_env *env, int *excl); static int -_mdb_txn_commit(MDB_txn *txn) +_mdb_txn_commit(MDB_txn *txn, int flag) { int rc; unsigned int i, end_mode; @@ -4276,7 +4279,7 @@ _mdb_txn_commit(MDB_txn *txn) end_mode = MDB_END_EMPTY_COMMIT|MDB_END_UPDATE|MDB_END_SLOT|MDB_END_FREE; if (txn->mt_child) { - rc = _mdb_txn_commit(txn->mt_child); + rc = _mdb_txn_commit(txn->mt_child, 0); if (rc) goto fail; } @@ -4287,6 +4290,10 @@ _mdb_txn_commit(MDB_txn *txn) goto done; } + if (F_ISSET(txn->mt_flags, MDB_TXN_PREPARE)) { + goto prepared; + } + if (txn->mt_flags & (MDB_TXN_FINISHED|MDB_TXN_ERROR)) { DPUTS("txn has failed/finished, can't commit"); if (txn->mt_parent) @@ -4486,6 +4493,12 @@ _mdb_txn_commit(MDB_txn *txn) if 
(!F_ISSET(txn->mt_flags, MDB_TXN_NOSYNC) && (rc = mdb_env_sync0(env, 0, txn->mt_next_pgno))) goto fail; + if (F_ISSET(flag, MDB_TXN_PREPARE)) { + txn->mt_flags |= MDB_TXN_PREPARE; + return MDB_SUCCESS; + } + +prepared: if ((rc = mdb_env_write_meta(txn))) goto fail; end_mode = MDB_END_COMMITTED|MDB_END_UPDATE; @@ -4512,7 +4525,43 @@ int mdb_txn_commit(MDB_txn *txn) { MDB_TRACE(("%p", txn)); - return _mdb_txn_commit(txn); + return _mdb_txn_commit(txn, 0); +} + +int +mdb_txn_prepare(MDB_txn *txn) +{ + MDB_TRACE(("%p", txn)); + if (F_ISSET(txn->mt_flags, MDB_TXN_PREPARE)) + return MDB_TXN_PENDING; + return _mdb_txn_commit(txn, MDB_TXN_PREPARE); +} + +int +mdb_env_rollback(MDB_env *env, mdb_size_t txnid) +{ + MDB_meta **metas = env->me_metas; + int newest, previous, rc = 0; + + if (env->me_txns && LOCK_MUTEX(rc, env, env->me_wmutex)) + return rc; + newest = metas[0]->mm_txnid < metas[1]->mm_txnid; + previous = newest ^ 1; + if (!metas[previous]->mm_txnid || metas[newest]->mm_txnid != txnid) + rc = MDB_CANT_ROLLBACK; + else { + MDB_txn txn = {0}; + MDB_db dbs[2] = {0}; + txn.mt_env = env; + txn.mt_dbs = dbs; + rc = mdb_env_write_meta(&txn); + } + if (env->me_txns) { + if (rc == MDB_SUCCESS) + env->me_txns->mti_txnid = metas[previous]->mm_txnid; + UNLOCK_MUTEX(env->me_wmutex); + } + return rc; } static int ESECT mdb_env_map(MDB_env *env, void *addr);