diff --git a/libraries/liblmdb/lmdb.h b/libraries/liblmdb/lmdb.h
index 4ab108116b..aed43a8c66 100644
--- a/libraries/liblmdb/lmdb.h
+++ b/libraries/liblmdb/lmdb.h
@@ -524,8 +524,12 @@ typedef enum MDB_cursor_op {
#define MDB_CRYPTO_FAIL (-30777)
/** Environment encryption mismatch */
#define MDB_ENV_ENCRYPTION (-30776)
+ /** Transaction was already prepared; it can only be committed or aborted */
+#define MDB_TXN_PENDING (-30775)
+ /** Environment can't roll back the last transaction */
+#define MDB_CANT_ROLLBACK (-30774)
/** The last defined error code */
-#define MDB_LAST_ERRCODE MDB_ENV_ENCRYPTION
+#define MDB_LAST_ERRCODE MDB_CANT_ROLLBACK
/** @} */
/** @brief Statistics for a database in the environment */
@@ -1128,6 +1132,55 @@ mdb_size_t mdb_txn_id(MDB_txn *txn);
*/
int mdb_txn_commit(MDB_txn *txn);
+ /** @brief Prepare to commit all the operations of a transaction into the database.
+ *
+ * This function exists to support two-phase commit protocols.
+ * All writes in the transaction are persisted to storage, but the final
+ * metapage update is not performed. All cursors on the transaction will be
+ * closed. Only #mdb_txn_abort() or #mdb_txn_commit() may be called on the
+ * transaction after this call. It is assumed that once the regular data
+ * pages are successfully written by this call, the metapage update from a
+ * subsequent commit cannot fail, but hardware-level media failures could
+ * still break this assumption.
+ * @param[in] txn A transaction handle returned by #mdb_txn_begin()
+ * @return A non-zero error value on failure and 0 on success. Some possible
+ * errors are:
+ *
+ * - EINVAL - an invalid parameter was specified.
+ * - ENOSPC - no more disk space.
+ * - EIO - a low-level I/O error occurred while writing.
+ * - ENOMEM - out of memory.
+ * - #MDB_TXN_PENDING - the transaction has already been prepared.
+ *   It can only be aborted or committed.
+ *
+ */
+int mdb_txn_prepare(MDB_txn *txn);
+
+ /** @brief Roll back the last committed transaction in the environment.
+ *
+ * This function exists to support two-phase commit protocols.
+ * The metapage update for the last committed transaction will be zeroed,
+ * so its changes will be ignored. It should only be used when the local
+ * phase of a multi-phase transaction has fully committed, but some other
+ * remote phase which successfully prepared has failed to commit.
+ * This function may not be called twice in a row. No other operations
+ * may be performed on the environment, by any process, between the
+ * preceding #mdb_txn_commit() and this call.
+ * @param[in] env An environment handle returned by #mdb_env_create().
+ * @param[in] txnid The ID of the transaction to roll back, obtained from
+ * #mdb_txn_id() on the previous transaction.
+ * @return A non-zero error value on failure and 0 on success. Some possible
+ * errors are:
+ *
+ * - EINVAL - an invalid parameter was specified.
+ * - ENOSPC - no more disk space.
+ * - EIO - a low-level I/O error occurred while writing.
+ * - #MDB_CANT_ROLLBACK - a rollback has already been done, there is
+ *   no other valid metapage to roll back to, or another transaction
+ *   has already been committed over the specified txnid.
+ *
+ */
+int mdb_env_rollback(MDB_env *env, mdb_size_t txnid);
+
/** @brief Abandon all the operations of the transaction instead of saving them.
*
* The transaction handle is freed. It and its cursors must not be used
diff --git a/libraries/liblmdb/mdb.c b/libraries/liblmdb/mdb.c
index 626c0c3cf4..bfecbb5b9a 100644
--- a/libraries/liblmdb/mdb.c
+++ b/libraries/liblmdb/mdb.c
@@ -1500,8 +1500,9 @@ struct MDB_txn {
#define MDB_TXN_SPILLS 0x08 /**< txn or a parent has spilled pages */
#define MDB_TXN_HAS_CHILD 0x10 /**< txn has an #MDB_txn.%mt_child */
#define MDB_TXN_DIRTYNUM 0x20 /**< dirty list uses nump list */
+#define MDB_TXN_PREPARE 0x40 /**< prepare txn, don't fully commit */
/** most operations on the txn are currently illegal */
-#define MDB_TXN_BLOCKED (MDB_TXN_FINISHED|MDB_TXN_ERROR|MDB_TXN_HAS_CHILD)
+#define MDB_TXN_BLOCKED (MDB_TXN_FINISHED|MDB_TXN_ERROR|MDB_TXN_HAS_CHILD|MDB_TXN_PREPARE)
/** @} */
unsigned int mt_flags; /**< @ref mdb_txn */
/** #dirty_list room: Array size - \#dirty pages visible to this txn.
@@ -1888,6 +1889,8 @@ static char *const mdb_errstr[] = {
"MDB_BAD_CHECKSUM: Page checksum mismatch",
"MDB_CRYPTO_FAIL: Page encryption or decryption failed",
"MDB_ENV_ENCRYPTION: Environment encryption mismatch",
+ "MDB_TXN_PENDING: Transaction already prepared, must abort or commit",
+ "MDB_CANT_ROLLBACK: Environment can't rollback last transaction",
};
char *
@@ -4263,7 +4266,7 @@ done:
static int ESECT mdb_env_share_locks(MDB_env *env, int *excl);
static int
-_mdb_txn_commit(MDB_txn *txn)
+_mdb_txn_commit(MDB_txn *txn, int flag)
{
int rc;
unsigned int i, end_mode;
@@ -4276,7 +4279,7 @@ _mdb_txn_commit(MDB_txn *txn)
end_mode = MDB_END_EMPTY_COMMIT|MDB_END_UPDATE|MDB_END_SLOT|MDB_END_FREE;
if (txn->mt_child) {
- rc = _mdb_txn_commit(txn->mt_child);
+ rc = _mdb_txn_commit(txn->mt_child, 0);
if (rc)
goto fail;
}
@@ -4287,6 +4290,10 @@ _mdb_txn_commit(MDB_txn *txn)
goto done;
}
+ if (F_ISSET(txn->mt_flags, MDB_TXN_PREPARE)) {
+ goto prepared;
+ }
+
if (txn->mt_flags & (MDB_TXN_FINISHED|MDB_TXN_ERROR)) {
DPUTS("txn has failed/finished, can't commit");
if (txn->mt_parent)
@@ -4486,6 +4493,12 @@ _mdb_txn_commit(MDB_txn *txn)
if (!F_ISSET(txn->mt_flags, MDB_TXN_NOSYNC) &&
(rc = mdb_env_sync0(env, 0, txn->mt_next_pgno)))
goto fail;
+ if (F_ISSET(flag, MDB_TXN_PREPARE)) {
+ txn->mt_flags |= MDB_TXN_PREPARE;
+ return MDB_SUCCESS;
+ }
+
+prepared:
if ((rc = mdb_env_write_meta(txn)))
goto fail;
end_mode = MDB_END_COMMITTED|MDB_END_UPDATE;
@@ -4512,7 +4525,43 @@ int
mdb_txn_commit(MDB_txn *txn)
{
MDB_TRACE(("%p", txn));
- return _mdb_txn_commit(txn);
+ return _mdb_txn_commit(txn, 0);
+}
+
+/** Public entry point for the first phase of a two-phase commit.
+ *
+ * Runs the shared commit path with #MDB_TXN_PREPARE. On that path
+ * _mdb_txn_commit() sets the flag on the transaction and returns right
+ * after the mdb_env_sync0() step, before mdb_env_write_meta(). A later
+ * mdb_txn_commit() finds the flag set and resumes at the meta page write.
+ * @param[in] txn the transaction to prepare
+ * @return 0 on success, EINVAL if \b txn is NULL, #MDB_TXN_PENDING if
+ * \b txn was already prepared, otherwise the result of _mdb_txn_commit().
+ */
+int
+mdb_txn_prepare(MDB_txn *txn)
+{
+	MDB_TRACE(("%p", txn));
+	/* Reject NULL before reading mt_flags; the public header documents
+	 * EINVAL for an invalid parameter.
+	 */
+	if (txn == NULL)
+		return EINVAL;
+	if (F_ISSET(txn->mt_flags, MDB_TXN_PREPARE))
+		return MDB_TXN_PENDING;
+	/* NOTE(review): the early exits of _mdb_txn_commit() (e.g. its
+	 * "goto done" near the top) are outside this view; confirm that
+	 * prepare behaves sensibly for transactions that take them.
+	 */
+	return _mdb_txn_commit(txn, MDB_TXN_PREPARE);
+}
+
+/** Undo the most recent commit by rewriting a meta page.
+ *
+ * Compares the txnids of the two meta pages, checks that the larger one
+ * equals \b txnid and that the other one is non-zero, then calls
+ * mdb_env_write_meta() with an all-zero dummy transaction. On success,
+ * and when env->me_txns is present, mti_txnid is set back to the txnid
+ * of the other meta page.
+ * @param[in] env the environment to roll back
+ * @param[in] txnid the ID of the commit to undo
+ * @return 0 on success, EINVAL if \b env is NULL, #MDB_CANT_ROLLBACK if
+ * the checks above fail, otherwise the error from LOCK_MUTEX() or
+ * mdb_env_write_meta().
+ */
+int
+mdb_env_rollback(MDB_env *env, mdb_size_t txnid)
+{
+	MDB_meta **metas;
+	int newest, previous, rc = 0;
+
+	/* Reject NULL before reading me_metas; the public header documents
+	 * EINVAL for an invalid parameter.
+	 */
+	if (env == NULL)
+		return EINVAL;
+	metas = env->me_metas;
+
+	/* Serialize on me_wmutex whenever env->me_txns is present */
+	if (env->me_txns && LOCK_MUTEX(rc, env, env->me_wmutex))
+		return rc;
+
+	/* Index of the meta page holding the larger txnid */
+	newest = metas[0]->mm_txnid < metas[1]->mm_txnid;
+	previous = newest ^ 1;
+	/* Refuse when the other meta page has txnid 0 (nothing to fall back
+	 * to) or when the newest commit is not the one the caller named.
+	 */
+	if (!metas[previous]->mm_txnid || metas[newest]->mm_txnid != txnid)
+		rc = MDB_CANT_ROLLBACK;
+	else {
+		/* NOTE(review): the dummy txn leaves mt_txnid at 0, and
+		 * mdb_env_write_meta() is not visible in this patch; confirm
+		 * that it overwrites metas[newest] and not metas[previous].
+		 */
+		MDB_txn txn = {0};
+		MDB_db dbs[2] = {0};
+		txn.mt_env = env;
+		txn.mt_dbs = dbs;
+		rc = mdb_env_write_meta(&txn);
+	}
+	if (env->me_txns) {
+		/* Only move the shared txnid back once the meta write succeeded */
+		if (rc == MDB_SUCCESS)
+			env->me_txns->mti_txnid = metas[previous]->mm_txnid;
+		UNLOCK_MUTEX(env->me_wmutex);
+	}
+	return rc;
+}
static int ESECT mdb_env_map(MDB_env *env, void *addr);