mirror of
https://git.openldap.org/openldap/openldap.git
synced 2025-12-21 07:09:34 -05:00
Update MDB comments: Caveats, flags, etc.
This commit is contained in:
parent
8e7bb2042b
commit
7fdf672041
2 changed files with 53 additions and 31 deletions
|
|
@ -78,10 +78,11 @@
|
||||||
* database can grow quickly. Write transactions prevent
|
* database can grow quickly. Write transactions prevent
|
||||||
* other write transactions, since writes are serialized.
|
* other write transactions, since writes are serialized.
|
||||||
*
|
*
|
||||||
* ...when several processes can use a database concurrently:
|
|
||||||
*
|
|
||||||
* - Avoid suspending a process with active transactions. These
|
* - Avoid suspending a process with active transactions. These
|
||||||
* would then be "long-lived" as above.
|
* would then be "long-lived" as above. Also, read transactions
|
||||||
|
* suspended when writers commit could sometimes see wrong data.
|
||||||
|
*
|
||||||
|
* ...when several processes can use a database concurrently:
|
||||||
*
|
*
|
||||||
* - Avoid aborting a process with an active transaction.
|
* - Avoid aborting a process with an active transaction.
|
||||||
* The transaction becomes "long-lived" as above until the lockfile
|
* The transaction becomes "long-lived" as above until the lockfile
|
||||||
|
|
@ -221,7 +222,7 @@ typedef void (MDB_rel_func)(MDB_val *item, void *oldptr, void *newptr, void *rel
|
||||||
* Values do not overlap Database Flags.
|
* Values do not overlap Database Flags.
|
||||||
* @{
|
* @{
|
||||||
*/
|
*/
|
||||||
/** mmap at a fixed address */
|
/** mmap at a fixed address (experimental) */
|
||||||
#define MDB_FIXEDMAP 0x01
|
#define MDB_FIXEDMAP 0x01
|
||||||
/** no environment directory */
|
/** no environment directory */
|
||||||
#define MDB_NOSUBDIR 0x4000
|
#define MDB_NOSUBDIR 0x4000
|
||||||
|
|
@ -233,7 +234,7 @@ typedef void (MDB_rel_func)(MDB_val *item, void *oldptr, void *newptr, void *rel
|
||||||
#define MDB_NOMETASYNC 0x40000
|
#define MDB_NOMETASYNC 0x40000
|
||||||
/** use writable mmap */
|
/** use writable mmap */
|
||||||
#define MDB_WRITEMAP 0x80000
|
#define MDB_WRITEMAP 0x80000
|
||||||
/** use asynchronous msync */
|
/** use asynchronous msync when MDB_WRITEMAP is used */
|
||||||
#define MDB_MAPASYNC 0x100000
|
#define MDB_MAPASYNC 0x100000
|
||||||
/** @} */
|
/** @} */
|
||||||
|
|
||||||
|
|
@ -435,24 +436,43 @@ int mdb_env_create(MDB_env **env);
|
||||||
* under that directory. With this option, \b path is used as-is for
|
* under that directory. With this option, \b path is used as-is for
|
||||||
* the database main data file. The database lock file is the \b path
|
* the database main data file. The database lock file is the \b path
|
||||||
* with "-lock" appended.
|
* with "-lock" appended.
|
||||||
* <li>#MDB_NOSYNC
|
|
||||||
* Don't perform a synchronous flush after committing a transaction. This means
|
|
||||||
* transactions will exhibit the ACI (atomicity, consistency, and isolation)
|
|
||||||
* properties, but not D (durability); that is database integrity will be
|
|
||||||
* maintained but it is possible some number of the most recently committed
|
|
||||||
* transactions may be undone after a system crash. The number of transactions
|
|
||||||
* at risk is governed by how often the system flushes dirty buffers to disk
|
|
||||||
* and how often #mdb_env_sync() is called. This flag may be changed
|
|
||||||
* at any time using #mdb_env_set_flags().
|
|
||||||
* <li>#MDB_NOMETASYNC
|
|
||||||
* Don't perform a synchronous flush of the meta page after committing
|
|
||||||
* a transaction. This is similar to the #MDB_NOSYNC case, but safer
|
|
||||||
* because the transaction data is still flushed. The meta page for any
|
|
||||||
* transaction N will be flushed by the data flush of transaction N+1.
|
|
||||||
* In case of a system crash, the last committed transaction may be
|
|
||||||
* lost. This flag may be changed at any time using #mdb_env_set_flags().
|
|
||||||
* <li>#MDB_RDONLY
|
* <li>#MDB_RDONLY
|
||||||
* Open the environment in read-only mode. No write operations will be allowed.
|
* Open the environment in read-only mode. No write operations will be
|
||||||
|
* allowed. MDB will still modify the lock file - except on read-only
|
||||||
|
* filesystems, where MDB does not use locks.
|
||||||
|
* <li>#MDB_WRITEMAP
|
||||||
|
* Use a writable memory map unless MDB_RDONLY is set. This is faster
|
||||||
|
* and uses fewer mallocs, but loses protection from application bugs
|
||||||
|
* like wild pointer writes and other bad updates into the database.
|
||||||
|
* Incompatible with nested transactions.
|
||||||
|
* <li>#MDB_NOMETASYNC
|
||||||
|
* Flush system buffers to disk only once per transaction, omit the
|
||||||
|
* metadata flush. Defer that until the system flushes files to disk,
|
||||||
|
* or next non-MDB_RDONLY commit or #mdb_env_sync(). This optimization
|
||||||
|
* maintains database integrity, but a system crash may undo the last
|
||||||
|
* committed transaction. I.e. it preserves the ACI (atomicity,
|
||||||
|
* consistency, isolation) but not D (durability) database property.
|
||||||
|
* This flag may be changed at any time using #mdb_env_set_flags().
|
||||||
|
* <li>#MDB_NOSYNC
|
||||||
|
* Don't flush system buffers to disk when committing a transaction.
|
||||||
|
* This optimization means a system crash can corrupt the database or
|
||||||
|
* lose the last transactions if buffers are not yet flushed to disk.
|
||||||
|
* The risk is governed by how often the system flushes dirty buffers
|
||||||
|
* to disk and how often #mdb_env_sync() is called. However, if the
|
||||||
|
* filesystem preserves write order and the #MDB_WRITEMAP flag is not
|
||||||
|
* used, transactions exhibit ACI (atomicity, consistency, isolation)
|
||||||
|
* properties and only lose D (durability). I.e. database integrity
|
||||||
|
* is maintained, but a system crash may undo the final transactions.
|
||||||
|
* Note that (#MDB_NOSYNC | #MDB_WRITEMAP) leaves the system with no
|
||||||
|
* hint for when to write transactions to disk, unless #mdb_env_sync()
|
||||||
|
* is called. (#MDB_MAPASYNC | #MDB_WRITEMAP) may be preferable.
|
||||||
|
* This flag may be changed at any time using #mdb_env_set_flags().
|
||||||
|
* <li>#MDB_MAPASYNC
|
||||||
|
* When using #MDB_WRITEMAP, use asynchronous flushes to disk.
|
||||||
|
* As with #MDB_NOSYNC, a system crash can then corrupt the
|
||||||
|
* database or lose the last transactions. Calling #mdb_env_sync()
|
||||||
|
* ensures on-disk database integrity until the next commit.
|
||||||
|
* This flag may be changed at any time using #mdb_env_set_flags().
|
||||||
* </ul>
|
* </ul>
|
||||||
* @param[in] mode The UNIX permissions to set on created files. This parameter
|
* @param[in] mode The UNIX permissions to set on created files. This parameter
|
||||||
* is ignored on Windows.
|
* is ignored on Windows.
|
||||||
|
|
@ -502,7 +522,7 @@ int mdb_env_info(MDB_env *env, MDB_envinfo *stat);
|
||||||
* Data is always written to disk when #mdb_txn_commit() is called,
|
* Data is always written to disk when #mdb_txn_commit() is called,
|
||||||
* but the operating system may keep it buffered. MDB always flushes
|
* but the operating system may keep it buffered. MDB always flushes
|
||||||
* the OS buffers upon commit as well, unless the environment was
|
* the OS buffers upon commit as well, unless the environment was
|
||||||
* opened with #MDB_NOSYNC.
|
* opened with #MDB_NOSYNC, or with #MDB_NOMETASYNC (which omits only the metadata flush).
|
||||||
* @param[in] env An environment handle returned by #mdb_env_create()
|
* @param[in] env An environment handle returned by #mdb_env_create()
|
||||||
* @param[in] force If non-zero, force a synchronous flush. Otherwise
|
* @param[in] force If non-zero, force a synchronous flush. Otherwise
|
||||||
* if the environment has the #MDB_NOSYNC flag set the flushes
|
* if the environment has the #MDB_NOSYNC flag set the flushes
|
||||||
|
|
@ -731,7 +751,7 @@ int mdb_txn_renew(MDB_txn *txn);
|
||||||
* by the given transaction. Only one thread should call this function;
|
* by the given transaction. Only one thread should call this function;
|
||||||
* it is not mutex-protected in a read-only transaction.
|
* it is not mutex-protected in a read-only transaction.
|
||||||
* To use named databases (with name != NULL), #mdb_env_set_maxdbs()
|
* To use named databases (with name != NULL), #mdb_env_set_maxdbs()
|
||||||
* must be called before opening the enviorment.
|
* must be called before opening the environment.
|
||||||
* @param[in] txn A transaction handle returned by #mdb_txn_begin()
|
* @param[in] txn A transaction handle returned by #mdb_txn_begin()
|
||||||
* @param[in] name The name of the database to open. If only a single
|
* @param[in] name The name of the database to open. If only a single
|
||||||
* database is needed in the environment, this value may be NULL.
|
* database is needed in the environment, this value may be NULL.
|
||||||
|
|
@ -796,7 +816,7 @@ int mdb_stat(MDB_txn *txn, MDB_dbi dbi, MDB_stat *stat);
|
||||||
*
|
*
|
||||||
* This call is not mutex protected. Handles should only be closed by
|
* This call is not mutex protected. Handles should only be closed by
|
||||||
* a single thread, and only if no other threads are going to reference
|
* a single thread, and only if no other threads are going to reference
|
||||||
* the database handle any further.
|
* the database handle or one of its cursors any further.
|
||||||
* @param[in] env An environment handle returned by #mdb_env_create()
|
* @param[in] env An environment handle returned by #mdb_env_create()
|
||||||
* @param[in] dbi A database handle returned by #mdb_dbi_open()
|
* @param[in] dbi A database handle returned by #mdb_dbi_open()
|
||||||
*/
|
*/
|
||||||
|
|
|
||||||
|
|
@ -382,7 +382,7 @@ static txnid_t mdb_debug_start;
|
||||||
*/
|
*/
|
||||||
#define P_INVALID (~(pgno_t)0)
|
#define P_INVALID (~(pgno_t)0)
|
||||||
|
|
||||||
/** Test if a flag \b f is set in a flag word \b w. */
|
/** Test if the flags \b f are set in a flag word \b w. */
|
||||||
#define F_ISSET(w, f) (((w) & (f)) == (f))
|
#define F_ISSET(w, f) (((w) & (f)) == (f))
|
||||||
|
|
||||||
/** Used for offsets within a single page.
|
/** Used for offsets within a single page.
|
||||||
|
|
@ -404,6 +404,8 @@ typedef uint16_t indx_t;
|
||||||
* slot's address is saved in thread-specific data so that subsequent read
|
* slot's address is saved in thread-specific data so that subsequent read
|
||||||
* transactions started by the same thread need no further locking to proceed.
|
* transactions started by the same thread need no further locking to proceed.
|
||||||
*
|
*
|
||||||
|
* No reader table is used if the database is on a read-only filesystem.
|
||||||
|
*
|
||||||
* Since the database uses multi-version concurrency control, readers don't
|
* Since the database uses multi-version concurrency control, readers don't
|
||||||
* actually need any locking. This table is used to keep track of which
|
* actually need any locking. This table is used to keep track of which
|
||||||
* readers are using data from which old transactions, so that we'll know
|
* readers are using data from which old transactions, so that we'll know
|
||||||
|
|
@ -810,8 +812,8 @@ struct MDB_txn {
|
||||||
*/
|
*/
|
||||||
MDB_IDL mt_free_pgs;
|
MDB_IDL mt_free_pgs;
|
||||||
union {
|
union {
|
||||||
MDB_ID2L dirty_list; /**< modified pages */
|
MDB_ID2L dirty_list; /**< for write txns: modified pages */
|
||||||
MDB_reader *reader; /**< this thread's slot in the reader table */
|
MDB_reader *reader; /**< this thread's reader table slot or NULL */
|
||||||
} mt_u;
|
} mt_u;
|
||||||
/** Array of records for each DB known in the environment. */
|
/** Array of records for each DB known in the environment. */
|
||||||
MDB_dbx *mt_dbxs;
|
MDB_dbx *mt_dbxs;
|
||||||
|
|
@ -824,7 +826,7 @@ struct MDB_txn {
|
||||||
#define DB_DIRTY 0x01 /**< DB was written in this txn */
|
#define DB_DIRTY 0x01 /**< DB was written in this txn */
|
||||||
#define DB_STALE 0x02 /**< DB record is older than txnID */
|
#define DB_STALE 0x02 /**< DB record is older than txnID */
|
||||||
/** @} */
|
/** @} */
|
||||||
/** Array of cursors for each DB */
|
/** In write txns, array of cursors for each DB */
|
||||||
MDB_cursor **mt_cursors;
|
MDB_cursor **mt_cursors;
|
||||||
/** Array of flags for each DB */
|
/** Array of flags for each DB */
|
||||||
unsigned char *mt_dbflags;
|
unsigned char *mt_dbflags;
|
||||||
|
|
@ -941,7 +943,7 @@ struct MDB_env {
|
||||||
pid_t me_pid; /**< process ID of this env */
|
pid_t me_pid; /**< process ID of this env */
|
||||||
char *me_path; /**< path to the DB files */
|
char *me_path; /**< path to the DB files */
|
||||||
char *me_map; /**< the memory map of the data file */
|
char *me_map; /**< the memory map of the data file */
|
||||||
MDB_txninfo *me_txns; /**< the memory map of the lock file */
|
MDB_txninfo *me_txns; /**< the memory map of the lock file or NULL */
|
||||||
MDB_meta *me_metas[2]; /**< pointers to the two meta pages */
|
MDB_meta *me_metas[2]; /**< pointers to the two meta pages */
|
||||||
MDB_txn *me_txn; /**< current write transaction */
|
MDB_txn *me_txn; /**< current write transaction */
|
||||||
size_t me_mapsize; /**< size of the data memory map */
|
size_t me_mapsize; /**< size of the data memory map */
|
||||||
|
|
@ -950,7 +952,7 @@ struct MDB_env {
|
||||||
txnid_t me_pgfirst; /**< ID of first old page record we used */
|
txnid_t me_pgfirst; /**< ID of first old page record we used */
|
||||||
txnid_t me_pglast; /**< ID of last old page record we used */
|
txnid_t me_pglast; /**< ID of last old page record we used */
|
||||||
MDB_dbx *me_dbxs; /**< array of static DB info */
|
MDB_dbx *me_dbxs; /**< array of static DB info */
|
||||||
uint16_t *me_dbflags; /**< array of DB flags */
|
uint16_t *me_dbflags; /**< array of flags from MDB_db.md_flags */
|
||||||
MDB_oldpages *me_pghead; /**< list of old page records */
|
MDB_oldpages *me_pghead; /**< list of old page records */
|
||||||
MDB_oldpages *me_pgfree; /**< list of page records to free */
|
MDB_oldpages *me_pgfree; /**< list of page records to free */
|
||||||
pthread_key_t me_txkey; /**< thread-key for readers */
|
pthread_key_t me_txkey; /**< thread-key for readers */
|
||||||
|
|
|
||||||
Loading…
Reference in a new issue