Fix deadlock between rndc addzone/delzone/modzone

It follows a description of the steps that were leading to the deadlock:

1. `do_addzone` calls `isc_task_beginexclusive`.

2. `isc_task_beginexclusive` waits for (N_WORKERS - 1) halted tasks,
   this blocks waiting for those (no. workers -1) workers to halt.
...
isc_task_beginexclusive(isc_task_t *task0) {
    ...
	while (manager->halted + 1 < manager->workers) {
		wake_all_queues(manager);
		WAIT(&manager->halt_cond, &manager->halt_lock);
	}
```

3. It is possible that in `task.c / dispatch()` a worker is running a
   task event, if that event blocks it will not allow this worker to
   halt.

4. `do_addzone` acquires `LOCK(&view->new_zone_lock);`,

5. `rmzone` event is called from some worker's `dispatch()`, `rmzone`
   blocks waiting for the same lock.

6. `do_addzone` calls `isc_task_beginexclusive`.

7. Deadlock triggered, since:
	- `rmzone` is wating for the lock.
	- `isc_task_beginexclusive` is waiting for (no. workers - 1) to
	   be halted
	- since `rmzone` event is blocked it won't allow the worker to halt.

To fix this, we updated do_addzone code to call isc_task_beginexclusive
before the lock is acquired, we postpone locking to the nearest required
place, same for isc_task_beginexclusive.

The same could happen with rndc modzone, so that was addressed as well.
This commit is contained in:
Diego Fronza 2021-04-16 12:21:31 -03:00 committed by Diego dos Santos Fronza
parent fa6b277b8d
commit 9298dcebbd

View file

@ -13683,13 +13683,13 @@ do_addzone(named_server_t *server, ns_cfgctx_t *cfg, dns_view_t *view,
#ifndef HAVE_LMDB
FILE *fp = NULL;
bool cleanup_config = false;
#else /* HAVE_LMDB */
#else /* HAVE_LMDB */
MDB_txn *txn = NULL;
MDB_dbi dbi;
bool locked = false;
UNUSED(zoneconf);
LOCK(&view->new_zone_lock);
#endif /* HAVE_LMDB */
#endif
/* Zone shouldn't already exist */
if (redirect) {
@ -13709,12 +13709,16 @@ do_addzone(named_server_t *server, ns_cfgctx_t *cfg, dns_view_t *view,
goto cleanup;
}
result = isc_task_beginexclusive(server->task);
RUNTIME_CHECK(result == ISC_R_SUCCESS);
#ifndef HAVE_LMDB
/*
* Make sure we can open the configuration save file
*/
result = isc_stdio_open(view->new_zone_file, "a", &fp);
if (result != ISC_R_SUCCESS) {
isc_task_endexclusive(server->task);
TCHECK(putstr(text, "unable to create '"));
TCHECK(putstr(text, view->new_zone_file));
TCHECK(putstr(text, "': "));
@ -13725,9 +13729,12 @@ do_addzone(named_server_t *server, ns_cfgctx_t *cfg, dns_view_t *view,
(void)isc_stdio_close(fp);
fp = NULL;
#else /* HAVE_LMDB */
LOCK(&view->new_zone_lock);
locked = true;
/* Make sure we can open the NZD database */
result = nzd_writable(view);
if (result != ISC_R_SUCCESS) {
isc_task_endexclusive(server->task);
TCHECK(putstr(text, "unable to open NZD database for '"));
TCHECK(putstr(text, view->new_zone_db));
TCHECK(putstr(text, "'"));
@ -13736,9 +13743,6 @@ do_addzone(named_server_t *server, ns_cfgctx_t *cfg, dns_view_t *view,
}
#endif /* HAVE_LMDB */
result = isc_task_beginexclusive(server->task);
RUNTIME_CHECK(result == ISC_R_SUCCESS);
/* Mark view unfrozen and configure zone */
dns_view_thaw(view);
result = configure_zone(cfg->config, zoneobj, cfg->vconfig,
@ -13842,7 +13846,9 @@ cleanup:
if (txn != NULL) {
(void)nzd_close(&txn, false);
}
UNLOCK(&view->new_zone_lock);
if (locked) {
UNLOCK(&view->new_zone_lock);
}
#endif /* HAVE_LMDB */
if (zone != NULL) {
@ -13866,7 +13872,7 @@ do_modzone(named_server_t *server, ns_cfgctx_t *cfg, dns_view_t *view,
#else /* HAVE_LMDB */
MDB_txn *txn = NULL;
MDB_dbi dbi;
LOCK(&view->new_zone_lock);
bool locked = false;
#endif /* HAVE_LMDB */
/* Zone must already exist */
@ -13912,6 +13918,8 @@ do_modzone(named_server_t *server, ns_cfgctx_t *cfg, dns_view_t *view,
(void)isc_stdio_close(fp);
fp = NULL;
#else /* HAVE_LMDB */
LOCK(&view->new_zone_lock);
locked = true;
/* Make sure we can open the NZD database */
result = nzd_writable(view);
if (result != ISC_R_SUCCESS) {
@ -14064,7 +14072,9 @@ cleanup:
if (txn != NULL) {
(void)nzd_close(&txn, false);
}
UNLOCK(&view->new_zone_lock);
if (locked) {
UNLOCK(&view->new_zone_lock);
}
#endif /* HAVE_LMDB */
if (zone != NULL) {