zfsd: Use vdev prop values for fault/degrade thresholds

ZED uses vdev props for setting disk fault/degrade thresholds, this
patch enables zfsd to use the same vdev props for these same tasks.

OpenZFS on Linux is using vdev props for ZED disk fault/degrade
thresholds. Originally the thresholds supported were for io and checksum
events and recently this was updated to process slow io events as
well, see
cbe882298e

This patch enables us to use the same vdev props in zfsd as ZED uses.
After this patch is merged both OSs will use the same vdev props to set
retirement thresholds.

It's probably important to note that the threshold defaults are
different between OS. I've kept the existing defaults inside zfsd and
DID NOT match them to what ZED does.

Differential Revision: https://reviews.freebsd.org/D44043
MFC after:	2 weeks
Relnotes:	yes
Reviewed by:	asomers, allanjude
Sponsored by:	Axcient
Submitted by:	Alek Pinchuk <apinchuk@axcient.com>
This commit is contained in:
Alan Somers 2024-03-05 10:55:55 -07:00
parent e333110d1d
commit 89f4f91dbf
3 changed files with 135 additions and 43 deletions

View file

@ -53,6 +53,7 @@
#include <syslog.h>
#include <unistd.h>
#include <libzutil.h>
#include <libzfs.h>
#include <list>
@ -91,7 +92,6 @@ using DevdCtl::ParseException;
CaseFileList CaseFile::s_activeCases;
const string CaseFile::s_caseFilePath = "/var/db/zfsd/cases";
const timeval CaseFile::s_removeGracePeriod = { 60 /*sec*/, 0 /*usec*/};
//- CaseFile Static Public Methods ---------------------------------------------
CaseFile *
@ -255,6 +255,7 @@ CaseFile::RefreshVdevState()
m_vdevState = vd.State();
m_vdevPhysPath = vd.PhysicalPath();
m_vdevName = vd.Name(casePool, false);
return (true);
}
@ -610,15 +611,55 @@ CaseFile::ActivateSpare() {
return (Replace(vdev_type, devPath, /*isspare*/true));
}
/* Does the argument event refer to a checksum error? */
static bool
IsChecksumEvent(const Event* const event)
{
return ("ereport.fs.zfs.checksum" == event->Value("type"));
}
/* Does the argument event refer to an IO error? */
static bool
IsIOEvent(const Event* const event)
{
return ("ereport.fs.zfs.io" == event->Value("type"));
}
/* Does the argument event refer to an IO delay? */
static bool
IsDelayEvent(const Event* const event)
{
return ("ereport.fs.zfs.delay" == event->Value("type"));
}
void
CaseFile::RegisterCallout(const Event &event)
{
timeval now, countdown, elapsed, timestamp, zero, remaining;
/**
* The time ZFSD waits before promoting a tentative event
* into a permanent event.
*/
int sec = -1;
if (IsChecksumEvent(&event))
sec = CaseFile::GetVdevProp(VDEV_PROP_CHECKSUM_T);
else if (IsIOEvent(&event))
sec = CaseFile::GetVdevProp(VDEV_PROP_IO_T);
else if (IsDelayEvent(&event))
sec = CaseFile::GetVdevProp(VDEV_PROP_SLOW_IO_T);
if (sec == -1)
sec = 60; /* default */
timeval removeGracePeriod = {
sec, /*sec*/
0 /*usec*/
};
gettimeofday(&now, 0);
timestamp = event.GetTimestamp();
timersub(&now, &timestamp, &elapsed);
timersub(&s_removeGracePeriod, &elapsed, &countdown);
timersub(&removeGracePeriod, &elapsed, &countdown);
/*
* If countdown is <= zero, Reset the timer to the
* smallest positive time value instead
@ -827,6 +868,10 @@ CaseFile::CaseFile(const Vdev &vdev)
guidString << m_poolGUID;
m_poolGUIDString = guidString.str();
ZpoolList zpl(ZpoolList::ZpoolByGUID, &m_poolGUID);
zpool_handle_t *zhp(zpl.empty() ? NULL : zpl.front());
m_vdevName = vdev.Name(zhp, false);
s_activeCases.push_back(this);
syslog(LOG_INFO, "Creating new CaseFile:\n");
@ -1158,43 +1203,54 @@ CaseFile::Replace(const char* vdev_type, const char* path, bool isspare) {
return (retval);
}
/* Does the argument event refer to a checksum error? */
static bool
IsChecksumEvent(const Event* const event)
/* Lookup the vdev prop. Used for checksum, IO, or slow IO props */
int
CaseFile::GetVdevProp(vdev_prop_t vdev_prop) const
{
return ("ereport.fs.zfs.checksum" == event->Value("type"));
}
char val[ZFS_MAXPROPLEN];
zprop_source_t srctype;
DevdCtl::Guid poolGUID = PoolGUID();
ZpoolList zpl(ZpoolList::ZpoolByGUID, &poolGUID);
zpool_handle_t *zhp(zpl.empty() ? NULL : zpl.front());
/* Does the argument event refer to an IO error? */
static bool
IsIOEvent(const Event* const event)
{
return ("ereport.fs.zfs.io" == event->Value("type"));
}
char *prop_str = (char *) vdev_prop_to_name(vdev_prop);
if (zhp == NULL || zpool_get_vdev_prop(zhp, m_vdevName.c_str(),
vdev_prop, prop_str, val, sizeof (val), &srctype, B_FALSE) != 0)
return (-1);
/* Does the argument event refer to an IO delay? */
static bool
IsDelayEvent(const Event* const event)
{
return ("ereport.fs.zfs.delay" == event->Value("type"));
/* we'll get "-" from libzfs for a prop that is not set */
if (zfs_isnumber(val) == B_FALSE)
return (-1);
return (atoi(val));
}
bool
CaseFile::ShouldDegrade() const
{
int checksum_n = GetVdevProp(VDEV_PROP_CHECKSUM_N);
if (checksum_n == -1)
checksum_n = DEFAULT_ZFS_DEGRADE_IO_COUNT;
return (std::count_if(m_events.begin(), m_events.end(),
IsChecksumEvent) > ZFS_DEGRADE_IO_COUNT);
IsChecksumEvent) > checksum_n);
}
bool
CaseFile::ShouldFault() const
{
bool should_fault_for_io, should_fault_for_delay;
int io_n = GetVdevProp(VDEV_PROP_IO_N);
int slow_io_n = GetVdevProp(VDEV_PROP_SLOW_IO_N);
if (io_n == -1)
io_n = DEFAULT_ZFS_DEGRADE_IO_COUNT;
if (slow_io_n == -1)
slow_io_n = DEFAULT_ZFS_FAULT_SLOW_IO_COUNT;
should_fault_for_io = std::count_if(m_events.begin(), m_events.end(),
IsIOEvent) > ZFS_DEGRADE_IO_COUNT;
IsIOEvent) > io_n;
should_fault_for_delay = std::count_if(m_events.begin(), m_events.end(),
IsDelayEvent) > ZFS_FAULT_DELAY_COUNT;
IsDelayEvent) > slow_io_n;
return (should_fault_for_io || should_fault_for_delay);
}

View file

@ -235,18 +235,27 @@ public:
*/
int IsSpare();
/**
* \brief Get case vdev's specified property
*/
int GetVdevProp(vdev_prop_t) const;
protected:
enum {
/*
* Use these defaults if we can't get the corresponding vdev
* prop or if the prop is not set
*/
/**
* The number of soft errors on a vdev required
* to transition a vdev from healthy to degraded
* status.
* status
*/
ZFS_DEGRADE_IO_COUNT = 50,
DEFAULT_ZFS_DEGRADE_IO_COUNT = 50,
/**
* The number of delay errors on a vdev required to fault it
*/
ZFS_FAULT_DELAY_COUNT = 8,
DEFAULT_ZFS_FAULT_SLOW_IO_COUNT = 8,
};
static CalloutFunc_t OnGracePeriodEnded;
@ -379,12 +388,6 @@ protected:
*/
static const string s_caseFilePath;
/**
* \brief The time ZFSD waits before promoting a tentative event
* into a permanent event.
*/
static const timeval s_removeGracePeriod;
/**
* \brief A list of soft error events counted against the health of
* a vdev.
@ -404,6 +407,7 @@ protected:
string m_poolGUIDString;
string m_vdevGUIDString;
string m_vdevPhysPath;
string m_vdevName;
int m_is_spare;
/**

View file

@ -23,7 +23,7 @@
.\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
.\" SUCH DAMAGE.
.\"
.Dd April 18, 2020
.Dd February 20, 2024
.Dt ZFSD 8
.Os
.Sh NAME
@ -55,7 +55,7 @@ directly.
Instead, they control its behavior indirectly through zpool configuration.
There are two ways to influence
.Nm :
assigning hotspares and setting pool properties.
assigning hot spares and setting pool properties.
Currently, only the
.Em autoreplace
property has any effect.
@ -69,7 +69,7 @@ will attempt to resolve the following types of fault:
.It device removal
When a leaf vdev disappears,
.Nm
will activate any available hotspare.
will activate any available hot spare.
.It device arrival
When a new GEOM device appears,
.Nm
@ -77,40 +77,71 @@ will attempt to read its ZFS label, if any.
If it matches a previously removed vdev on an active pool,
.Nm
will online it.
Once resilvering completes, any active hotspare will detach automatically.
Once resilvering completes, any active hot spare will detach automatically.
.Pp
If the new device has no ZFS label but its physical path matches the
physical path of a previously removed vdev on an active pool, and that
pool has the autoreplace property set, then
.Nm
will replace the missing vdev with the newly arrived device.
Once resilvering completes, any active hotspare will detach automatically.
Once resilvering completes, any active hot spare will detach automatically.
.It vdev degrade or fault events
If a vdev becomes degraded or faulted,
.Nm
will activate any available hotspare.
will activate any available hot spare.
.It I/O errors
If a leaf vdev generates more than 50 I/O errors in a 60 second period, then
By default, if a leaf vdev generates more than 50 I/O errors in a 60 second
period, then
.Nm
will mark that vdev as
.Em FAULTED .
ZFS will no longer issue any I/Os to it.
.Nm
will activate a hotspare if one is available.
will activate a hot spare if one is available. The defaults can be changed by
setting the
.Em io_n
and/or
.Em io_t
vdev properties. See
.Xr vdevprops 7
for details.
.It I/O delays
By default, if a leaf vdev generates more than delayed 8 I/O events in a 60
second period, then
.Nm
will mark that vdev as
.Em FAULTED .
ZFS will no longer issue any I/Os to it.
.Nm
will activate a hot spare if one is available. The defaults can be changed by
setting the
.Em slow_io_n
and/or
.Em slow_io_t
vdev properties. See
.Xr vdevprops 7
for details.
.It Checksum errors
If a leaf vdev generates more than 50 checksum errors in a 60 second
period, then
By default, if a leaf vdev generates more than 50 checksum errors in a 60
second period, then
.Nm
will mark that vdev as
.Em DEGRADED .
ZFS will still use it, but zfsd will activate a spare anyway.
ZFS will still use it, but zfsd will also activate a hot spare if one is
available. The defaults can be changed by setting the
.Em checksum_n
and/or
.Em checksum_t
vdev properties. See
.Xr vdevprops 7
for details.
.It Spare addition
If the system administrator adds a hotspare to a pool that is already degraded,
If the system administrator adds a hot spare to a pool that is already degraded,
.Nm
will activate the spare.
.It Resilver complete
.Nm
will detach any hotspare once a permanent replacement finishes resilvering.
will detach any hot spare once a permanent replacement finishes resilvering.
.It Physical path change
If the physical path of an existing disk changes,
.Nm
@ -134,6 +165,7 @@ then reads them back in when next it starts up.
.El
.Sh SEE ALSO
.Xr devctl 4 ,
.Xr vdevprops 7 ,
.Xr zpool 8
.Sh HISTORY
.Nm