From c66ee1b31e2998992cc4bfdca153b2028c26b11f Mon Sep 17 00:00:00 2001 From: Pawel Jakub Dawidek Date: Wed, 4 Jul 2012 20:20:48 +0000 Subject: [PATCH] Make use of GEOM Gate direct reads feature. This allows HAST to serve reads with native speed of the underlying provider. There are three situations when direct reads are not used: 1. Data is being synchronized and synchronization source is the secondary node, which means secondary node has more recent data and we should read from it. 2. Local read failed and we have to try to read from the secondary node. 3. Local component is unavailable and all I/O requests are served from the secondary node. Sponsored by: Panzura, http://www.panzura.com MFC after: 1 month --- sbin/hastd/primary.c | 32 +++++++++++++++++++++++++++++++- 1 file changed, 31 insertions(+), 1 deletion(-) diff --git a/sbin/hastd/primary.c b/sbin/hastd/primary.c index d6d93b14afd..88159cb6bec 100644 --- a/sbin/hastd/primary.c +++ b/sbin/hastd/primary.c @@ -543,6 +543,27 @@ primary_connect(struct hast_resource *res, struct proto_conn **connp) return (0); } + +/* + * Function instructs GEOM_GATE to handle reads directly from within the kernel. + */ +static void +enable_direct_reads(struct hast_resource *res) +{ + struct g_gate_ctl_modify ggiomodify; + + bzero(&ggiomodify, sizeof(ggiomodify)); + ggiomodify.gctl_version = G_GATE_VERSION; + ggiomodify.gctl_unit = res->hr_ggateunit; + ggiomodify.gctl_modify = GG_MODIFY_READPROV | GG_MODIFY_READOFFSET; + strlcpy(ggiomodify.gctl_readprov, res->hr_localpath, + sizeof(ggiomodify.gctl_readprov)); + ggiomodify.gctl_readoffset = res->hr_localoff; + if (ioctl(res->hr_ggatefd, G_GATE_CMD_MODIFY, &ggiomodify) == 0) + pjdlog_debug(1, "Direct reads enabled."); + else + pjdlog_errno(LOG_WARNING, "Failed to enable direct reads"); +} static int init_remote(struct hast_resource *res, struct proto_conn **inp, @@ -692,6 +713,8 @@ init_remote(struct hast_resource *res, struct proto_conn **inp, res->hr_secondary_localcnt = nv_get_uint64(nvin, "localcnt"); res->hr_secondary_remotecnt = nv_get_uint64(nvin, "remotecnt"); res->hr_syncsrc = nv_get_uint8(nvin, "syncsrc"); + if (res->hr_syncsrc == HAST_SYNCSRC_PRIMARY) + enable_direct_reads(res); if (nv_exists(nvin, "virgin")) { /* * Secondary was reinitialized, bump localcnt if it is 0 as @@ -1789,13 +1812,14 @@ sync_thread(void *arg __unused) struct timeval tstart, tend, tdiff; unsigned int ii, ncomp, ncomps; off_t offset, length, synced; - bool dorewind; + bool dorewind, directreads; int syncext; ncomps = HAST_NCOMPONENTS; dorewind = true; synced = 0; offset = -1; + directreads = false; for (;;) { mtx_lock(&sync_lock); @@ -1867,6 +1891,8 @@ sync_thread(void *arg __unused) event_send(res, EVENT_SYNCDONE); } mtx_lock(&metadata_lock); + if (res->hr_syncsrc == HAST_SYNCSRC_SECONDARY) + directreads = true; res->hr_syncsrc = HAST_SYNCSRC_UNDEF; res->hr_primary_localcnt = res->hr_secondary_remotecnt; @@ -1880,6 +1906,10 @@ sync_thread(void *arg __unused) mtx_unlock(&metadata_lock); } rw_unlock(&hio_remote_lock[ncomp]); + if (directreads) { + directreads = false; + enable_direct_reads(res); + } continue; } pjdlog_debug(2, "sync: Taking free request.");