From 4e7f640dfbe1f666c3857534899ee168776fbe67 Mon Sep 17 00:00:00 2001
From: John Baldwin <jhb@FreeBSD.org>
Date: Sat, 31 Mar 2007 23:23:42 +0000
Subject: [PATCH] Optimize sx locks to use simple atomic operations for the
 common cases of obtaining and releasing shared and exclusive locks.  The
 algorithms for manipulating the lock cookie are very similar to those of rwlocks.
  This patch also adds support for exclusive locks using the same algorithm as
 mutexes.

A new sx_init_flags() function has been added so that optional flags can be
specified to alter a given lock's behavior.  The flags include SX_DUPOK,
SX_NOWITNESS, SX_NOPROFILE, and SX_QUIET which are all identical in nature
to the similar flags for mutexes.

Adaptive spinning on select locks may be enabled by enabling the
ADAPTIVE_SX kernel option.  Only locks initialized with the SX_ADAPTIVESPIN
flag via sx_init_flags() will adaptively spin.

The common cases for sx_slock(), sx_sunlock(), sx_xlock(), and sx_xunlock()
are now performed inline in non-debug kernels.  As a result, <sys/sx.h> now
requires <sys/lock.h> to be included prior to <sys/sx.h>.

The new kernel option SX_NOINLINE can be used to disable the aforementioned
inlining in non-debug kernels.

The size of struct sx has changed, so the kernel ABI is probably greatly
disturbed.

MFC after:	1 month
Submitted by:	attilio
Tested by:	kris, pjd
---
 share/man/man9/Makefile                 |   1 +
 share/man/man9/sx.9                     |  62 +-
 sys/conf/NOTES                          |  13 +
 sys/conf/options                        |   2 +
 sys/dev/acpica/acpi_ec.c                |   1 +
 sys/dev/mxge/if_mxge.c                  |   1 +
 sys/dev/usb/if_aue.c                    |   1 +
 sys/dev/usb/if_axe.c                    |   1 +
 sys/gnu/fs/xfs/FreeBSD/support/mrlock.c |  51 +-
 sys/gnu/fs/xfs/FreeBSD/support/mrlock.h |  48 +-
 sys/i386/acpica/acpi_machdep.c          |   1 +
 sys/kern/kern_sx.c                      | 914 ++++++++++++++++++------
 sys/netinet6/in6_src.c                  |   1 +
 sys/sys/_sx.h                           |  43 ++
 sys/sys/sleepqueue.h                    |   6 +-
 sys/sys/sx.h                            | 186 ++++-
 16 files changed, 998 insertions(+), 334 deletions(-)
 create mode 100644 sys/sys/_sx.h

diff --git a/share/man/man9/Makefile b/share/man/man9/Makefile
index e74677484bc..1e2c44d91f0 100644
--- a/share/man/man9/Makefile
+++ b/share/man/man9/Makefile
@@ -1058,6 +1058,7 @@ MLINKS+=sx.9 sx_assert.9 \
 	sx.9 sx_destroy.9 \
 	sx.9 sx_downgrade.9 \
 	sx.9 sx_init.9 \
+	sx.9 sx_init_flags.9 \
 	sx.9 sx_slock.9 \
 	sx.9 sx_sunlock.9 \
 	sx.9 SX_SYSINIT.9 \
diff --git a/share/man/man9/sx.9 b/share/man/man9/sx.9
index 447e48c58a8..c0037d19454 100644
--- a/share/man/man9/sx.9
+++ b/share/man/man9/sx.9
@@ -32,6 +32,7 @@
 .Sh NAME
 .Nm sx ,
 .Nm sx_init ,
+.Nm sx_init_flags ,
 .Nm sx_destroy ,
 .Nm sx_slock ,
 .Nm sx_xlock ,
@@ -54,6 +55,8 @@
 .Ft void
 .Fn sx_init "struct sx *sx" "const char *description"
 .Ft void
+.Fn sx_init_flags "struct sx *sx" "const char *description" "int opts"
+.Ft void
 .Fn sx_destroy "struct sx *sx"
 .Ft void
 .Fn sx_slock "struct sx *sx"
@@ -87,11 +90,14 @@
 .Sh DESCRIPTION
 Shared/exclusive locks are used to protect data that are read far more often
 than they are written.
-Mutexes are inherently more efficient than shared/exclusive locks, so
+Shared/exclusive locks do not implement priority propagation like mutexes and
+reader/writer locks to prevent priority inversions, so
 shared/exclusive locks should be used prudently.
 .Pp
-Shared/exclusive locks are created with
-.Fn sx_init ,
+Shared/exclusive locks are created with either
+.Fn sx_init
+or
+.Fn sx_init_flags
 where
 .Fa sx
 is a pointer to space for a
@@ -100,8 +106,40 @@ and
 .Fa description
 is a pointer to a null-terminated character string that describes the
 shared/exclusive lock.
+The
+.Fa opts
+argument to
+.Fn sx_init_flags
+specifies a set of optional flags to alter the behavior of
+.Fa sx .
+It contains one or more of the following flags:
+.Bl -tag -width SX_ADAPTIVESPIN
+.It Dv SX_ADAPTIVESPIN
+If the kernel is compiled with
+.Cd "options ADAPTIVE_SX" ,
+then lock operations for
+.Fa sx
+will spin instead of sleeping while an exclusive lock holder is executing on
+another CPU.
+.It Dv SX_DUPOK
+Witness should not log messages about duplicate locks being acquired.
+.It Dv SX_NOWITNESS
+Instruct
+.Xr witness 4
+to ignore this lock.
+.It Dv SX_NOPROFILE
+Do not profile this lock.
+.It Dv SX_QUIET
+Do not log any operations for this lock via
+.Xr ktr 4 .
+.El
+.Pp
 Shared/exclusive locks are destroyed with
 .Fn sx_destroy .
+The lock
+.Fa sx
+must not be locked by any thread when it is destroyed.
+.Pp
 Threads acquire and release a shared lock by calling
 .Fn sx_slock
 or
@@ -155,7 +193,7 @@ function tests
 for the assertions specified in
 .Fa what ,
 and panics if they are not met.
-The following assertions are supported:
+One of the following assertions must be specified:
 .Bl -tag -width ".Dv SX_UNLOCKED"
 .It Dv SX_LOCKED
 Assert that the current thread has either a shared or an exclusive lock on the
@@ -178,6 +216,22 @@ lock pointed to
 by the first argument.
 .El
 .Pp
+In addition, one of the following optional assertions may be included with
+either an
+.Dv SX_LOCKED ,
+.Dv SX_SLOCKED ,
+or
+.Dv SX_XLOCKED
+assertion:
+.Bl -tag -width ".Dv SX_NOTRECURSED"
+.It Dv SX_RECURSED
+Assert that the current thread has a recursed lock on
+.Fa sx .
+.It Dv SX_NOTRECURSED
+Assert that the current thread does not have a recursed lock on
+.Fa sx .
+.El
+.Pp
 .Fn sx_xlocked
 will return non-zero if the current thread holds the exclusive lock;
 otherwise, it will return zero.
diff --git a/sys/conf/NOTES b/sys/conf/NOTES
index c34d48f6025..0bf6d64626d 100644
--- a/sys/conf/NOTES
+++ b/sys/conf/NOTES
@@ -214,6 +214,12 @@ options 	NO_ADAPTIVE_RWLOCKS
 # to sleep rather than spinning.
 options 	ADAPTIVE_GIANT
 
+# ADAPTIVE_SX changes the behavior of sx locks to spin if the thread
+# that currently owns the lock is executing on another CPU.  Note that
+# in addition to enabling this option, individual sx locks must be
+# initialized with the SX_ADAPTIVESPIN flag.
+options 	ADAPTIVE_SX
+
 # MUTEX_NOINLINE forces mutex operations to call functions to perform each
 # operation rather than inlining the simple cases.  This can be used to
 # shrink the size of the kernel text segment.  Note that this behavior is
@@ -233,6 +239,13 @@ options 	MUTEX_WAKE_ALL
 # and WITNESS options.
 options 	RWLOCK_NOINLINE
 
+# SX_NOINLINE forces sx lock operations to call functions to perform each
+# operation rather than inlining the simple cases.  This can be used to
+# shrink the size of the kernel text segment.  Note that this behavior is
+# already implied by the INVARIANT_SUPPORT, INVARIANTS, KTR, LOCK_PROFILING,
+# and WITNESS options.
+options 	SX_NOINLINE
+
 # SMP Debugging Options:
 #
 # PREEMPTION allows the threads that are in the kernel to be preempted
diff --git a/sys/conf/options b/sys/conf/options
index 7197ba29604..c9eadb18c2b 100644
--- a/sys/conf/options
+++ b/sys/conf/options
@@ -59,6 +59,7 @@ NO_SYSCTL_DESCR	opt_global.h
 
 # Miscellaneous options.
 ADAPTIVE_GIANT	opt_adaptive_mutexes.h
+ADAPTIVE_SX
 ALQ
 AUDIT		opt_global.h
 CODA_COMPAT_5	opt_coda.h
@@ -555,6 +556,7 @@ MSIZE			opt_global.h
 REGRESSION		opt_global.h
 RESTARTABLE_PANICS	opt_global.h
 RWLOCK_NOINLINE		opt_global.h
+SX_NOINLINE		opt_global.h
 VFS_BIO_DEBUG		opt_global.h
 
 # These are VM related options
diff --git a/sys/dev/acpica/acpi_ec.c b/sys/dev/acpica/acpi_ec.c
index 97d95c99d84..350fc86dd6c 100644
--- a/sys/dev/acpica/acpi_ec.c
+++ b/sys/dev/acpica/acpi_ec.c
@@ -142,6 +142,7 @@ __FBSDID("$FreeBSD$");
 #include <sys/param.h>
 #include <sys/kernel.h>
 #include <sys/bus.h>
+#include <sys/lock.h>
 #include <sys/malloc.h>
 #include <sys/module.h>
 #include <sys/sx.h>
diff --git a/sys/dev/mxge/if_mxge.c b/sys/dev/mxge/if_mxge.c
index 990162da736..5a82f744fb4 100644
--- a/sys/dev/mxge/if_mxge.c
+++ b/sys/dev/mxge/if_mxge.c
@@ -44,6 +44,7 @@ __FBSDID("$FreeBSD$");
 #include <sys/malloc.h>
 #include <sys/kdb.h>
 #include <sys/kernel.h>
+#include <sys/lock.h>
 #include <sys/module.h>
 #include <sys/memrange.h>
 #include <sys/socket.h>
diff --git a/sys/dev/usb/if_aue.c b/sys/dev/usb/if_aue.c
index 348e6ef9f59..1add1022752 100644
--- a/sys/dev/usb/if_aue.c
+++ b/sys/dev/usb/if_aue.c
@@ -75,6 +75,7 @@ __FBSDID("$FreeBSD$");
 #include <sys/malloc.h>
 #include <sys/kernel.h>
 #include <sys/kdb.h>
+#include <sys/lock.h>
 #include <sys/module.h>
 #include <sys/socket.h>
 #include <sys/sx.h>
diff --git a/sys/dev/usb/if_axe.c b/sys/dev/usb/if_axe.c
index 94e386c2ddc..ce33ed0ddb4 100644
--- a/sys/dev/usb/if_axe.c
+++ b/sys/dev/usb/if_axe.c
@@ -72,6 +72,7 @@ __FBSDID("$FreeBSD$");
 #include <sys/sockio.h>
 #include <sys/mbuf.h>
 #include <sys/malloc.h>
+#include <sys/lock.h>
 #include <sys/kernel.h>
 #include <sys/module.h>
 #include <sys/socket.h>
diff --git a/sys/gnu/fs/xfs/FreeBSD/support/mrlock.c b/sys/gnu/fs/xfs/FreeBSD/support/mrlock.c
index 950303938a0..7c3f8bfdfb5 100644
--- a/sys/gnu/fs/xfs/FreeBSD/support/mrlock.c
+++ b/sys/gnu/fs/xfs/FreeBSD/support/mrlock.c
@@ -1,49 +1,14 @@
 #include <sys/param.h>
+#include <machine/pcpu.h>
+#include <support/debug.h>
 #include <support/mrlock.h>
 
-void
-_sx_xfs_destroy(struct sx *sx)
-{
-	if (sx->sx_cnt == -1)
-		sx_xunlock(sx);
-	sx_destroy(sx);
-}
-
-void
-_sx_xfs_lock(struct sx *sx, int type, const char *file, int line)
-{
-	if (type == MR_ACCESS)
-		_sx_slock(sx, file, line);
-	else if (type == MR_UPDATE)
-		_sx_sunlock(sx, file, line);
-	else
-		panic("Invalid lock type passed");
-}
-
-
-void
-_sx_xfs_unlock(struct sx *sx, const char *file, int line)
-{
-	if (_sx_xfs_xowned(sx))
-		_sx_xunlock(sx, file, line);
-	else if (_sx_xfs_sowned(sx))
-		_sx_sunlock(sx, file, line);
-	else
-		panic("lock is not locked");
-}
-
 int
 ismrlocked(mrlock_t *mrp, int type)
-{	
-	if (type == MR_ACCESS)
-		return _sx_xfs_sowned(mrp); /* Read lock */
-	else if (type == MR_UPDATE)
-		return _sx_xfs_xowned(mrp); /* Write lock */
-	else if (type == (MR_UPDATE | MR_ACCESS))
-		return  _sx_xfs_sowned(mrp) ||
-		        _sx_xfs_xowned(mrp); /* Any type of lock held */
-	return (mrp->sx_shrd_wcnt > 0 || mrp->sx_excl_wcnt > 0);
+{
+
+	sx_assert(mrp, SX_LOCKED);
+	if (type == MR_UPDATE)
+		return sx_xlocked(mrp);
+	return 1;
 }
-
-
-
diff --git a/sys/gnu/fs/xfs/FreeBSD/support/mrlock.h b/sys/gnu/fs/xfs/FreeBSD/support/mrlock.h
index 4e82d419911..b41efc57dc4 100644
--- a/sys/gnu/fs/xfs/FreeBSD/support/mrlock.h
+++ b/sys/gnu/fs/xfs/FreeBSD/support/mrlock.h
@@ -4,62 +4,38 @@
 #include <sys/types.h>
 #include <sys/systm.h>
 #include <sys/lock.h>
-#include <sys/mutex.h>
 #include <sys/sx.h>
 
-#include <support/debug.h>
-
 /*
  * Implement mrlocks on FreeBSD that work for XFS.
- * Use FreeBSD sx lock and add necessary functions
- * if additional functionality is requested
+ * Map mrlock functions to corresponding equivalents in
+ * sx.
  */
 typedef struct sx mrlock_t;
 
 #define MR_ACCESS	1
 #define MR_UPDATE	2
 
-/* 
+/*
  * Compatibility defines, not really used
  */
 #define MRLOCK_BARRIER		0x1
 #define MRLOCK_ALLOW_EQUAL_PRI	0x8
 
-/*
- * mraccessf/mrupdatef take flags to be passed in while sleeping;
- * only PLTWAIT is currently supported.
- */
-#define mrinit(lock, name)	sx_init(lock, name)
 #define mrlock_init(lock, type, name, seq) sx_init(lock, name)
-#define mrfree(lock)		_sx_xfs_destroy(lock)
-#define	mraccessf(lock, f)	sx_slock(lock)
-#define	mrupdatef(lock, f)	sx_xlock(lock)
-#define mraccunlock(lock)	sx_sunlock(lock)
 #define mrtryaccess(lock)	sx_try_slock(lock)
 #define mrtryupdate(lock)	sx_try_xlock(lock)
-#define mraccess(mrp)		mraccessf(mrp, 0)
-#define mrupdate(mrp)		mrupdatef(mrp, 0)
-#define mrislocked_access(lock)	_sx_xfs_xowned(lock)
-#define mrislocked_update(lock)	_sx_xfs_sowned(lock)
-#define mrtrypromote(lock)	sx_try_upgrade(lock)
+#define mraccess(lock)		sx_slock(lock)
+#define mrupdate(lock)		sx_xlock(lock)
 #define mrdemote(lock)		sx_downgrade(lock)
+#define mrunlock(lock)		sx_unlock(lock)
 
-int	ismrlocked(mrlock_t *, int);
-void	_sx_xfs_lock(struct sx *sx, int type, const char *file, int line);
-void	_sx_xfs_unlock(struct sx *sx, const char *file, int line);
-void	_sx_xfs_destroy(struct sx *sx);
-#define _sx_xfs_xowned(lock) ((lock)->sx_cnt < 0)
-#define _sx_xfs_sowned(lock) ((lock)->sx_cnt > 0)
-
-/*
- * Functions, not implemented in FreeBSD 
- */
-#define mrunlock(lock) \
-        _sx_xfs_unlock(lock, __FILE__, __LINE__)
-
-#define	mrlock(lock, type, flags) \
-        _sx_xfs_lock(lock, type, __FILE__, __LINE__)
-
+#define mrfree(lock) do {		\
+	if (sx_xlocked(lock))		\
+		sx_xunlock(lock);	\
+	sx_destroy(lock);		\
+} while (0)
 
+int ismrlocked(mrlock_t *mrp, int type);
 
 #endif /* __XFS_SUPPORT_MRLOCK_H__ */
diff --git a/sys/i386/acpica/acpi_machdep.c b/sys/i386/acpica/acpi_machdep.c
index 415ba256d16..e549fa79ba3 100644
--- a/sys/i386/acpica/acpi_machdep.c
+++ b/sys/i386/acpica/acpi_machdep.c
@@ -29,6 +29,7 @@ __FBSDID("$FreeBSD$");
 
 #include <sys/param.h>
 #include <sys/bus.h>
+#include <sys/condvar.h>
 #include <sys/conf.h>
 #include <sys/fcntl.h>
 #include <sys/kernel.h>
diff --git a/sys/kern/kern_sx.c b/sys/kern/kern_sx.c
index 67e3ca7de88..e6f35eb94d4 100644
--- a/sys/kern/kern_sx.c
+++ b/sys/kern/kern_sx.c
@@ -1,12 +1,14 @@
 /*-
- * Copyright (C) 2001 Jason Evans <jasone@freebsd.org>.  All rights reserved.
+ * Copyright (c) 2007 Attilio Rao <attilio@freebsd.org>
+ * Copyright (c) 2001 Jason Evans <jasone@freebsd.org>
+ * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice(s), this list of conditions and the following disclaimer as
- *    the first lines of this file unmodified other than the possible 
+ *    the first lines of this file unmodified other than the possible
  *    addition of one or more copyright notices.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice(s), this list of conditions and the following disclaimer in the
@@ -26,32 +28,88 @@
  */
 
 /*
- * Shared/exclusive locks.  This implementation assures deterministic lock
- * granting behavior, so that slocks and xlocks are interleaved.
+ * Shared/exclusive locks.  This implementation attempts to ensure
+ * deterministic lock granting behavior, so that slocks and xlocks are
+ * interleaved.
  *
  * Priority propagation will not generally raise the priority of lock holders,
  * so should not be relied upon in combination with sx locks.
  */
 
+#include "opt_adaptive_sx.h"
+#include "opt_ddb.h"
+
 #include <sys/cdefs.h>
 __FBSDID("$FreeBSD$");
 
-#include "opt_ddb.h"
-
 #include <sys/param.h>
-#include <sys/systm.h>
 #include <sys/ktr.h>
-#include <sys/linker_set.h>
-#include <sys/condvar.h>
 #include <sys/lock.h>
+#include <sys/lock_profile.h>
 #include <sys/mutex.h>
 #include <sys/proc.h>
+#include <sys/sleepqueue.h>
 #include <sys/sx.h>
-#include <sys/lock_profile.h>
+#include <sys/systm.h>
+
+#ifdef ADAPTIVE_SX
+#include <machine/cpu.h>
+#endif
 
 #ifdef DDB
 #include <ddb/ddb.h>
+#endif
 
+#if !defined(SMP) && defined(ADAPTIVE_SX)
+#error "You must have SMP to enable the ADAPTIVE_SX option"
+#endif
+
+/* Handy macros for sleep queues. */
+#define	SQ_EXCLUSIVE_QUEUE	0
+#define	SQ_SHARED_QUEUE		1
+
+/*
+ * Variations on DROP_GIANT()/PICKUP_GIANT() for use in this file.  We
+ * drop Giant anytime we have to sleep or if we adaptively spin.
+ */
+#define	GIANT_DECLARE							\
+	int _giantcnt = 0;						\
+	WITNESS_SAVE_DECL(Giant)					\
+
+#define	GIANT_SAVE() do {						\
+	if (mtx_owned(&Giant)) {					\
+		WITNESS_SAVE(&Giant.lock_object, Giant);		\
+		while (mtx_owned(&Giant)) {				\
+			_giantcnt++;					\
+			mtx_unlock(&Giant);				\
+		}							\
+	}								\
+} while (0)
+
+#define GIANT_RESTORE() do {						\
+	if (_giantcnt > 0) {						\
+		mtx_assert(&Giant, MA_NOTOWNED);			\
+		while (_giantcnt--)					\
+			mtx_lock(&Giant);				\
+		WITNESS_RESTORE(&Giant.lock_object, Giant);		\
+	}								\
+} while (0)
+
+/*
+ * Returns true if an exclusive lock is recursed.  It assumes curthread
+ * currently has an exclusive lock.
+ */
+#define	sx_recursed(sx)		((sx)->sx_recurse != 0)
+
+/*
+ * Return a pointer to the owning thread if the lock is exclusively
+ * locked.
+ */
+#define	sx_xholder(sx)							\
+	((sx)->sx_lock & SX_LOCK_SHARED ? NULL :			\
+	(struct thread *)SX_OWNER((sx)->sx_lock))
+
+#ifdef DDB
 static void	db_show_sx(struct lock_object *lock);
 #endif
 static void	lock_sx(struct lock_object *lock, int how);
@@ -89,7 +147,7 @@ unlock_sx(struct lock_object *lock)
 	struct sx *sx;
 
 	sx = (struct sx *)lock;
-	sx_assert(sx, SX_LOCKED | LA_NOTRECURSED);
+	sx_assert(sx, SX_LOCKED | SX_NOTRECURSED);
 	if (sx_xlocked(sx)) {
 		sx_xunlock(sx);
 		return (1);
@@ -108,33 +166,33 @@ sx_sysinit(void *arg)
 }
 
 void
-sx_init(struct sx *sx, const char *description)
+sx_init_flags(struct sx *sx, const char *description, int opts)
 {
+	int flags;
 
-	sx->sx_lock = mtx_pool_find(mtxpool_lockbuilder, sx);
-	sx->sx_cnt = 0;
-	cv_init(&sx->sx_shrd_cv, description);
-	sx->sx_shrd_wcnt = 0;
-	cv_init(&sx->sx_excl_cv, description);
-	sx->sx_excl_wcnt = 0;
-	sx->sx_xholder = NULL;
+	flags = LO_RECURSABLE | LO_SLEEPABLE | LO_UPGRADABLE;
+	if (opts & SX_DUPOK)
+		flags |= LO_DUPOK;
+	if (opts & SX_NOPROFILE)
+		flags |= LO_NOPROFILE;
+	if (!(opts & SX_NOWITNESS))
+		flags |= LO_WITNESS;
+	if (opts & SX_QUIET)
+		flags |= LO_QUIET;
+
+	flags |= opts & SX_ADAPTIVESPIN;
+	sx->sx_lock = SX_LOCK_UNLOCKED;
+	sx->sx_recurse = 0;
 	lock_profile_object_init(&sx->lock_object, &lock_class_sx, description);
-	lock_init(&sx->lock_object, &lock_class_sx, description, NULL,
-	    LO_WITNESS | LO_RECURSABLE | LO_SLEEPABLE | LO_UPGRADABLE);
+	lock_init(&sx->lock_object, &lock_class_sx, description, NULL, flags);
 }
 
 void
 sx_destroy(struct sx *sx)
 {
 
-	KASSERT((sx->sx_cnt == 0 && sx->sx_shrd_wcnt == 0 && sx->sx_excl_wcnt ==
-	    0), ("%s (%s): holders or waiters\n", __func__,
-	    sx->lock_object.lo_name));
-
-	sx->sx_lock = NULL;
-	cv_destroy(&sx->sx_shrd_cv);
-	cv_destroy(&sx->sx_excl_cv);
-	
+	KASSERT(sx->sx_lock == SX_LOCK_UNLOCKED, ("sx lock still held"));
+	KASSERT(sx->sx_recurse == 0, ("sx lock still recursed"));
 	lock_profile_object_destroy(&sx->lock_object);
 	lock_destroy(&sx->lock_object);
 }
@@ -142,224 +200,592 @@ sx_destroy(struct sx *sx)
 void
 _sx_slock(struct sx *sx, const char *file, int line)
 {
-	uint64_t waittime = 0;
-	int contested = 0;
 
-	mtx_lock(sx->sx_lock);
-	KASSERT(sx->sx_xholder != curthread,
-	    ("%s (%s): slock while xlock is held @ %s:%d\n", __func__,
-	    sx->lock_object.lo_name, file, line));
+	MPASS(curthread != NULL);
 	WITNESS_CHECKORDER(&sx->lock_object, LOP_NEWORDER, file, line);
-
-	/*
-	 * Loop in case we lose the race for lock acquisition.
-	 */
-	while (sx->sx_cnt < 0) {
-		sx->sx_shrd_wcnt++;
-		lock_profile_obtain_lock_failed(&sx->lock_object, &contested, &waittime);
-		cv_wait(&sx->sx_shrd_cv, sx->sx_lock);
-		sx->sx_shrd_wcnt--;
-	}
-
-	/* Acquire a shared lock. */
-	sx->sx_cnt++;
-
-        if (sx->sx_cnt == 1)
-		lock_profile_obtain_lock_success(&sx->lock_object, contested, waittime, file, line);
-
+	__sx_slock(sx, file, line);
 	LOCK_LOG_LOCK("SLOCK", &sx->lock_object, 0, 0, file, line);
 	WITNESS_LOCK(&sx->lock_object, 0, file, line);
 	curthread->td_locks++;
-
-	mtx_unlock(sx->sx_lock);
 }
 
 int
 _sx_try_slock(struct sx *sx, const char *file, int line)
 {
+	uintptr_t x;
 
-	mtx_lock(sx->sx_lock);
-	if (sx->sx_cnt >= 0) {
-		sx->sx_cnt++;
+	x = sx->sx_lock;
+	if ((x & SX_LOCK_SHARED) && atomic_cmpset_acq_ptr(&sx->sx_lock, x,
+	    x + SX_ONE_SHARER)) {
 		LOCK_LOG_TRY("SLOCK", &sx->lock_object, 0, 1, file, line);
 		WITNESS_LOCK(&sx->lock_object, LOP_TRYLOCK, file, line);
 		curthread->td_locks++;
-		mtx_unlock(sx->sx_lock);
 		return (1);
-	} else {
-		LOCK_LOG_TRY("SLOCK", &sx->lock_object, 0, 0, file, line);
-		mtx_unlock(sx->sx_lock);
-		return (0);
 	}
+
+	LOCK_LOG_TRY("SLOCK", &sx->lock_object, 0, 0, file, line);
+	return (0);
 }
 
 void
 _sx_xlock(struct sx *sx, const char *file, int line)
 {
-	int contested = 0;
-	uint64_t waittime = 0;
 
-	mtx_lock(sx->sx_lock);
-
-	/*
-	 * With sx locks, we're absolutely not permitted to recurse on
-	 * xlocks, as it is fatal (deadlock). Normally, recursion is handled
-	 * by WITNESS, but as it is not semantically correct to hold the
-	 * xlock while in here, we consider it API abuse and put it under
-	 * INVARIANTS.
-	 */
-	KASSERT(sx->sx_xholder != curthread,
-	    ("%s (%s): xlock already held @ %s:%d", __func__,
-	    sx->lock_object.lo_name, file, line));
+	MPASS(curthread != NULL);
 	WITNESS_CHECKORDER(&sx->lock_object, LOP_NEWORDER | LOP_EXCLUSIVE, file,
 	    line);
-
-	/* Loop in case we lose the race for lock acquisition. */
-	while (sx->sx_cnt != 0) {
-		sx->sx_excl_wcnt++;
-		lock_profile_obtain_lock_failed(&sx->lock_object, &contested, &waittime);
-		cv_wait(&sx->sx_excl_cv, sx->sx_lock);
-		sx->sx_excl_wcnt--;
-	}
-
-	MPASS(sx->sx_cnt == 0);
-
-	/* Acquire an exclusive lock. */
-	sx->sx_cnt--;
-	sx->sx_xholder = curthread;
-
-	lock_profile_obtain_lock_success(&sx->lock_object, contested, waittime, file, line);
-	LOCK_LOG_LOCK("XLOCK", &sx->lock_object, 0, 0, file, line);
+	__sx_xlock(sx, curthread, file, line);
+	LOCK_LOG_LOCK("XLOCK", &sx->lock_object, 0, sx->sx_recurse, file, line);
 	WITNESS_LOCK(&sx->lock_object, LOP_EXCLUSIVE, file, line);
 	curthread->td_locks++;
-
-	mtx_unlock(sx->sx_lock);
 }
 
 int
 _sx_try_xlock(struct sx *sx, const char *file, int line)
 {
+	int rval;
 
-	mtx_lock(sx->sx_lock);
-	if (sx->sx_cnt == 0) {
-		sx->sx_cnt--;
-		sx->sx_xholder = curthread;
-		LOCK_LOG_TRY("XLOCK", &sx->lock_object, 0, 1, file, line);
-		WITNESS_LOCK(&sx->lock_object, LOP_EXCLUSIVE | LOP_TRYLOCK, file,
-		    line);
+	MPASS(curthread != NULL);
+
+	if (sx_xlocked(sx)) {
+		sx->sx_recurse++;
+		atomic_set_ptr(&sx->sx_lock, SX_LOCK_RECURSED);
+		rval = 1;
+	} else
+		rval = atomic_cmpset_acq_ptr(&sx->sx_lock, SX_LOCK_UNLOCKED,
+		    (uintptr_t)curthread);
+	LOCK_LOG_TRY("XLOCK", &sx->lock_object, 0, rval, file, line);
+	if (rval) {
+		WITNESS_LOCK(&sx->lock_object, LOP_EXCLUSIVE | LOP_TRYLOCK,
+		    file, line);
 		curthread->td_locks++;
-		mtx_unlock(sx->sx_lock);
-		return (1);
-	} else {
-		LOCK_LOG_TRY("XLOCK", &sx->lock_object, 0, 0, file, line);
-		mtx_unlock(sx->sx_lock);
-		return (0);
 	}
+
+	return (rval);
 }
 
 void
 _sx_sunlock(struct sx *sx, const char *file, int line)
 {
-	_sx_assert(sx, SX_SLOCKED, file, line);
-	mtx_lock(sx->sx_lock);
 
+	MPASS(curthread != NULL);
+	_sx_assert(sx, SX_SLOCKED, file, line);
 	curthread->td_locks--;
 	WITNESS_UNLOCK(&sx->lock_object, 0, file, line);
-
-	/* Release. */
-	sx->sx_cnt--;
-
-	if (sx->sx_cnt == 0) {
-		lock_profile_release_lock(&sx->lock_object);
-	}
-
-	/*
-	 * If we just released the last shared lock, wake any waiters up, giving
-	 * exclusive lockers precedence.  In order to make sure that exclusive
-	 * lockers won't be blocked forever, don't wake shared lock waiters if
-	 * there are exclusive lock waiters.
-	 */
-	if (sx->sx_excl_wcnt > 0) {
-		if (sx->sx_cnt == 0)
-			cv_signal(&sx->sx_excl_cv);
-	} else if (sx->sx_shrd_wcnt > 0)
-		cv_broadcast(&sx->sx_shrd_cv);
-
 	LOCK_LOG_LOCK("SUNLOCK", &sx->lock_object, 0, 0, file, line);
-
-	mtx_unlock(sx->sx_lock);
+	lock_profile_release_lock(&sx->lock_object);
+	__sx_sunlock(sx, file, line);
 }
 
 void
 _sx_xunlock(struct sx *sx, const char *file, int line)
 {
-	_sx_assert(sx, SX_XLOCKED, file, line);
-	mtx_lock(sx->sx_lock);
-	MPASS(sx->sx_cnt == -1);
 
+	MPASS(curthread != NULL);
+	_sx_assert(sx, SX_XLOCKED, file, line);
 	curthread->td_locks--;
 	WITNESS_UNLOCK(&sx->lock_object, LOP_EXCLUSIVE, file, line);
-
-	/* Release. */
-	sx->sx_cnt++;
-	sx->sx_xholder = NULL;
-
-	/*
-	 * Wake up waiters if there are any.  Give precedence to slock waiters.
-	 */
-	if (sx->sx_shrd_wcnt > 0)
-		cv_broadcast(&sx->sx_shrd_cv);
-	else if (sx->sx_excl_wcnt > 0)
-		cv_signal(&sx->sx_excl_cv);
-
-	LOCK_LOG_LOCK("XUNLOCK", &sx->lock_object, 0, 0, file, line);
-
+	LOCK_LOG_LOCK("XUNLOCK", &sx->lock_object, 0, sx->sx_recurse, file,
+	    line);
 	lock_profile_release_lock(&sx->lock_object);
-	mtx_unlock(sx->sx_lock);
+	__sx_xunlock(sx, curthread, file, line);
 }
 
+/*
+ * Try to do a non-blocking upgrade from a shared lock to an exclusive lock.
+ * This will only succeed if this thread holds a single shared lock.
+ * Return 1 if the upgrade succeeds, 0 otherwise.
+ */
 int
 _sx_try_upgrade(struct sx *sx, const char *file, int line)
 {
+	uintptr_t x;
+	int success;
 
 	_sx_assert(sx, SX_SLOCKED, file, line);
-	mtx_lock(sx->sx_lock);
 
-	if (sx->sx_cnt == 1) {
-		sx->sx_cnt = -1;
-		sx->sx_xholder = curthread;
-
-		LOCK_LOG_TRY("XUPGRADE", &sx->lock_object, 0, 1, file, line);
+	/*
+	 * Try to switch from one shared lock to an exclusive lock.  We need
+	 * to maintain the SX_LOCK_EXCLUSIVE_WAITERS flag if set so that
+	 * we will wake up the exclusive waiters when we drop the lock.
+	 */
+	x = sx->sx_lock & SX_LOCK_EXCLUSIVE_WAITERS;
+	success = atomic_cmpset_ptr(&sx->sx_lock, SX_SHARERS_LOCK(1) | x,
+	    (uintptr_t)curthread | x);
+	LOCK_LOG_TRY("XUPGRADE", &sx->lock_object, 0, success, file, line);
+	if (success)
 		WITNESS_UPGRADE(&sx->lock_object, LOP_EXCLUSIVE | LOP_TRYLOCK,
 		    file, line);
-
-		mtx_unlock(sx->sx_lock);
-		return (1);
-	} else {
-		LOCK_LOG_TRY("XUPGRADE", &sx->lock_object, 0, 0, file, line);
-		mtx_unlock(sx->sx_lock);
-		return (0);
-	}
+	return (success);
 }
 
+/*
+ * Downgrade an unrecursed exclusive lock into a single shared lock.
+ */
 void
 _sx_downgrade(struct sx *sx, const char *file, int line)
 {
+	uintptr_t x;
 
-	_sx_assert(sx, SX_XLOCKED, file, line);
-	mtx_lock(sx->sx_lock);
-	MPASS(sx->sx_cnt == -1);
+	_sx_assert(sx, SX_XLOCKED | SX_NOTRECURSED, file, line);
+#ifndef INVARIANTS
+	if (sx_recursed(sx))
+		panic("downgrade of a recursed lock");
+#endif
 
 	WITNESS_DOWNGRADE(&sx->lock_object, 0, file, line);
 
-	sx->sx_cnt = 1;
-	sx->sx_xholder = NULL;
-        if (sx->sx_shrd_wcnt > 0)
-                cv_broadcast(&sx->sx_shrd_cv);
+	/*
+	 * Try to switch from an exclusive lock with no shared waiters
+	 * to one sharer with no shared waiters.  If there are
+	 * exclusive waiters, we don't need to lock the sleep queue so
+	 * long as we preserve the flag.  We do one quick try and if
+	 * that fails we grab the sleepq lock to keep the flags from
+	 * changing and do it the slow way.
+	 *
+	 * We have to lock the sleep queue if there are shared waiters
+	 * so we can wake them up.
+	 */
+	x = sx->sx_lock;
+	if (!(x & SX_LOCK_SHARED_WAITERS) &&
+	    atomic_cmpset_rel_ptr(&sx->sx_lock, x, SX_SHARERS_LOCK(1) |
+	    (x & SX_LOCK_EXCLUSIVE_WAITERS))) {
+		LOCK_LOG_LOCK("XDOWNGRADE", &sx->lock_object, 0, 0, file, line);
+		return;
+	}
+
+	/*
+	 * Lock the sleep queue so we can read the waiters bits
+	 * without any races and wakeup any shared waiters.
+	 */
+	sleepq_lock(&sx->lock_object);
+
+	/*
+	 * Preserve SX_LOCK_EXCLUSIVE_WAITERS while downgraded to a single
+	 * shared lock.  If there are any shared waiters, wake them up.
+	 */
+	x = sx->sx_lock;
+	atomic_store_rel_ptr(&sx->sx_lock, SX_SHARERS_LOCK(1) |
+	    (x & SX_LOCK_EXCLUSIVE_WAITERS));
+	if (x & SX_LOCK_SHARED_WAITERS)
+		sleepq_broadcast(&sx->lock_object, SLEEPQ_SX, -1,
+		    SQ_SHARED_QUEUE);
+	else
+		sleepq_release(&sx->lock_object);
 
 	LOCK_LOG_LOCK("XDOWNGRADE", &sx->lock_object, 0, 0, file, line);
+}
 
-	mtx_unlock(sx->sx_lock);
+/*
+ * This function represents the so-called 'hard case' for sx_xlock
+ * operation.  All 'easy case' failures are redirected to this.  Note
+ * that ideally this would be a static function, but it needs to be
+ * accessible from at least sx.h.
+ */
+void
+_sx_xlock_hard(struct sx *sx, uintptr_t tid, const char *file, int line)
+{
+	GIANT_DECLARE;
+#ifdef ADAPTIVE_SX
+	volatile struct thread *owner;
+#endif
+	uintptr_t x;
+
+	/* If we already hold an exclusive lock, then recurse. */
+	if (sx_xlocked(sx)) {
+		sx->sx_recurse++;
+		atomic_set_ptr(&sx->sx_lock, SX_LOCK_RECURSED);
+		if (LOCK_LOG_TEST(&sx->lock_object, 0))
+			CTR2(KTR_LOCK, "%s: %p recursing", __func__, sx);
+		return;
+	}
+
+	if (LOCK_LOG_TEST(&sx->lock_object, 0))
+		CTR5(KTR_LOCK, "%s: %s contested (lock=%p) at %s:%d", __func__,
+		    sx->lock_object.lo_name, (void *)sx->sx_lock, file, line);
+
+	while (!atomic_cmpset_acq_ptr(&sx->sx_lock, SX_LOCK_UNLOCKED, tid)) {
+#ifdef ADAPTIVE_SX
+		/*
+		 * If the lock is write locked and the owner is
+		 * running on another CPU, spin until the owner stops
+		 * running or the state of the lock changes.
+		 */
+		x = sx->sx_lock;
+		if (!(x & SX_LOCK_SHARED) &&
+		    (sx->lock_object.lo_flags & SX_ADAPTIVESPIN)) {
+			x = SX_OWNER(x);
+			owner = (struct thread *)x;
+			if (TD_IS_RUNNING(owner)) {
+				if (LOCK_LOG_TEST(&sx->lock_object, 0))
+					CTR3(KTR_LOCK,
+					    "%s: spinning on %p held by %p",
+					    __func__, sx, owner);
+				GIANT_SAVE();
+				while (SX_OWNER(sx->sx_lock) == x &&
+				    TD_IS_RUNNING(owner))
+					cpu_spinwait();
+				continue;
+			}
+		}
+#endif
+
+		sleepq_lock(&sx->lock_object);
+		x = sx->sx_lock;
+
+		/*
+		 * If the lock was released while spinning on the
+		 * sleep queue chain lock, try again.
+		 */
+		if (x == SX_LOCK_UNLOCKED) {
+			sleepq_release(&sx->lock_object);
+			continue;
+		}
+
+#ifdef ADAPTIVE_SX
+		/*
+		 * The current lock owner might have started executing
+		 * on another CPU (or the lock could have changed
+		 * owners) while we were waiting on the sleep queue
+		 * chain lock.  If so, drop the sleep queue lock and try
+		 * again.
+		 */
+		if (!(x & SX_LOCK_SHARED) &&
+		    (sx->lock_object.lo_flags & SX_ADAPTIVESPIN)) {
+			owner = (struct thread *)SX_OWNER(x);
+			if (TD_IS_RUNNING(owner)) {
+				sleepq_release(&sx->lock_object);
+				continue;
+			}
+		}
+#endif
+
+		/*
+		 * If an exclusive lock was released with both shared
+		 * and exclusive waiters and a shared waiter hasn't
+		 * woken up and acquired the lock yet, sx_lock will be
+		 * set to SX_LOCK_UNLOCKED | SX_LOCK_EXCLUSIVE_WAITERS.
+		 * If we see that value, try to acquire it once.  Note
+		 * that we have to preserve SX_LOCK_EXCLUSIVE_WAITERS
+		 * as there are other exclusive waiters still.  If we
+		 * fail, restart the loop.
+		 */
+		if (x == (SX_LOCK_UNLOCKED | SX_LOCK_EXCLUSIVE_WAITERS)) {
+			if (atomic_cmpset_acq_ptr(&sx->sx_lock,
+			    SX_LOCK_UNLOCKED | SX_LOCK_EXCLUSIVE_WAITERS,
+			    tid | SX_LOCK_EXCLUSIVE_WAITERS)) {
+				sleepq_release(&sx->lock_object);
+				CTR2(KTR_LOCK, "%s: %p claimed by new writer",
+				    __func__, sx);
+				break;
+			}
+			sleepq_release(&sx->lock_object);
+			continue;
+		}
+
+		/*
+		 * Try to set the SX_LOCK_EXCLUSIVE_WAITERS.  If we fail,
+		 * then loop back and retry.
+		 */
+		if (!(x & SX_LOCK_EXCLUSIVE_WAITERS)) {
+			if (!atomic_cmpset_ptr(&sx->sx_lock, x,
+			    x | SX_LOCK_EXCLUSIVE_WAITERS)) {
+				sleepq_release(&sx->lock_object);
+				continue;
+			}
+			if (LOCK_LOG_TEST(&sx->lock_object, 0))
+				CTR2(KTR_LOCK, "%s: %p set excl waiters flag",
+				    __func__, sx);
+		}
+
+		/*
+		 * Since we have been unable to acquire the exclusive
+		 * lock and the exclusive waiters flag is set, we have
+		 * to sleep.
+		 */
+		if (LOCK_LOG_TEST(&sx->lock_object, 0))
+			CTR2(KTR_LOCK, "%s: %p blocking on sleep queue",
+			    __func__, sx);
+
+		GIANT_SAVE();
+		sleepq_add(&sx->lock_object, NULL, sx->lock_object.lo_name,
+		    SLEEPQ_SX, SQ_EXCLUSIVE_QUEUE);
+		sleepq_wait(&sx->lock_object);
+
+		if (LOCK_LOG_TEST(&sx->lock_object, 0))
+			CTR2(KTR_LOCK, "%s: %p resuming from sleep queue",
+			    __func__, sx);
+	}
+
+	GIANT_RESTORE();
+}
+
+/*
+ * Slow path for sx_xunlock(), entered when the inline compare-and-set
+ * release fails.  Unwinds one level of recursion if the lock is
+ * recursed; otherwise drops the lock and wakes one queue of waiters.
+ * It cannot be static because the inline macros in sx.h call it.
+ */
+void
+_sx_xunlock_hard(struct sx *sx, uintptr_t tid, const char *file, int line)
+{
+	uintptr_t x;
+	int queue;
+
+	MPASS(!(sx->sx_lock & SX_LOCK_SHARED));
+
+	/* Recursed: drop one level, clearing the flag at level zero. */
+	if (sx_xlocked(sx) && sx_recursed(sx)) {
+		if ((--sx->sx_recurse) == 0)
+			atomic_clear_ptr(&sx->sx_lock, SX_LOCK_RECURSED);
+		if (LOCK_LOG_TEST(&sx->lock_object, 0))
+			CTR2(KTR_LOCK, "%s: %p unrecursing", __func__, sx);
+		return;
+	}
+	MPASS(sx->sx_lock & (SX_LOCK_SHARED_WAITERS |
+	    SX_LOCK_EXCLUSIVE_WAITERS));
+	if (LOCK_LOG_TEST(&sx->lock_object, 0))
+		CTR2(KTR_LOCK, "%s: %p contested", __func__, sx);
+
+	sleepq_lock(&sx->lock_object);
+	x = SX_LOCK_UNLOCKED;
+
+	/*
+	 * The wake up algorithm here is quite simple and probably not
+	 * ideal.  Shared waiters take precedence when present; in that
+	 * case the exclusive waiters flag must be carried over into the
+	 * new lock value since the exclusive queue is not woken.
+	 */
+	if (sx->sx_lock & SX_LOCK_SHARED_WAITERS) {
+		queue = SQ_SHARED_QUEUE;
+		x |= (sx->sx_lock & SX_LOCK_EXCLUSIVE_WAITERS);
+	} else
+		queue = SQ_EXCLUSIVE_QUEUE;
+
+	/* Publish the new lock value, then wake every thread on the queue. */
+	if (LOCK_LOG_TEST(&sx->lock_object, 0))
+		CTR3(KTR_LOCK, "%s: %p waking up all threads on %s queue",
+		    __func__, sx, queue == SQ_SHARED_QUEUE ? "shared" :
+		    "exclusive");
+	atomic_store_rel_ptr(&sx->sx_lock, x);
+	sleepq_broadcast(&sx->lock_object, SLEEPQ_SX, -1, queue);
+}
+
+/*
+ * Slow path for sx_slock(), entered when the inline attempt to bump
+ * the sharer count fails.  Loops until a shared lock is obtained,
+ * optionally spinning on a running owner and otherwise sleeping on the
+ * shared sleep queue.  Not static because the sx.h macros call it.
+ */
+void
+_sx_slock_hard(struct sx *sx, const char *file, int line)
+{
+	GIANT_DECLARE;
+#ifdef ADAPTIVE_SX
+	volatile struct thread *owner;
+#endif
+	uintptr_t x;
+
+	/*
+	 * As with rwlocks, we don't make any attempt to try to block
+	 * shared locks once there is an exclusive waiter.
+	 */
+	for (;;) {
+		x = sx->sx_lock;
+
+		/*
+		 * If no other thread has an exclusive lock then try to bump up
+		 * the count of sharers.  Since we have to preserve the state
+		 * of SX_LOCK_EXCLUSIVE_WAITERS, if we fail to acquire the
+		 * shared lock loop back and retry.
+		 */
+		if (x & SX_LOCK_SHARED) {
+			MPASS(!(x & SX_LOCK_SHARED_WAITERS));
+			if (atomic_cmpset_acq_ptr(&sx->sx_lock, x,
+			    x + SX_ONE_SHARER)) {
+				if (LOCK_LOG_TEST(&sx->lock_object, 0))
+					CTR4(KTR_LOCK,
+					    "%s: %p succeed %p -> %p", __func__,
+					    sx, (void *)x,
+					    (void *)(x + SX_ONE_SHARER));
+				break;
+			}
+			continue;
+		}
+
+#ifdef ADAPTIVE_SX
+		/*
+		 * If the owner is running on another CPU, spin until
+		 * the owner stops running or the state of the lock
+		 * changes.
+		 */
+		else if (sx->lock_object.lo_flags & SX_ADAPTIVESPIN) {
+			x = SX_OWNER(x);
+			owner = (struct thread *)x;
+			if (TD_IS_RUNNING(owner)) {
+				if (LOCK_LOG_TEST(&sx->lock_object, 0))
+					CTR3(KTR_LOCK,
+					    "%s: spinning on %p held by %p",
+					    __func__, sx, owner);
+				GIANT_SAVE();
+				while (SX_OWNER(sx->sx_lock) == x &&
+				    TD_IS_RUNNING(owner))
+					cpu_spinwait();
+				continue;
+			}
+		}
+#endif
+
+		/*
+		 * Some other thread already has an exclusive lock, so
+		 * start the process of blocking.
+		 */
+		sleepq_lock(&sx->lock_object);
+		x = sx->sx_lock;
+
+		/*
+		 * The exclusive lock could have been released before we
+		 * got the sleep queue lock.  If so, loop back and retry.
+		 */
+		if (x & SX_LOCK_SHARED) {
+			sleepq_release(&sx->lock_object);
+			continue;
+		}
+
+#ifdef ADAPTIVE_SX
+		/*
+		 * If the owner is running on another CPU, do not go to
+		 * sleep: drop the sleep queue lock and restart the loop so
+		 * that we spin on the owner instead.
+		 */
+		if (!(x & SX_LOCK_SHARED) &&
+		    (sx->lock_object.lo_flags & SX_ADAPTIVESPIN)) {
+			owner = (struct thread *)SX_OWNER(x);
+			if (TD_IS_RUNNING(owner)) {
+				sleepq_release(&sx->lock_object);
+				continue;
+			}
+		}
+#endif
+
+		/*
+		 * Try to set the SX_LOCK_SHARED_WAITERS flag.  If we
+		 * fail to set it drop the sleep queue lock and loop
+		 * back.
+		 */
+		if (!(x & SX_LOCK_SHARED_WAITERS)) {
+			if (!atomic_cmpset_ptr(&sx->sx_lock, x,
+			    x | SX_LOCK_SHARED_WAITERS)) {
+				sleepq_release(&sx->lock_object);
+				continue;
+			}
+			if (LOCK_LOG_TEST(&sx->lock_object, 0))
+				CTR2(KTR_LOCK, "%s: %p set shared waiters flag",
+				    __func__, sx);
+		}
+
+		/*
+		 * Since we have been unable to acquire the shared lock,
+		 * we have to sleep.
+		 */
+		if (LOCK_LOG_TEST(&sx->lock_object, 0))
+			CTR2(KTR_LOCK, "%s: %p blocking on sleep queue",
+			    __func__, sx);
+
+		GIANT_SAVE();
+		sleepq_add(&sx->lock_object, NULL, sx->lock_object.lo_name,
+		    SLEEPQ_SX, SQ_SHARED_QUEUE);
+		sleepq_wait(&sx->lock_object);
+
+		if (LOCK_LOG_TEST(&sx->lock_object, 0))
+			CTR2(KTR_LOCK, "%s: %p resuming from sleep queue",
+			    __func__, sx);
+	}
+
+	GIANT_RESTORE();
+}
+
+/*
+ * Slow path for sx_sunlock(), entered when the inline release fails or
+ * the last sharer is leaving with exclusive waiters queued.  Retries
+ * until one shared reference is dropped, waking the exclusive queue
+ * when the lock becomes free.  Not static: the sx.h macros call it.
+ */
+void
+_sx_sunlock_hard(struct sx *sx, const char *file, int line)
+{
+	uintptr_t x;
+
+	for (;;) {
+		x = sx->sx_lock;
+
+		/*
+		 * We should never have waiting sharers while at least one
+		 * thread holds a shared lock.
+		 */
+		KASSERT(!(x & SX_LOCK_SHARED_WAITERS),
+		    ("%s: waiting sharers", __func__));
+
+		/*
+		 * See if there is more than one shared lock held.  If
+		 * so, just drop one and return.
+		 */
+		if (SX_SHARERS(x) > 1) {
+			if (atomic_cmpset_ptr(&sx->sx_lock, x,
+			    x - SX_ONE_SHARER)) {
+				if (LOCK_LOG_TEST(&sx->lock_object, 0))
+					CTR4(KTR_LOCK,
+					    "%s: %p succeeded %p -> %p",
+					    __func__, sx, (void *)x,
+					    (void *)(x - SX_ONE_SHARER));
+				break;
+			}
+			continue;
+		}
+
+		/*
+		 * If there aren't any waiters for an exclusive lock,
+		 * then try to drop it quickly.
+		 */
+		if (!(x & SX_LOCK_EXCLUSIVE_WAITERS)) {
+			MPASS(x == SX_SHARERS_LOCK(1));
+			if (atomic_cmpset_ptr(&sx->sx_lock, SX_SHARERS_LOCK(1),
+			    SX_LOCK_UNLOCKED)) {
+				if (LOCK_LOG_TEST(&sx->lock_object, 0))
+					CTR2(KTR_LOCK, "%s: %p last succeeded",
+					    __func__, sx);
+				break;
+			}
+			continue;
+		}
+
+		/*
+		 * At this point, there should just be one sharer with
+		 * exclusive waiters.
+		 */
+		MPASS(x == (SX_SHARERS_LOCK(1) | SX_LOCK_EXCLUSIVE_WAITERS));
+
+		sleepq_lock(&sx->lock_object);
+
+		/*
+		 * Wake up semantic here is quite simple:
+		 * Just wake up all the exclusive waiters.
+		 * Note that the state of the lock could have changed,
+		 * so if it fails loop back and retry.
+		 */
+		if (!atomic_cmpset_ptr(&sx->sx_lock,
+		    SX_SHARERS_LOCK(1) | SX_LOCK_EXCLUSIVE_WAITERS,
+		    SX_LOCK_UNLOCKED)) {
+			sleepq_release(&sx->lock_object);
+			continue;
+		}
+		if (LOCK_LOG_TEST(&sx->lock_object, 0))
+			CTR2(KTR_LOCK, "%s: %p waking up all threads on "
+			    "exclusive queue", __func__, sx);
+		sleepq_broadcast(&sx->lock_object, SLEEPQ_SX, -1,
+		    SQ_EXCLUSIVE_QUEUE);
+		break;
+	}
 }
 
 #ifdef INVARIANT_SUPPORT
@@ -375,45 +801,76 @@ _sx_downgrade(struct sx *sx, const char *file, int line)
 void
 _sx_assert(struct sx *sx, int what, const char *file, int line)
 {
+#ifndef WITNESS
+	int slocked = 0;
+#endif
 
 	if (panicstr != NULL)
 		return;
 	switch (what) {
-	case SX_LOCKED:
-	case SX_LOCKED | LA_NOTRECURSED:
 	case SX_SLOCKED:
+	case SX_SLOCKED | SX_NOTRECURSED:
+	case SX_SLOCKED | SX_RECURSED:
+#ifndef WITNESS
+		slocked = 1;
+		/* FALLTHROUGH */
+#endif
+	case SX_LOCKED:
+	case SX_LOCKED | SX_NOTRECURSED:
+	case SX_LOCKED | SX_RECURSED:
 #ifdef WITNESS
 		witness_assert(&sx->lock_object, what, file, line);
 #else
-		mtx_lock(sx->sx_lock);
-		if (sx->sx_cnt <= 0 &&
-		    (what == SX_SLOCKED || sx->sx_xholder != curthread))
+		/*
+		 * If some other thread has an exclusive lock or we
+		 * have one and are asserting a shared lock, fail.
+		 * Also, if no one has a lock at all, fail.
+		 */
+		if (sx->sx_lock == SX_LOCK_UNLOCKED ||
+		    (!(sx->sx_lock & SX_LOCK_SHARED) && (slocked ||
+		    sx_xholder(sx) != curthread)))
 			panic("Lock %s not %slocked @ %s:%d\n",
-			    sx->lock_object.lo_name, (what == SX_SLOCKED) ?
-			    "share " : "", file, line);
-		mtx_unlock(sx->sx_lock);
+			    sx->lock_object.lo_name, slocked ? "share " : "",
+			    file, line);
+
+		if (!(sx->sx_lock & SX_LOCK_SHARED)) {
+			if (sx_recursed(sx)) {
+				if (what & SX_NOTRECURSED)
+					panic("Lock %s recursed @ %s:%d\n",
+					    sx->lock_object.lo_name, file,
+					    line);
+			} else if (what & SX_RECURSED)
+				panic("Lock %s not recursed @ %s:%d\n",
+				    sx->lock_object.lo_name, file, line);
+		}
 #endif
 		break;
 	case SX_XLOCKED:
-		mtx_lock(sx->sx_lock);
-		if (sx->sx_xholder != curthread)
+	case SX_XLOCKED | SX_NOTRECURSED:
+	case SX_XLOCKED | SX_RECURSED:
+		if (sx_xholder(sx) != curthread)
 			panic("Lock %s not exclusively locked @ %s:%d\n",
 			    sx->lock_object.lo_name, file, line);
-		mtx_unlock(sx->sx_lock);
+		if (sx_recursed(sx)) {
+			if (what & SX_NOTRECURSED)
+				panic("Lock %s recursed @ %s:%d\n",
+				    sx->lock_object.lo_name, file, line);
+		} else if (what & SX_RECURSED)
+			panic("Lock %s not recursed @ %s:%d\n",
+			    sx->lock_object.lo_name, file, line);
 		break;
 	case SX_UNLOCKED:
 #ifdef WITNESS
 		witness_assert(&sx->lock_object, what, file, line);
 #else
 		/*
-		 * We are able to check only exclusive lock here,
-		 * we cannot assert that *this* thread owns slock.
+		 * If we hold an exclusive lock, fail.  We can't
+		 * reliably check to see if we hold a shared lock or
+		 * not.
 		 */
-		mtx_lock(sx->sx_lock);
-		if (sx->sx_xholder == curthread)
+		if (sx_xholder(sx) == curthread)
 			panic("Lock %s exclusively locked @ %s:%d\n",
 			    sx->lock_object.lo_name, file, line);
-		mtx_unlock(sx->sx_lock);
 #endif
 		break;
 	default:
@@ -424,7 +881,7 @@ _sx_assert(struct sx *sx, int what, const char *file, int line)
 #endif	/* INVARIANT_SUPPORT */
 
 #ifdef DDB
-void
+static void
 db_show_sx(struct lock_object *lock)
 {
 	struct thread *td;
@@ -433,16 +890,33 @@ db_show_sx(struct lock_object *lock)
 	sx = (struct sx *)lock;
 
 	db_printf(" state: ");
-	if (sx->sx_cnt < 0) {
-		td = sx->sx_xholder;
+	if (sx->sx_lock == SX_LOCK_UNLOCKED)
+		db_printf("UNLOCKED\n");
+	else if (sx->sx_lock & SX_LOCK_SHARED)
+		db_printf("SLOCK: %ju\n", (uintmax_t)SX_SHARERS(sx->sx_lock));
+	else {
+		td = sx_xholder(sx);
 		db_printf("XLOCK: %p (tid %d, pid %d, \"%s\")\n", td,
 		    td->td_tid, td->td_proc->p_pid, td->td_proc->p_comm);
-	} else if (sx->sx_cnt > 0)
-		db_printf("SLOCK: %d locks\n", sx->sx_cnt);
-	else
-		db_printf("UNLOCKED\n");
-	db_printf(" waiters: %d shared, %d exclusive\n", sx->sx_shrd_wcnt,
-	    sx->sx_excl_wcnt);
+		if (sx_recursed(sx))
+			db_printf(" recursed: %d\n", sx->sx_recurse);
+	}
+
+	db_printf(" waiters: ");
+	switch(sx->sx_lock &
+	    (SX_LOCK_SHARED_WAITERS | SX_LOCK_EXCLUSIVE_WAITERS)) {
+	case SX_LOCK_SHARED_WAITERS:
+		db_printf("shared\n");
+		break;
+	case SX_LOCK_EXCLUSIVE_WAITERS:
+		db_printf("exclusive\n");
+		break;
+	case SX_LOCK_SHARED_WAITERS | SX_LOCK_EXCLUSIVE_WAITERS:
+		db_printf("exclusive and shared\n");
+		break;
+	default:
+		db_printf("none\n");
+	}
 }
 
 /*
@@ -454,47 +928,25 @@ int
 sx_chain(struct thread *td, struct thread **ownerp)
 {
 	struct sx *sx;
-	struct cv *cv;
 
 	/*
-	 * First, see if it looks like td is blocked on a condition
-	 * variable.
+	 * Check to see if this thread is blocked on an sx lock.
+	 * First, we check the lock class.  If that is ok, then we
+	 * compare the lock name against the wait message.
 	 */
-	cv = td->td_wchan;
-	if (cv->cv_description != td->td_wmesg)
+	sx = td->td_wchan;
+	if (LOCK_CLASS(&sx->lock_object) != &lock_class_sx ||
+	    sx->lock_object.lo_name != td->td_wmesg)
 		return (0);
 
-	/*
-	 * Ok, see if it looks like td is blocked on the exclusive
-	 * condition variable.
-	 */
-	sx = (struct sx *)((char *)cv - offsetof(struct sx, sx_excl_cv));
-	if (LOCK_CLASS(&sx->lock_object) == &lock_class_sx &&
-	    sx->sx_excl_wcnt > 0)
-		goto ok;
-
-	/*
-	 * Second, see if it looks like td is blocked on the shared
-	 * condition variable.
-	 */
-	sx = (struct sx *)((char *)cv - offsetof(struct sx, sx_shrd_cv));
-	if (LOCK_CLASS(&sx->lock_object) == &lock_class_sx &&
-	    sx->sx_shrd_wcnt > 0)
-		goto ok;
-
-	/* Doesn't seem to be an sx lock. */
-	return (0);
-
-ok:
 	/* We think we have an sx lock, so output some details. */
 	db_printf("blocked on sx \"%s\" ", td->td_wmesg);
-	if (sx->sx_cnt >= 0) {
-		db_printf("SLOCK (count %d)\n", sx->sx_cnt);
-		*ownerp = NULL;
-	} else {
+	*ownerp = sx_xholder(sx);
+	if (sx->sx_lock & SX_LOCK_SHARED)
+		db_printf("SLOCK (count %ju)\n",
+		    (uintmax_t)SX_SHARERS(sx->sx_lock));
+	else
 		db_printf("XLOCK\n");
-		*ownerp = sx->sx_xholder;
-	}
 	return (1);
 }
 #endif
diff --git a/sys/netinet6/in6_src.c b/sys/netinet6/in6_src.c
index f7a6ff748a2..204781ef7ca 100644
--- a/sys/netinet6/in6_src.c
+++ b/sys/netinet6/in6_src.c
@@ -66,6 +66,7 @@
 
 #include <sys/param.h>
 #include <sys/systm.h>
+#include <sys/lock.h>
 #include <sys/malloc.h>
 #include <sys/mbuf.h>
 #include <sys/priv.h>
diff --git a/sys/sys/_sx.h b/sys/sys/_sx.h
new file mode 100644
index 00000000000..0d0351c6270
--- /dev/null
+++ b/sys/sys/_sx.h
@@ -0,0 +1,43 @@
+/*-
+ * Copyright (c) 2007 Attilio Rao <attilio@freebsd.org>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice(s), this list of conditions and the following disclaimer as
+ *    the first lines of this file unmodified other than the possible 
+ *    addition of one or more copyright notices.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice(s), this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER(S) ``AS IS'' AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT HOLDER(S) BE LIABLE FOR ANY
+ * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+ * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+ * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH
+ * DAMAGE.
+ *
+ * $FreeBSD$
+ */
+
+#ifndef	_SYS__SX_H_
+#define	_SYS__SX_H_
+
+/*
+ * Shared/exclusive lock main structure definition.
+ */
+struct sx {
+	struct lock_object	lock_object;	/* Common lock properties. */
+	volatile uintptr_t	sx_lock;	/* Owner/sharers + flags. */
+	volatile unsigned	sx_recurse;	/* Recursion depth. */
+};
+
+#endif	/* !_SYS__SX_H_ */
diff --git a/sys/sys/sleepqueue.h b/sys/sys/sleepqueue.h
index f25bb59edf4..58d667e3ddf 100644
--- a/sys/sys/sleepqueue.h
+++ b/sys/sys/sleepqueue.h
@@ -33,8 +33,9 @@
 #define _SYS_SLEEPQUEUE_H_
 
 /*
- * Sleep queue interface.  Sleep/wakeup and condition variables use a sleep
- * queue for the queue of threads blocked on a sleep channel.
+ * Sleep queue interface.  Sleep/wakeup, condition variables, and sx
+ * locks use a sleep queue for the queue of threads blocked on a sleep
+ * channel.
  *
  * A thread calls sleepq_lock() to lock the sleep queue chain associated
  * with a given wait channel.  A thread can then call call sleepq_add() to
@@ -85,6 +86,7 @@ struct thread;
 #define	SLEEPQ_SLEEP		0x00		/* Used by sleep/wakeup. */
 #define	SLEEPQ_CONDVAR		0x01		/* Used for a cv. */
 #define	SLEEPQ_PAUSE		0x02		/* Used by pause. */
+#define	SLEEPQ_SX		0x03		/* Used by an sx lock. */
 #define	SLEEPQ_INTERRUPTIBLE	0x100		/* Sleep is interruptible. */
 
 void	init_sleepqueues(void);
diff --git a/sys/sys/sx.h b/sys/sys/sx.h
index 64bf10d3020..c1838f5ac39 100644
--- a/sys/sys/sx.h
+++ b/sys/sys/sx.h
@@ -1,5 +1,7 @@
 /*-
- * Copyright (C) 2001 Jason Evans <jasone@freebsd.org>.  All rights reserved.
+ * Copyright (c) 2007 Attilio Rao <attilio@freebsd.org>
+ * Copyright (c) 2001 Jason Evans <jasone@freebsd.org>
+ * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
@@ -30,24 +32,132 @@
 #ifndef	_SYS_SX_H_
 #define	_SYS_SX_H_
 
-#include <sys/queue.h>
 #include <sys/_lock.h>
-#include <sys/condvar.h>	/* XXX */
+#include <sys/_sx.h>
 
-struct sx {
-	struct lock_object lock_object;	/* Common lock properties. */
-	struct mtx	*sx_lock;	/* General protection lock. */
-	int		sx_cnt;		/* -1: xlock, > 0: slock count. */
-	struct cv	sx_shrd_cv;	/* slock waiters. */
-	int		sx_shrd_wcnt;	/* Number of slock waiters. */
-	struct cv	sx_excl_cv;	/* xlock waiters. */
-	int		sx_excl_wcnt;	/* Number of xlock waiters. */
-	struct thread	*sx_xholder;	/* Thread presently holding xlock. */
-};
+#ifdef	_KERNEL
+#include <machine/atomic.h>
+#endif
+
+/*
+ * In general, the sx locks and rwlocks use very similar algorithms.
+ * The main difference in the implementations is how threads are
+ * blocked when a lock is unavailable.  For this, sx locks use sleep
+ * queues which do not support priority propagation, and rwlocks use
+ * turnstiles which do.
+ *
+ * The sx_lock field consists of several fields.  The low bit
+ * indicates if the lock is locked with a shared or exclusive lock.  A
+ * value of 0 indicates an exclusive lock, and a value of 1 indicates
+ * a shared lock.  Bit 1 is a boolean indicating if there are any
+ * threads waiting for a shared lock.  Bit 2 is a boolean indicating
+ * if there are any threads waiting for an exclusive lock.  Bit 3 is a
+ * boolean indicating if an exclusive lock is recursively held.  The
+ * rest of the variable's definition is dependent on the value of the
+ * first bit.  For an exclusive lock, it is a pointer to the thread
+ * holding the lock, similar to the mtx_lock field of mutexes.  For
+ * shared locks, it is a count of read locks that are held.
+ *
+ * When the lock is not locked by any thread, it is encoded as a
+ * shared lock with a count of zero.
+ *
+ * A note about memory barriers.  Exclusive locks need to use the same
+ * memory barriers as mutexes: _acq when acquiring an exclusive lock
+ * and _rel when releasing an exclusive lock.  On the other hand,
+ * shared locks need to use an _acq barrier when acquiring the lock
+ * but, since they don't update any locked data, no memory barrier is
+ * needed when releasing a shared lock.
+ */
+
+#define	SX_LOCK_SHARED			0x01	/* Shared (or unlocked). */
+#define	SX_LOCK_SHARED_WAITERS		0x02	/* Shared waiters exist. */
+#define	SX_LOCK_EXCLUSIVE_WAITERS	0x04	/* Exclusive waiters exist. */
+#define	SX_LOCK_RECURSED		0x08	/* Exclusively recursed. */
+#define	SX_LOCK_FLAGMASK						\
+	(SX_LOCK_SHARED | SX_LOCK_SHARED_WAITERS |			\
+	SX_LOCK_EXCLUSIVE_WAITERS | SX_LOCK_RECURSED)
+
+#define	SX_OWNER(x)			((x) & ~SX_LOCK_FLAGMASK)
+#define	SX_SHARERS_SHIFT		4
+#define	SX_SHARERS(x)			(SX_OWNER(x) >> SX_SHARERS_SHIFT)
+#define	SX_SHARERS_LOCK(x)						\
+	((x) << SX_SHARERS_SHIFT | SX_LOCK_SHARED)
+#define	SX_ONE_SHARER			(1 << SX_SHARERS_SHIFT)
+
+#define	SX_LOCK_UNLOCKED		SX_SHARERS_LOCK(0)
 
 #ifdef _KERNEL
+
+/*
+ * Full lock operations that are suitable to be inlined in non-debug kernels.
+ * If the lock can't be acquired or released trivially then the work is
+ * deferred to 'tougher' functions.
+ */
+
+/* Acquire an exclusive lock; _sx_xlock_hard() handles the contended case. */
+#define	__sx_xlock(sx, tid, file, line) do {				\
+	uintptr_t _tid = (uintptr_t)(tid);				\
+	int _contested = 0;						\
+	uint64_t _waitstart = 0;					\
+									\
+	if (!atomic_cmpset_acq_ptr(&(sx)->sx_lock, SX_LOCK_UNLOCKED,	\
+	    _tid)) {							\
+		lock_profile_obtain_lock_failed(&(sx)->lock_object,	\
+		    &_contested, &_waitstart);				\
+		_sx_xlock_hard((sx), _tid, (file), (line));		\
+	}								\
+	lock_profile_obtain_lock_success(&(sx)->lock_object,		\
+	    _contested, _waitstart, (file), (line));			\
+} while (0)
+
+/* Release an exclusive lock; _sx_xunlock_hard() runs if the cmpset fails. */
+#define	__sx_xunlock(sx, tid, file, line) do {				\
+	uintptr_t _tid = (uintptr_t)(tid);				\
+									\
+	if (!atomic_cmpset_rel_ptr(&(sx)->sx_lock, _tid,		\
+	    SX_LOCK_UNLOCKED))						\
+		_sx_xunlock_hard((sx), _tid, (file), (line));		\
+} while (0)
+
+/* Acquire a shared lock; _sx_slock_hard() handles the contended case. */
+#define	__sx_slock(sx, file, line) do {					\
+	uintptr_t _x = (sx)->sx_lock;					\
+	int _contested = 0;						\
+	uint64_t _waitstart = 0;					\
+									\
+	if (!(_x & SX_LOCK_SHARED) ||					\
+	    !atomic_cmpset_acq_ptr(&(sx)->sx_lock, _x,			\
+	    _x + SX_ONE_SHARER)) {					\
+		lock_profile_obtain_lock_failed(&(sx)->lock_object,	\
+		    &_contested, &_waitstart);				\
+		_sx_slock_hard((sx), (file), (line));			\
+	}								\
+	lock_profile_obtain_lock_success(&(sx)->lock_object,		\
+	    _contested, _waitstart, (file), (line));			\
+} while (0)
+
+/*
+ * Release a shared lock.  We can just drop a single shared lock so
+ * long as we aren't trying to drop the last shared lock when other
+ * threads are waiting for an exclusive lock.  This takes advantage of
+ * the fact that an unlocked lock is encoded as a shared lock with a
+ * count of 0.  Everything else is left to _sx_sunlock_hard().
+ */
+#define	__sx_sunlock(sx, file, line) do {				\
+	uintptr_t _x = (sx)->sx_lock;					\
+									\
+	if (_x == (SX_SHARERS_LOCK(1) | SX_LOCK_EXCLUSIVE_WAITERS) ||	\
+	    !atomic_cmpset_ptr(&(sx)->sx_lock, _x, _x - SX_ONE_SHARER))	\
+		_sx_sunlock_hard((sx), (file), (line));			\
+} while (0)
+
+/*
+ * Function prototypes.  Routines that start with an underscore are not part
+ * of the public interface and are wrapped with a macro.
+ */
 void	sx_sysinit(void *arg);
-void	sx_init(struct sx *sx, const char *description);
+#define	sx_init(sx, desc)	sx_init_flags((sx), (desc), 0)
+void	sx_init_flags(struct sx *sx, const char *description, int opts);
 void	sx_destroy(struct sx *sx);
 void	_sx_slock(struct sx *sx, const char *file, int line);
 void	_sx_xlock(struct sx *sx, const char *file, int line);
@@ -57,6 +167,12 @@ void	_sx_sunlock(struct sx *sx, const char *file, int line);
 void	_sx_xunlock(struct sx *sx, const char *file, int line);
 int	_sx_try_upgrade(struct sx *sx, const char *file, int line);
 void	_sx_downgrade(struct sx *sx, const char *file, int line);
+void	_sx_xlock_hard(struct sx *sx, uintptr_t tid, const char *file, int
+	    line);
+void	_sx_slock_hard(struct sx *sx, const char *file, int line);
+void	_sx_xunlock_hard(struct sx *sx, uintptr_t tid, const char *file, int
+	    line);
+void	_sx_sunlock_hard(struct sx *sx, const char *file, int line);
 #if defined(INVARIANTS) || defined(INVARIANT_SUPPORT)
 void	_sx_assert(struct sx *sx, int what, const char *file, int line);
 #endif
@@ -79,29 +195,63 @@ struct sx_args {
 	SYSUNINIT(name##_sx_sysuninit, SI_SUB_LOCK, SI_ORDER_MIDDLE,	\
 	    sx_destroy, (sxa))
 
-#define	sx_xlocked(sx)		((sx)->sx_cnt < 0 && (sx)->sx_xholder == curthread)
-#define	sx_slock(sx)		_sx_slock((sx), LOCK_FILE, LOCK_LINE)
+/*
+ * Public interface for lock operations.
+ */
+#ifndef LOCK_DEBUG
+#error	"LOCK_DEBUG not defined, include <sys/lock.h> before <sys/sx.h>"
+#endif
+#if	(LOCK_DEBUG > 0) || defined(SX_NOINLINE)
 #define	sx_xlock(sx)		_sx_xlock((sx), LOCK_FILE, LOCK_LINE)
+#define	sx_xunlock(sx)		_sx_xunlock((sx), LOCK_FILE, LOCK_LINE)
+#define	sx_slock(sx)		_sx_slock((sx), LOCK_FILE, LOCK_LINE)
+#define	sx_sunlock(sx)		_sx_sunlock((sx), LOCK_FILE, LOCK_LINE)
+#else
+#define	sx_xlock(sx)							\
+	__sx_xlock((sx), curthread, LOCK_FILE, LOCK_LINE)
+#define	sx_xunlock(sx)							\
+	__sx_xunlock((sx), curthread, LOCK_FILE, LOCK_LINE)
+#define	sx_slock(sx)		__sx_slock((sx), LOCK_FILE, LOCK_LINE)
+#define	sx_sunlock(sx)		__sx_sunlock((sx), LOCK_FILE, LOCK_LINE)
+#endif	/* LOCK_DEBUG > 0 || SX_NOINLINE */
 #define	sx_try_slock(sx)	_sx_try_slock((sx), LOCK_FILE, LOCK_LINE)
 #define	sx_try_xlock(sx)	_sx_try_xlock((sx), LOCK_FILE, LOCK_LINE)
-#define	sx_sunlock(sx)		_sx_sunlock((sx), LOCK_FILE, LOCK_LINE)
-#define	sx_xunlock(sx)		_sx_xunlock((sx), LOCK_FILE, LOCK_LINE)
 #define	sx_try_upgrade(sx)	_sx_try_upgrade((sx), LOCK_FILE, LOCK_LINE)
 #define	sx_downgrade(sx)	_sx_downgrade((sx), LOCK_FILE, LOCK_LINE)
+
+#define	sx_xlocked(sx)							\
+	(((sx)->sx_lock & ~(SX_LOCK_FLAGMASK & ~SX_LOCK_SHARED)) ==	\
+	    (uintptr_t)curthread)
+
 #define	sx_unlock(sx) do {						\
 	if (sx_xlocked(sx))						\
 		sx_xunlock(sx);						\
 	else								\
 		sx_sunlock(sx);						\
 } while (0)
+
 #define	sx_sleep(chan, sx, pri, wmesg, timo)				\
 	_sleep((chan), &(sx)->lock_object, (pri), (wmesg), (timo))
 
+/*
+ * Options passed to sx_init_flags().
+ */
+#define	SX_DUPOK		0x01
+#define	SX_NOPROFILE		0x02
+#define	SX_NOWITNESS		0x04
+#define	SX_QUIET		0x08
+#define	SX_ADAPTIVESPIN		0x10
+
+/*
+ * XXX: These options should be renamed as SA_*
+ */
 #if defined(INVARIANTS) || defined(INVARIANT_SUPPORT)
 #define	SX_LOCKED		LA_LOCKED
 #define	SX_SLOCKED		LA_SLOCKED
 #define	SX_XLOCKED		LA_XLOCKED
 #define	SX_UNLOCKED		LA_UNLOCKED
+#define	SX_RECURSED		LA_RECURSED
+#define	SX_NOTRECURSED		LA_NOTRECURSED
 #endif
 
 #ifdef INVARIANTS