From 829fae9063685b393b1fc5670abd9c0d2c3686a1 Mon Sep 17 00:00:00 2001 From: Gleb Smirnoff Date: Fri, 8 Jan 2016 19:03:20 +0000 Subject: [PATCH 01/67] Make it possible for sbappend() to preserve M_NOTREADY on mbufs, just like sbappendstream() does. Although, M_NOTREADY may appear only on SOCK_STREAM sockets, due to sendfile(2) supporting only the latter, there is a corner case of AF_UNIX/SOCK_STREAM socket, that still uses records for the sake of control data, albeit being stream socket. Provide private version of m_clrprotoflags(), which understands PRUS_NOTREADY, similar to m_demote(). --- sys/kern/uipc_sockbuf.c | 25 ++++++++++++++++--- sys/kern/uipc_usrreq.c | 2 +- .../bluetooth/socket/ng_btsocket_rfcomm.c | 4 +-- sys/ofed/drivers/infiniband/ulp/sdp/sdp_rx.c | 2 +- sys/sys/sockbuf.h | 4 +-- 5 files changed, 27 insertions(+), 10 deletions(-) diff --git a/sys/kern/uipc_sockbuf.c b/sys/kern/uipc_sockbuf.c index 243450d0d63..ba77fcaca92 100644 --- a/sys/kern/uipc_sockbuf.c +++ b/sys/kern/uipc_sockbuf.c @@ -68,6 +68,23 @@ static u_long sb_efficiency = 8; /* parameter for sbreserve() */ static struct mbuf *sbcut_internal(struct sockbuf *sb, int len); static void sbflush_internal(struct sockbuf *sb); +/* + * Our own version of m_clrprotoflags(), that can preserve M_NOTREADY. + */ +static void +sbm_clrprotoflags(struct mbuf *m, int flags) +{ + int mask; + + mask = ~M_PROTOFLAGS; + if (flags & PRUS_NOTREADY) + mask |= M_NOTREADY; + while (m) { + m->m_flags &= mask; + m = m->m_next; + } +} + /* * Mark ready "count" mbufs starting with "m". */ @@ -569,7 +586,7 @@ sblastmbufchk(struct sockbuf *sb, const char *file, int line) * are discarded and mbufs are compacted where possible. 
*/ void -sbappend_locked(struct sockbuf *sb, struct mbuf *m) +sbappend_locked(struct sockbuf *sb, struct mbuf *m, int flags) { struct mbuf *n; @@ -577,7 +594,7 @@ sbappend_locked(struct sockbuf *sb, struct mbuf *m) if (m == 0) return; - m_clrprotoflags(m); + sbm_clrprotoflags(m, flags); SBLASTRECORDCHK(sb); n = sb->sb_mb; if (n) { @@ -620,11 +637,11 @@ sbappend_locked(struct sockbuf *sb, struct mbuf *m) * are discarded and mbufs are compacted where possible. */ void -sbappend(struct sockbuf *sb, struct mbuf *m) +sbappend(struct sockbuf *sb, struct mbuf *m, int flags) { SOCKBUF_LOCK(sb); - sbappend_locked(sb, m); + sbappend_locked(sb, m, flags); SOCKBUF_UNLOCK(sb); } diff --git a/sys/kern/uipc_usrreq.c b/sys/kern/uipc_usrreq.c index efed37b8aff..e455b1096d9 100644 --- a/sys/kern/uipc_usrreq.c +++ b/sys/kern/uipc_usrreq.c @@ -981,7 +981,7 @@ uipc_send(struct socket *so, int flags, struct mbuf *m, struct sockaddr *nam, control)) control = NULL; } else - sbappend_locked(&so2->so_rcv, m); + sbappend_locked(&so2->so_rcv, m, flags); break; case SOCK_SEQPACKET: { diff --git a/sys/netgraph/bluetooth/socket/ng_btsocket_rfcomm.c b/sys/netgraph/bluetooth/socket/ng_btsocket_rfcomm.c index f0cd01e0ab4..68c5975c0de 100644 --- a/sys/netgraph/bluetooth/socket/ng_btsocket_rfcomm.c +++ b/sys/netgraph/bluetooth/socket/ng_btsocket_rfcomm.c @@ -972,7 +972,7 @@ ng_btsocket_rfcomm_send(struct socket *so, int flags, struct mbuf *m, } /* Put the packet on the socket's send queue and wakeup RFCOMM task */ - sbappend(&pcb->so->so_snd, m); + sbappend(&pcb->so->so_snd, m, flags); m = NULL; if (!(pcb->flags & NG_BTSOCKET_RFCOMM_DLC_SENDING)) { @@ -2396,7 +2396,7 @@ ng_btsocket_rfcomm_receive_uih(ng_btsocket_rfcomm_session_p s, int dlci, error = ENOBUFS; } else { /* Append packet to the socket receive queue */ - sbappend(&pcb->so->so_rcv, m0); + sbappend(&pcb->so->so_rcv, m0, 0); m0 = NULL; sorwakeup(pcb->so); diff --git a/sys/ofed/drivers/infiniband/ulp/sdp/sdp_rx.c 
b/sys/ofed/drivers/infiniband/ulp/sdp/sdp_rx.c index 0b7821266b6..7c6f6cbd1d8 100644 --- a/sys/ofed/drivers/infiniband/ulp/sdp/sdp_rx.c +++ b/sys/ofed/drivers/infiniband/ulp/sdp/sdp_rx.c @@ -242,7 +242,7 @@ sdp_sock_queue_rcv_mb(struct socket *sk, struct mbuf *mb) SOCKBUF_LOCK(&sk->so_rcv); if (unlikely(h->flags & SDP_OOB_PRES)) sdp_urg(ssk, mb); - sbappend_locked(&sk->so_rcv, mb); + sbappend_locked(&sk->so_rcv, mb, 0); sorwakeup_locked(sk); return mb; } diff --git a/sys/sys/sockbuf.h b/sys/sys/sockbuf.h index 0e3e172d63f..c6904f65952 100644 --- a/sys/sys/sockbuf.h +++ b/sys/sys/sockbuf.h @@ -129,8 +129,8 @@ struct sockbuf { #define M_BLOCKED M_PROTO2 /* M_NOTREADY in front of m */ #define M_NOTAVAIL (M_NOTREADY | M_BLOCKED) -void sbappend(struct sockbuf *sb, struct mbuf *m); -void sbappend_locked(struct sockbuf *sb, struct mbuf *m); +void sbappend(struct sockbuf *sb, struct mbuf *m, int flags); +void sbappend_locked(struct sockbuf *sb, struct mbuf *m, int flags); void sbappendstream(struct sockbuf *sb, struct mbuf *m, int flags); void sbappendstream_locked(struct sockbuf *sb, struct mbuf *m, int flags); int sbappendaddr(struct sockbuf *sb, const struct sockaddr *asa, From 6b2f497e48252223d3dc6344db3214bda0dcb8b8 Mon Sep 17 00:00:00 2001 From: Enji Cooper Date: Fri, 8 Jan 2016 19:10:52 +0000 Subject: [PATCH 02/67] - Use attach_md for memory disks so they can be tracked. 
- Add a geom_concat specific cleanup function and trap on that function at exit so things are cleaned up properly - Don't hardcode /tmp for temporary files, which violates the kyua sandbox MFC after: 3 weeks Sponsored by: EMC / Isilon Storage Division --- tools/regression/geom_concat/conf.sh | 7 +++++++ tools/regression/geom_concat/test-1.t | 15 ++++----------- tools/regression/geom_concat/test-2.t | 19 +++++++------------ 3 files changed, 18 insertions(+), 23 deletions(-) diff --git a/tools/regression/geom_concat/conf.sh b/tools/regression/geom_concat/conf.sh index c8692bbc84d..374ed12aecc 100644 --- a/tools/regression/geom_concat/conf.sh +++ b/tools/regression/geom_concat/conf.sh @@ -5,4 +5,11 @@ name="$(mktemp -u concat.XXXXXX)" class="concat" base=`basename $0` +gconcat_test_cleanup() +{ + [ -c /dev/$class/$name ] && gconcat destroy $name + geom_test_cleanup +} +trap gconcat_test_cleanup ABRT EXIT INT TERM + . `dirname $0`/../geom_subr.sh diff --git a/tools/regression/geom_concat/test-1.t b/tools/regression/geom_concat/test-1.t index 8984f79b104..ef80a61bc18 100644 --- a/tools/regression/geom_concat/test-1.t +++ b/tools/regression/geom_concat/test-1.t @@ -5,13 +5,11 @@ echo '1..1' -us=45 +us0=$(attach_md -t malloc -s 1M) || exit 1 +us1=$(attach_md -t malloc -s 2M) || exit 1 +us2=$(attach_md -t malloc -s 3M) || exit 1 -mdconfig -a -t malloc -s 1M -u $us || exit 1 -mdconfig -a -t malloc -s 2M -u `expr $us + 1` || exit 1 -mdconfig -a -t malloc -s 3M -u `expr $us + 2` || exit 1 - -gconcat create $name /dev/md${us} /dev/md`expr $us + 1` /dev/md`expr $us + 2` || exit 1 +gconcat create $name /dev/$us0 /dev/$us1 /dev/$us2 || exit 1 devwait # Size of created device should be 1MB + 2MB + 3MB. 
@@ -23,8 +21,3 @@ if [ $size -eq 6291456 ]; then else echo "not ok - Size is 6291456" fi - -gconcat destroy $name -mdconfig -d -u $us -mdconfig -d -u `expr $us + 1` -mdconfig -d -u `expr $us + 2` diff --git a/tools/regression/geom_concat/test-2.t b/tools/regression/geom_concat/test-2.t index 445d9bc712f..95636be860e 100644 --- a/tools/regression/geom_concat/test-2.t +++ b/tools/regression/geom_concat/test-2.t @@ -5,18 +5,17 @@ echo '1..1' -us=45 tsize=6 -src=`mktemp /tmp/$base.XXXXXX` || exit 1 -dst=`mktemp /tmp/$base.XXXXXX` || exit 1 +src=`mktemp $base.XXXXXX` || exit 1 +dst=`mktemp $base.XXXXXX` || exit 1 + +us0=$(attach_md -t malloc -s 1M) || exit 1 +us1=$(attach_md -t malloc -s 2M) || exit 1 +us2=$(attach_md -t malloc -s 3M) || exit 1 dd if=/dev/random of=${src} bs=1m count=$tsize >/dev/null 2>&1 -mdconfig -a -t malloc -s 1M -u $us || exit 1 -mdconfig -a -t malloc -s 2M -u `expr $us + 1` || exit 1 -mdconfig -a -t malloc -s 3M -u `expr $us + 2` || exit 1 - -gconcat create $name /dev/md${us} /dev/md`expr $us + 1` /dev/md`expr $us + 2` || exit 1 +gconcat create $name /dev/$us0 /dev/$us1 /dev/$us2 || exit 1 devwait dd if=${src} of=/dev/concat/${name} bs=1m count=$tsize >/dev/null 2>&1 @@ -28,8 +27,4 @@ else echo "ok - md5 checksum comparison" fi -gconcat destroy $name -mdconfig -d -u $us -mdconfig -d -u `expr $us + 1` -mdconfig -d -u `expr $us + 2` rm -f ${src} ${dst} From 873e5b5e14755d63b9120886657f0b8173ca3a95 Mon Sep 17 00:00:00 2001 From: Ed Maste Date: Fri, 8 Jan 2016 19:12:26 +0000 Subject: [PATCH 03/67] Reduce libstand Makefile duplication Userboot's copy of the libstand Makefile had more extensive changes compared to the one in sys/boot/libstand32, but it turns out these are not intentional and we can just include lib/libstand/Makefile as done for libstand32 in r293040. 
Reviewed by: imp, jhb Tested by: allanjude MFC after: 1 month Sponsored by: The FreeBSD Foundation Differential Revision: https://reviews.freebsd.org/D4793 --- sys/boot/userboot/libstand/Makefile | 134 ++-------------------------- 1 file changed, 5 insertions(+), 129 deletions(-) diff --git a/sys/boot/userboot/libstand/Makefile b/sys/boot/userboot/libstand/Makefile index 401bbdacdbe..d8a1100f845 100644 --- a/sys/boot/userboot/libstand/Makefile +++ b/sys/boot/userboot/libstand/Makefile @@ -1,136 +1,12 @@ # $FreeBSD$ -# Originally from $NetBSD: Makefile,v 1.21 1997/10/26 22:08:38 lukem Exp $ -# -# Notes: -# - We don't use the libc strerror/sys_errlist because the string table is -# quite large. -# -MAN= - -.include -MK_SSP= no +.include LIBSTAND_SRC= ${.CURDIR}/../../../../lib/libstand -LIBC_SRC= ${LIBSTAND_SRC}/../libc -.PATH: ${LIBSTAND_SRC} -LIB= stand INTERNALLIB= -MK_PROFILE= no -NO_PIC= +INCS= +MAN= +.PATH: ${LIBSTAND_SRC} -WARNS?= 0 - -# standalone components and stuff we have modified locally -SRCS+= gzguts.h zutil.h __main.c assert.c bcd.c bswap.c environment.c getopt.c gets.c \ - globals.c pager.c printf.c strdup.c strerror.c strtol.c strtoul.c random.c \ - sbrk.c twiddle.c zalloc.c zalloc_malloc.c - -# private (pruned) versions of libc string functions -SRCS+= strcasecmp.c - -.PATH: ${LIBC_SRC}/net - -SRCS+= ntoh.c - -# string functions from libc -.PATH: ${LIBC_SRC}/string -.if ${MACHINE_CPUARCH} == "i386" || ${MACHINE_CPUARCH} == "powerpc" || \ - ${MACHINE_CPUARCH} == "sparc64" || ${MACHINE_CPUARCH} == "amd64" || \ - ${MACHINE_CPUARCH} == "arm" -SRCS+= bcmp.c bcopy.c bzero.c ffs.c memccpy.c memchr.c memcmp.c memcpy.c \ - memmove.c memset.c qdivrem.c strcat.c strchr.c strcmp.c strcpy.c \ - strcspn.c strlen.c strncat.c strncmp.c strncpy.c strpbrk.c \ - strrchr.c strsep.c strspn.c strstr.c strtok.c swab.c -.endif -.if ${MACHINE_CPUARCH} == "arm" -.PATH: ${LIBC_SRC}/arm/gen -SRCS+= divsi3.S -.endif -.if ${MACHINE_CPUARCH} == "powerpc" -.PATH: ${LIBC_SRC}/quad 
-SRCS+= ashldi3.c ashrdi3.c -.PATH: ${LIBC_SRC}/powerpc/gen -SRCS+= syncicache.c -.endif - -# uuid functions from libc -.PATH: ${LIBC_SRC}/uuid -SRCS+= uuid_equal.c uuid_is_nil.c - -# _setjmp/_longjmp -.if ${MACHINE_CPUARCH} == "amd64" -.PATH: ${LIBSTAND_SRC}/amd64 -.elif ${MACHINE_ARCH} == "powerpc64" -.PATH: ${LIBSTAND_SRC}/powerpc -.else -.PATH: ${LIBSTAND_SRC}/${MACHINE_CPUARCH} -.endif -SRCS+= _setjmp.S - -# decompression functionality from libbz2 -# NOTE: to actually test this functionality after libbz2 upgrade compile -# loader(8) with LOADER_BZIP2_SUPPORT defined -.PATH: ${LIBSTAND_SRC}/../../contrib/bzip2 -CFLAGS+= -DBZ_NO_STDIO -DBZ_NO_COMPRESS -SRCS+= libstand_bzlib_private.h - -.for file in bzlib.c crctable.c decompress.c huffman.c randtable.c -SRCS+= _${file} -CLEANFILES+= _${file} - -_${file}: ${file} - sed "s|bzlib_private\.h|libstand_bzlib_private.h|" \ - ${.ALLSRC} > ${.TARGET} -.endfor - -CLEANFILES+= libstand_bzlib_private.h -libstand_bzlib_private.h: bzlib_private.h - sed -e 's||"stand.h"|' \ - ${.ALLSRC} > ${.TARGET} - -# decompression functionality from libz -.PATH: ${LIBSTAND_SRC}/../libz -CFLAGS+=-DHAVE_MEMCPY -I${LIBSTAND_SRC}/../libz -SRCS+= adler32.c crc32.c libstand_zutil.h libstand_gzguts.h - -.for file in infback.c inffast.c inflate.c inftrees.c zutil.c -SRCS+= _${file} -CLEANFILES+= _${file} - -_${file}: ${file} - sed -e "s|zutil\.h|libstand_zutil.h|" \ - -e "s|gzguts\.h|libstand_gzguts.h|" \ - ${.ALLSRC} > ${.TARGET} -.endfor - -# depend on stand.h being able to be included multiple times -.for file in zutil.h gzguts.h -CLEANFILES+= libstand_${file} -libstand_${file}: ${file} - sed -e 's||"stand.h"|' \ - -e 's||"stand.h"|' \ - -e 's||"stand.h"|' \ - -e 's||"stand.h"|' \ - -e 's||"stand.h"|' \ - ${.ALLSRC} > ${.TARGET} -.endfor - -# io routines -SRCS+= closeall.c dev.c ioctl.c nullfs.c stat.c \ - fstat.c close.c lseek.c open.c read.c write.c readdir.c - -# network routines -SRCS+= arp.c ether.c inet_ntoa.c in_cksum.c net.c udp.c 
netif.c rpc.c - -# network info services: -SRCS+= bootp.c rarp.c bootparam.c - -# boot filesystems -SRCS+= ufs.c nfs.c cd9660.c tftp.c gzipfs.c bzipfs.c -SRCS+= dosfs.c ext2fs.c -SRCS+= splitfs.c - -.include -.include +.include "${LIBSTAND_SRC}/Makefile" From 06908bcb39ce166aa0498599bc046759b780ff74 Mon Sep 17 00:00:00 2001 From: Enji Cooper Date: Fri, 8 Jan 2016 19:38:59 +0000 Subject: [PATCH 04/67] - Add a conf.sh file for executing common functions with geli -- Use linear probing to find the first unique md(4) device, unlike the other code which uses attach_md, as geli(8) allocates the md(4) devices itself - Don't hardcode /tmp for temporary files, which violates the kyua sandbox MFC after: 3 weeks Sponsored by: EMC / Isilon Storage Division --- tools/regression/geom_eli/attach-d.t | 6 +++--- tools/regression/geom_eli/conf.sh | 21 +++++++++++++++++++++ tools/regression/geom_eli/configure-b-B.t | 5 ++--- tools/regression/geom_eli/delkey.t | 12 ++++++------ tools/regression/geom_eli/detach-l.t | 6 +++--- tools/regression/geom_eli/init-B.t | 12 +++++------- tools/regression/geom_eli/init-J.t | 12 ++++++------ tools/regression/geom_eli/init-a.t | 7 ++++--- tools/regression/geom_eli/init-i-P.t | 6 +++--- tools/regression/geom_eli/init.t | 7 ++++--- tools/regression/geom_eli/integrity-copy.t | 7 ++++--- tools/regression/geom_eli/integrity-data.t | 7 ++++--- tools/regression/geom_eli/integrity-hmac.t | 7 ++++--- tools/regression/geom_eli/kill.t | 8 ++++---- tools/regression/geom_eli/nokey.t | 6 +++--- tools/regression/geom_eli/onetime-a.t | 5 +++-- tools/regression/geom_eli/onetime-d.t | 3 ++- tools/regression/geom_eli/onetime.t | 5 +++-- tools/regression/geom_eli/readonly.t | 5 +++-- tools/regression/geom_eli/resize.t | 8 +++----- tools/regression/geom_eli/setkey.t | 16 ++++++++-------- 21 files changed, 98 insertions(+), 73 deletions(-) create mode 100755 tools/regression/geom_eli/conf.sh diff --git a/tools/regression/geom_eli/attach-d.t 
b/tools/regression/geom_eli/attach-d.t index 4c4789e0f5f..5d700b3270c 100644 --- a/tools/regression/geom_eli/attach-d.t +++ b/tools/regression/geom_eli/attach-d.t @@ -1,10 +1,11 @@ #!/bin/sh # $FreeBSD$ +. $(dirname $0)/conf.sh + base=`basename $0` -no=45 sectors=100 -keyfile=`mktemp /tmp/$base.XXXXXX` || exit 1 +keyfile=`mktemp $base.XXXXXX` || exit 1 mdconfig -a -t malloc -s `expr $sectors + 1` -u $no || exit 1 echo "1..3" @@ -34,5 +35,4 @@ else echo "not ok 3" fi -mdconfig -d -u $no rm -f $keyfile diff --git a/tools/regression/geom_eli/conf.sh b/tools/regression/geom_eli/conf.sh new file mode 100755 index 00000000000..0646e83df40 --- /dev/null +++ b/tools/regression/geom_eli/conf.sh @@ -0,0 +1,21 @@ +#!/bin/sh +# $FreeBSD$ + +class="eli" +base=`basename $0` + +# We need to use linear probing in order to detect the first available md(4) +# device instead of using mdconfig -a -t, because geli(8) attaches md(4) devices +no=0 +while [ -c /dev/md$no ]; do + : $(( no += 1 )) +done + +geli_test_cleanup() +{ + [ -c /dev/md${no}.eli ] && geli detach md${no}.eli + mdconfig -d -u $no +} +trap geli_test_cleanup ABRT EXIT INT TERM + +. `dirname $0`/../geom_subr.sh diff --git a/tools/regression/geom_eli/configure-b-B.t b/tools/regression/geom_eli/configure-b-B.t index 23aa4124fb7..b6cdf4fe1d9 100644 --- a/tools/regression/geom_eli/configure-b-B.t +++ b/tools/regression/geom_eli/configure-b-B.t @@ -1,8 +1,9 @@ #!/bin/sh # $FreeBSD$ +. $(dirname $0)/conf.sh + base=`basename $0` -no=45 sectors=100 mdconfig -a -t malloc -s `expr $sectors + 1` -u $no || exit 1 @@ -126,5 +127,3 @@ if [ $? -eq 0 ]; then else echo "not ok 17" fi - -mdconfig -d -u $no diff --git a/tools/regression/geom_eli/delkey.t b/tools/regression/geom_eli/delkey.t index a828622bb78..67b253efd0d 100644 --- a/tools/regression/geom_eli/delkey.t +++ b/tools/regression/geom_eli/delkey.t @@ -1,13 +1,14 @@ #!/bin/sh # $FreeBSD$ +. 
$(dirname $0)/conf.sh + base=`basename $0` -no=45 sectors=100 -keyfile1=`mktemp /tmp/$base.XXXXXX` || exit 1 -keyfile2=`mktemp /tmp/$base.XXXXXX` || exit 1 -keyfile3=`mktemp /tmp/$base.XXXXXX` || exit 1 -keyfile4=`mktemp /tmp/$base.XXXXXX` || exit 1 +keyfile1=`mktemp $base.XXXXXX` || exit 1 +keyfile2=`mktemp $base.XXXXXX` || exit 1 +keyfile3=`mktemp $base.XXXXXX` || exit 1 +keyfile4=`mktemp $base.XXXXXX` || exit 1 mdconfig -a -t malloc -s `expr $sectors + 1` -u $no || exit 1 echo "1..14" @@ -136,5 +137,4 @@ else echo "not ok 14" fi -mdconfig -d -u $no rm -f $keyfile1 $keyfile2 $keyfile3 $keyfile4 diff --git a/tools/regression/geom_eli/detach-l.t b/tools/regression/geom_eli/detach-l.t index dfa3269a6db..605ae94e6bf 100644 --- a/tools/regression/geom_eli/detach-l.t +++ b/tools/regression/geom_eli/detach-l.t @@ -1,10 +1,11 @@ #!/bin/sh # $FreeBSD$ +. $(dirname $0)/conf.sh + base=`basename $0` -no=45 sectors=100 -keyfile=`mktemp /tmp/$base.XXXXXX` || exit 1 +keyfile=`mktemp $base.XXXXXX` || exit 1 mdconfig -a -t malloc -s `expr $sectors + 1` -u $no || exit 1 echo "1..4" @@ -40,5 +41,4 @@ else echo "not ok 4" fi -mdconfig -d -u $no rm -f $keyfile diff --git a/tools/regression/geom_eli/init-B.t b/tools/regression/geom_eli/init-B.t index 36ab87344fc..3ba743cfbc6 100644 --- a/tools/regression/geom_eli/init-B.t +++ b/tools/regression/geom_eli/init-B.t @@ -1,11 +1,12 @@ #!/bin/sh # $FreeBSD$ +. 
$(dirname $0)/conf.sh + base=`basename $0` -no=45 sectors=100 -keyfile=`mktemp /tmp/$base.XXXXXX` || exit 1 -backupfile=`mktemp /tmp/$base.XXXXXX` || exit 1 +keyfile=`mktemp $base.XXXXXX` || exit 1 +backupfile=`mktemp $base.XXXXXX` || exit 1 echo "1..13" @@ -99,8 +100,5 @@ if [ -c /dev/md${no}.eli ]; then else echo "not ok 13 - -B file" fi -geli detach md${no} -rm -f $backupfile -mdconfig -d -u $no -rm -f $keyfile +rm -f $backupfile $keyfile diff --git a/tools/regression/geom_eli/init-J.t b/tools/regression/geom_eli/init-J.t index 6f2862f92bf..266a3d537e0 100644 --- a/tools/regression/geom_eli/init-J.t +++ b/tools/regression/geom_eli/init-J.t @@ -1,13 +1,14 @@ #!/bin/sh # $FreeBSD$ +. $(dirname $0)/conf.sh + base=`basename $0` -no=45 sectors=100 -keyfile0=`mktemp /tmp/$base.XXXXXX` || exit 1 -keyfile1=`mktemp /tmp/$base.XXXXXX` || exit 1 -passfile0=`mktemp /tmp/$base.XXXXXX` || exit 1 -passfile1=`mktemp /tmp/$base.XXXXXX` || exit 1 +keyfile0=`mktemp $base.XXXXXX` || exit 1 +keyfile1=`mktemp $base.XXXXXX` || exit 1 +passfile0=`mktemp $base.XXXXXX` || exit 1 +passfile1=`mktemp $base.XXXXXX` || exit 1 mdconfig -a -t malloc -s `expr $sectors + 1` -u $no || exit 1 echo "1..150" @@ -122,5 +123,4 @@ for iter in -1 0 64; do echo "ok ${i}"; i=$((i+1)) done -mdconfig -d -u $no rm -f ${keyfile0} ${keyfile1} ${passfile0} ${passfile1} diff --git a/tools/regression/geom_eli/init-a.t b/tools/regression/geom_eli/init-a.t index 87612a20bad..dbb24fe3248 100644 --- a/tools/regression/geom_eli/init-a.t +++ b/tools/regression/geom_eli/init-a.t @@ -1,10 +1,11 @@ #!/bin/sh # $FreeBSD$ +. 
$(dirname $0)/conf.sh + base=`basename $0` -no=45 sectors=100 -keyfile=`mktemp /tmp/$base.XXXXXX` || exit 1 +keyfile=`mktemp $base.XXXXXX` || exit 1 echo "1..1380" @@ -26,7 +27,7 @@ for cipher in aes:0 aes:128 aes:256 \ keylen=${cipher##*:} for aalgo in hmac/md5 hmac/sha1 hmac/ripemd160 hmac/sha256 hmac/sha384 hmac/sha512; do for secsize in 512 1024 2048 4096 8192; do - rnd=`mktemp /tmp/$base.XXXXXX` || exit 1 + rnd=`mktemp $base.XXXXXX` || exit 1 mdconfig -a -t malloc -s `expr $secsize \* $sectors + 512`b -u $no || exit 1 dd if=/dev/random of=${keyfile} bs=512 count=16 >/dev/null 2>&1 diff --git a/tools/regression/geom_eli/init-i-P.t b/tools/regression/geom_eli/init-i-P.t index a06f9f8165c..1c59a97d391 100644 --- a/tools/regression/geom_eli/init-i-P.t +++ b/tools/regression/geom_eli/init-i-P.t @@ -1,10 +1,11 @@ #!/bin/sh # $FreeBSD$ +. $(dirname $0)/conf.sh + base=`basename $0` -no=45 sectors=100 -keyfile=`mktemp /tmp/$base.XXXXXX` || exit 1 +keyfile=`mktemp $base.XXXXXX` || exit 1 mdconfig -a -t malloc -s `expr $sectors + 1` -u $no || exit 1 echo "1..1" @@ -18,5 +19,4 @@ else echo "not ok 1" fi -mdconfig -d -u $no rm -f $keyfile diff --git a/tools/regression/geom_eli/init.t b/tools/regression/geom_eli/init.t index 6934443d702..71dd6e26797 100644 --- a/tools/regression/geom_eli/init.t +++ b/tools/regression/geom_eli/init.t @@ -1,10 +1,11 @@ #!/bin/sh # $FreeBSD$ +. 
$(dirname $0)/conf.sh + base=`basename $0` -no=45 sectors=100 -keyfile=`mktemp /tmp/$base.XXXXXX` || exit 1 +keyfile=`mktemp $base.XXXXXX` || exit 1 echo "1..460" @@ -25,7 +26,7 @@ for cipher in aes:0 aes:128 aes:256 \ ealgo=${cipher%%:*} keylen=${cipher##*:} for secsize in 512 1024 2048 4096 8192; do - rnd=`mktemp /tmp/$base.XXXXXX` || exit 1 + rnd=`mktemp $base.XXXXXX` || exit 1 mdconfig -a -t malloc -s `expr $secsize \* $sectors + 512`b -u $no || exit 1 dd if=/dev/random of=${keyfile} bs=512 count=16 >/dev/null 2>&1 diff --git a/tools/regression/geom_eli/integrity-copy.t b/tools/regression/geom_eli/integrity-copy.t index b52730a7b80..4c8efd32aca 100644 --- a/tools/regression/geom_eli/integrity-copy.t +++ b/tools/regression/geom_eli/integrity-copy.t @@ -1,11 +1,12 @@ #!/bin/sh # $FreeBSD$ +. $(dirname $0)/conf.sh + base=`basename $0` -no=45 sectors=100 -keyfile=`mktemp /tmp/$base.XXXXXX` || exit 1 -sector=`mktemp /tmp/$base.XXXXXX` || exit 1 +keyfile=`mktemp $base.XXXXXX` || exit 1 +sector=`mktemp $base.XXXXXX` || exit 1 echo "1..5520" diff --git a/tools/regression/geom_eli/integrity-data.t b/tools/regression/geom_eli/integrity-data.t index 69754664f61..7ea7c96b8ac 100644 --- a/tools/regression/geom_eli/integrity-data.t +++ b/tools/regression/geom_eli/integrity-data.t @@ -1,11 +1,12 @@ #!/bin/sh # $FreeBSD$ +. $(dirname $0)/conf.sh + base=`basename $0` -no=45 sectors=100 -keyfile=`mktemp /tmp/$base.XXXXXX` || exit 1 -sector=`mktemp /tmp/$base.XXXXXX` || exit 1 +keyfile=`mktemp $base.XXXXXX` || exit 1 +sector=`mktemp $base.XXXXXX` || exit 1 echo "1..2760" diff --git a/tools/regression/geom_eli/integrity-hmac.t b/tools/regression/geom_eli/integrity-hmac.t index 33f60295b5f..243eac9ec65 100644 --- a/tools/regression/geom_eli/integrity-hmac.t +++ b/tools/regression/geom_eli/integrity-hmac.t @@ -1,11 +1,12 @@ #!/bin/sh # $FreeBSD$ +. 
$(dirname $0)/conf.sh + base=`basename $0` -no=45 sectors=100 -keyfile=`mktemp /tmp/$base.XXXXXX` || exit 1 -sector=`mktemp /tmp/$base.XXXXXX` || exit 1 +keyfile=`mktemp $base.XXXXXX` || exit 1 +sector=`mktemp $base.XXXXXX` || exit 1 echo "1..2760" diff --git a/tools/regression/geom_eli/kill.t b/tools/regression/geom_eli/kill.t index 5c315f3fe89..ccced9f4739 100644 --- a/tools/regression/geom_eli/kill.t +++ b/tools/regression/geom_eli/kill.t @@ -1,11 +1,12 @@ #!/bin/sh # $FreeBSD$ +. $(dirname $0)/conf.sh + base=`basename $0` -no=45 sectors=100 -keyfile1=`mktemp /tmp/$base.XXXXXX` || exit 1 -keyfile2=`mktemp /tmp/$base.XXXXXX` || exit 1 +keyfile1=`mktemp $base.XXXXXX` || exit 1 +keyfile2=`mktemp $base.XXXXXX` || exit 1 mdconfig -a -t malloc -s `expr $sectors + 1` -u $no || exit 1 echo "1..9" @@ -93,5 +94,4 @@ else echo "not ok 9" fi -mdconfig -d -u $no rm -f $keyfile1 $keyfile2 diff --git a/tools/regression/geom_eli/nokey.t b/tools/regression/geom_eli/nokey.t index 19ef6804385..f32e1a4f1eb 100644 --- a/tools/regression/geom_eli/nokey.t +++ b/tools/regression/geom_eli/nokey.t @@ -1,10 +1,11 @@ #!/bin/sh # $FreeBSD$ +. $(dirname $0)/conf.sh + base=`basename $0` -no=45 sectors=100 -keyfile=`mktemp /tmp/$base.XXXXXX` || exit 1 +keyfile=`mktemp $base.XXXXXX` || exit 1 mdconfig -a -t malloc -s `expr $sectors + 1` -u $no || exit 1 echo "1..8" @@ -61,5 +62,4 @@ else echo "not ok 8" fi -mdconfig -d -u $no rm -f $keyfile diff --git a/tools/regression/geom_eli/onetime-a.t b/tools/regression/geom_eli/onetime-a.t index ab193018a83..4e26dfbc608 100644 --- a/tools/regression/geom_eli/onetime-a.t +++ b/tools/regression/geom_eli/onetime-a.t @@ -1,8 +1,9 @@ #!/bin/sh # $FreeBSD$ +. 
$(dirname $0)/conf.sh + base=`basename $0` -no=45 sectors=100 echo "1..1380" @@ -25,7 +26,7 @@ for cipher in aes:0 aes:128 aes:256 \ keylen=${cipher##*:} for aalgo in hmac/md5 hmac/sha1 hmac/ripemd160 hmac/sha256 hmac/sha384 hmac/sha512; do for secsize in 512 1024 2048 4096 8192; do - rnd=`mktemp /tmp/$base.XXXXXX` || exit 1 + rnd=`mktemp $base.XXXXXX` || exit 1 mdconfig -a -t malloc -s `expr $secsize \* $sectors + 512`b -u $no || exit 1 geli onetime -a $aalgo -e $ealgo -l $keylen -s $secsize md${no} 2>/dev/null diff --git a/tools/regression/geom_eli/onetime-d.t b/tools/regression/geom_eli/onetime-d.t index d49cb642e64..51a6abb8c97 100644 --- a/tools/regression/geom_eli/onetime-d.t +++ b/tools/regression/geom_eli/onetime-d.t @@ -1,8 +1,9 @@ #!/bin/sh # $FreeBSD$ +. $(dirname $0)/conf.sh + base=`basename $0` -no=45 sectors=100 mdconfig -a -t malloc -s $sectors -u $no || exit 1 diff --git a/tools/regression/geom_eli/onetime.t b/tools/regression/geom_eli/onetime.t index 3a7d67e04b4..17061d058e7 100644 --- a/tools/regression/geom_eli/onetime.t +++ b/tools/regression/geom_eli/onetime.t @@ -1,8 +1,9 @@ #!/bin/sh # $FreeBSD$ +. $(dirname $0)/conf.sh + base=`basename $0` -no=45 sectors=100 echo "1..460" @@ -24,7 +25,7 @@ for cipher in aes:0 aes:128 aes:256 \ ealgo=${cipher%%:*} keylen=${cipher##*:} for secsize in 512 1024 2048 4096 8192; do - rnd=`mktemp /tmp/$base.XXXXXX` || exit 1 + rnd=`mktemp $base.XXXXXX` || exit 1 mdconfig -a -t malloc -s `expr $secsize \* $sectors`b -u $no || exit 1 geli onetime -e $ealgo -l $keylen -s $secsize md${no} 2>/dev/null diff --git a/tools/regression/geom_eli/readonly.t b/tools/regression/geom_eli/readonly.t index 210a3643735..721ad62f3b5 100644 --- a/tools/regression/geom_eli/readonly.t +++ b/tools/regression/geom_eli/readonly.t @@ -1,10 +1,11 @@ #!/bin/sh # $FreeBSD$ +. 
$(dirname $0)/conf.sh + base=`basename $0` -no=45 sectors=100 -keyfile=`mktemp /tmp/$base.XXXXXX` || exit 1 +keyfile=`mktemp $base.XXXXXX` || exit 1 mdconfig -a -t malloc -s `expr $sectors + 1` -u $no || exit 1 echo "1..11" diff --git a/tools/regression/geom_eli/resize.t b/tools/regression/geom_eli/resize.t index 86ee36475fb..67d62917bf2 100644 --- a/tools/regression/geom_eli/resize.t +++ b/tools/regression/geom_eli/resize.t @@ -1,7 +1,8 @@ -#! /bin/sh -# +#!/bin/sh # $FreeBSD$ +. $(dirname $0)/conf.sh + echo 1..27 BLK=512 @@ -22,8 +23,6 @@ setsize() { # Initialise -kldload geom_eli >/dev/null 2>&1 - setsize 10 40 || echo -n "not " echo ok $i - "Sized ${md}a to 10m" i=$((i + 1)) @@ -145,6 +144,5 @@ echo ok $i - "Attached ${md}p1.eli" i=$((i + 1)) geli detach ${md}p1.eli -mdconfig -du$unit rm tmp.* diff --git a/tools/regression/geom_eli/setkey.t b/tools/regression/geom_eli/setkey.t index 611471a19be..458100c7da8 100644 --- a/tools/regression/geom_eli/setkey.t +++ b/tools/regression/geom_eli/setkey.t @@ -1,15 +1,16 @@ #!/bin/sh # $FreeBSD$ +. 
$(dirname $0)/conf.sh + base=`basename $0` -no=45 sectors=100 -rnd=`mktemp /tmp/$base.XXXXXX` || exit 1 -keyfile1=`mktemp /tmp/$base.XXXXXX` || exit 1 -keyfile2=`mktemp /tmp/$base.XXXXXX` || exit 1 -keyfile3=`mktemp /tmp/$base.XXXXXX` || exit 1 -keyfile4=`mktemp /tmp/$base.XXXXXX` || exit 1 -keyfile5=`mktemp /tmp/$base.XXXXXX` || exit 1 +rnd=`mktemp $base.XXXXXX` || exit 1 +keyfile1=`mktemp $base.XXXXXX` || exit 1 +keyfile2=`mktemp $base.XXXXXX` || exit 1 +keyfile3=`mktemp $base.XXXXXX` || exit 1 +keyfile4=`mktemp $base.XXXXXX` || exit 1 +keyfile5=`mktemp $base.XXXXXX` || exit 1 mdconfig -a -t malloc -s `expr $sectors + 1` -u $no || exit 1 echo "1..16" @@ -152,5 +153,4 @@ else echo "not ok 16" fi -mdconfig -d -u $no rm -f $keyfile1 $keyfile2 $keyfile3 $keyfile4 $keyfile5 From 72670c33c2033c227800fabab27fa86704f8aa79 Mon Sep 17 00:00:00 2001 From: Enji Cooper Date: Fri, 8 Jan 2016 19:43:18 +0000 Subject: [PATCH 05/67] - Add a conf.sh file for executing common functions with gnop - Use attach_md for attaching md(4) devices - Don't hardcode /tmp for temporary files, which violates the kyua sandbox MFC after: 3 weeks Sponsored by: EMC / Isilon Storage Division --- tools/regression/geom_nop/conf.sh | 7 +++++++ tools/regression/geom_nop/test-1.t | 11 +++-------- tools/regression/geom_nop/test-2.t | 15 ++++++--------- 3 files changed, 16 insertions(+), 17 deletions(-) diff --git a/tools/regression/geom_nop/conf.sh b/tools/regression/geom_nop/conf.sh index 0dc979713c1..e38e10c9afe 100644 --- a/tools/regression/geom_nop/conf.sh +++ b/tools/regression/geom_nop/conf.sh @@ -4,4 +4,11 @@ class="nop" base=`basename $0` +gnop_test_cleanup() +{ + [ -c /dev/${us}.nop ] && gnop destroy ${us}.nop + geom_test_cleanup +} +trap gnop_test_cleanup ABRT EXIT INT TERM + . 
`dirname $0`/../geom_subr.sh diff --git a/tools/regression/geom_nop/test-1.t b/tools/regression/geom_nop/test-1.t index f08f71da8bc..4d6b65d5ff8 100644 --- a/tools/regression/geom_nop/test-1.t +++ b/tools/regression/geom_nop/test-1.t @@ -5,21 +5,16 @@ echo "1..1" -us=45 +us=$(attach_md -t malloc -s 1M) || exit 1 -mdconfig -a -t malloc -s 1M -u $us || exit 1 - -gnop create /dev/md${us} || exit 1 +gnop create /dev/${us} || exit 1 # Size of created device should be 1MB. -size=`diskinfo /dev/md${us}.nop | awk '{print $3}'` +size=`diskinfo /dev/${us}.nop | awk '{print $3}'` if [ $size -eq 1048576 ]; then echo "ok 1" else echo "not ok 1" fi - -gnop destroy md${us}.nop -mdconfig -d -u $us diff --git a/tools/regression/geom_nop/test-2.t b/tools/regression/geom_nop/test-2.t index e0ddb17abf8..742234591db 100644 --- a/tools/regression/geom_nop/test-2.t +++ b/tools/regression/geom_nop/test-2.t @@ -3,20 +3,19 @@ . `dirname $0`/conf.sh -us=45 -src=`mktemp /tmp/$base.XXXXXX` || exit 1 -dst=`mktemp /tmp/$base.XXXXXX` || exit 1 +src=`mktemp $base.XXXXXX` || exit 1 +dst=`mktemp $base.XXXXXX` || exit 1 echo "1..1" dd if=/dev/random of=${src} bs=1m count=1 >/dev/null 2>&1 -mdconfig -a -t malloc -s 1M -u $us || exit 1 +us=$(attach_md -t malloc -s 1M) || exit 1 -gnop create /dev/md${us} || exit 1 +gnop create /dev/${us} || exit 1 -dd if=${src} of=/dev/md${us}.nop bs=1m count=1 >/dev/null 2>&1 -dd if=/dev/md${us}.nop of=${dst} bs=1m count=1 >/dev/null 2>&1 +dd if=${src} of=/dev/${us}.nop bs=1m count=1 >/dev/null 2>&1 +dd if=/dev/${us}.nop of=${dst} bs=1m count=1 >/dev/null 2>&1 if [ `md5 -q ${src}` != `md5 -q ${dst}` ]; then echo "not ok 1" @@ -24,6 +23,4 @@ else echo "ok 1" fi -gnop destroy md${us}.nop -mdconfig -d -u $us rm -f ${src} ${dst} From 5f119e8d133b7f2bced11222d53fdca832dd84f5 Mon Sep 17 00:00:00 2001 From: Enji Cooper Date: Fri, 8 Jan 2016 19:47:49 +0000 Subject: [PATCH 06/67] - Add a geom_raid3 specific cleanup function and trap on that function at exit so things are 
cleaned up properly - Use attach_md for attaching md(4) devices - Don't hardcode /tmp for temporary files, which violates the kyua sandbox MFC after: 3 weeks Sponsored by: EMC / Isilon Storage Division --- tools/regression/geom_raid3/conf.sh | 7 +++++++ tools/regression/geom_raid3/test-1.t | 17 ++++------------ tools/regression/geom_raid3/test-10.t | 21 +++++++------------- tools/regression/geom_raid3/test-11.t | 21 +++++++------------- tools/regression/geom_raid3/test-12.t | 28 ++++++++++----------------- tools/regression/geom_raid3/test-2.t | 21 +++++++------------- tools/regression/geom_raid3/test-3.t | 21 +++++++------------- tools/regression/geom_raid3/test-4.t | 21 +++++++------------- tools/regression/geom_raid3/test-5.t | 21 +++++++------------- tools/regression/geom_raid3/test-6.t | 23 ++++++++-------------- tools/regression/geom_raid3/test-7.t | 25 +++++++++--------------- tools/regression/geom_raid3/test-8.t | 23 ++++++++-------------- tools/regression/geom_raid3/test-9.t | 25 +++++++++--------------- 13 files changed, 97 insertions(+), 177 deletions(-) diff --git a/tools/regression/geom_raid3/conf.sh b/tools/regression/geom_raid3/conf.sh index ff6485c9d03..f1b270aacd4 100644 --- a/tools/regression/geom_raid3/conf.sh +++ b/tools/regression/geom_raid3/conf.sh @@ -5,4 +5,11 @@ name="$(mktemp -u graid3.XXXXXX)" class="raid3" base=`basename $0` +graid3_test_cleanup() +{ + [ -c /dev/$class/$name ] && graid3 stop $name + geom_test_cleanup +} +trap graid3_test_cleanup ABRT EXIT INT TERM + . 
`dirname $0`/../geom_subr.sh diff --git a/tools/regression/geom_raid3/test-1.t b/tools/regression/geom_raid3/test-1.t index 92029678847..4c0b4a230c6 100644 --- a/tools/regression/geom_raid3/test-1.t +++ b/tools/regression/geom_raid3/test-1.t @@ -5,15 +5,11 @@ echo "1..2" -us0=45 -us1=`expr $us0 + 1` -us2=`expr $us0 + 2` +us0=$(attach_md -t malloc -s 1M) || exit 1 +us1=$(attach_md -t malloc -s 2M) || exit 1 +us2=$(attach_md -t malloc -s 3M) || exit 1 -mdconfig -a -t malloc -s 1M -u $us0 || exit 1 -mdconfig -a -t malloc -s 2M -u $us1 || exit 1 -mdconfig -a -t malloc -s 3M -u $us2 || exit 1 - -graid3 label $name /dev/md${us0} /dev/md${us1} /dev/md${us2} 2>/dev/null || exit 1 +graid3 label $name /dev/${us0} /dev/${us1} /dev/${us2} 2>/dev/null || exit 1 devwait # Size of created device should be 2MB - 1024B. @@ -30,8 +26,3 @@ if [ $sectorsize -eq 1024 ]; then else echo "not ok 2" fi - -graid3 stop $name -mdconfig -d -u $us0 -mdconfig -d -u $us1 -mdconfig -d -u $us2 diff --git a/tools/regression/geom_raid3/test-10.t b/tools/regression/geom_raid3/test-10.t index 13201200060..edd827d03dd 100644 --- a/tools/regression/geom_raid3/test-10.t +++ b/tools/regression/geom_raid3/test-10.t @@ -5,22 +5,19 @@ echo "1..1" -us0=45 -us1=`expr $us0 + 1` -us2=`expr $us0 + 2` ddbs=2048 nblocks1=1024 nblocks2=`expr $nblocks1 / \( $ddbs / 512 \)` -src=`mktemp /tmp/$base.XXXXXX` || exit 1 -dst=`mktemp /tmp/$base.XXXXXX` || exit 1 +src=`mktemp $base.XXXXXX` || exit 1 +dst=`mktemp $base.XXXXXX` || exit 1 + +us0=$(attach_md -t malloc -s $(expr $nblocks1 + 1)) || exit 1 +us1=$(attach_md -t malloc -s $(expr $nblocks1 + 1)) || exit 1 +us2=$(attach_md -t malloc -s $(expr $nblocks1 + 1)) || exit 1 dd if=/dev/random of=${src} bs=$ddbs count=$nblocks2 >/dev/null 2>&1 -mdconfig -a -t malloc -s `expr $nblocks1 + 1` -u $us0 || exit 1 -mdconfig -a -t malloc -s `expr $nblocks1 + 1` -u $us1 || exit 1 -mdconfig -a -t malloc -s `expr $nblocks1 + 1` -u $us2 || exit 1 - -graid3 label -r $name /dev/md${us0} 
/dev/md${us1} /dev/md${us2} || exit 1 +graid3 label -r $name /dev/${us0} /dev/${us1} /dev/${us2} || exit 1 devwait dd if=${src} of=/dev/raid3/${name} bs=$ddbs count=$nblocks2 >/dev/null 2>&1 @@ -32,8 +29,4 @@ else echo "ok 1" fi -graid3 stop $name -mdconfig -d -u $us0 -mdconfig -d -u $us1 -mdconfig -d -u $us2 rm -f ${src} ${dst} diff --git a/tools/regression/geom_raid3/test-11.t b/tools/regression/geom_raid3/test-11.t index 3382214b8e8..0407261ca7d 100644 --- a/tools/regression/geom_raid3/test-11.t +++ b/tools/regression/geom_raid3/test-11.t @@ -5,22 +5,19 @@ echo "1..1" -us0=45 -us1=`expr $us0 + 1` -us2=`expr $us0 + 2` ddbs=2048 nblocks1=1024 nblocks2=`expr $nblocks1 / \( $ddbs / 512 \)` -src=`mktemp /tmp/$base.XXXXXX` || exit 1 -dst=`mktemp /tmp/$base.XXXXXX` || exit 1 +src=`mktemp $base.XXXXXX` || exit 1 +dst=`mktemp $base.XXXXXX` || exit 1 + +us0=$(attach_md -t malloc -s $(expr $nblocks1 + 1)) || exit 1 +us1=$(attach_md -t malloc -s $(expr $nblocks1 + 1)) || exit 1 +us2=$(attach_md -t malloc -s $(expr $nblocks1 + 1)) || exit 1 dd if=/dev/random of=${src} bs=$ddbs count=$nblocks2 >/dev/null 2>&1 -mdconfig -a -t malloc -s `expr $nblocks1 + 1` -u $us0 || exit 1 -mdconfig -a -t malloc -s `expr $nblocks1 + 1` -u $us1 || exit 1 -mdconfig -a -t malloc -s `expr $nblocks1 + 1` -u $us2 || exit 1 - -graid3 label -w $name /dev/md${us0} /dev/md${us1} /dev/md${us2} || exit 1 +graid3 label -w $name /dev/${us0} /dev/${us1} /dev/${us2} || exit 1 devwait dd if=${src} of=/dev/raid3/${name} bs=$ddbs count=$nblocks2 >/dev/null 2>&1 @@ -32,8 +29,4 @@ else echo "ok 1" fi -graid3 stop $name -mdconfig -d -u $us0 -mdconfig -d -u $us1 -mdconfig -d -u $us2 rm -f ${src} ${dst} diff --git a/tools/regression/geom_raid3/test-12.t b/tools/regression/geom_raid3/test-12.t index 3dec406f809..10533c6710d 100644 --- a/tools/regression/geom_raid3/test-12.t +++ b/tools/regression/geom_raid3/test-12.t @@ -5,31 +5,28 @@ echo "1..1" -us0=45 -us1=`expr $us0 + 1` -us2=`expr $us0 + 2` nblocks1=9 
nblocks2=`expr $nblocks1 - 1` nblocks3=`expr $nblocks2 / 2` -mdconfig -a -t malloc -s $nblocks1 -u $us0 || exit 1 -mdconfig -a -t malloc -s $nblocks1 -u $us1 || exit 1 -mdconfig -a -t malloc -s $nblocks1 -u $us2 || exit 1 +us0=$(attach_md -t malloc -s $nblocks1) || exit 1 +us1=$(attach_md -t malloc -s $nblocks1) || exit 1 +us2=$(attach_md -t malloc -s $nblocks1) || exit 1 -dd if=/dev/random of=/dev/md${us0} count=$nblocks1 >/dev/null 2>&1 -dd if=/dev/random of=/dev/md${us1} count=$nblocks1 >/dev/null 2>&1 -dd if=/dev/random of=/dev/md${us2} count=$nblocks1 >/dev/null 2>&1 +dd if=/dev/random of=/dev/${us0} count=$nblocks1 >/dev/null 2>&1 +dd if=/dev/random of=/dev/${us1} count=$nblocks1 >/dev/null 2>&1 +dd if=/dev/random of=/dev/${us2} count=$nblocks1 >/dev/null 2>&1 -graid3 label -w $name /dev/md${us0} /dev/md${us1} /dev/md${us2} || exit 1 +graid3 label -w $name /dev/${us0} /dev/${us1} /dev/${us2} || exit 1 devwait # Wait for synchronization. sleep 2 graid3 stop $name # Break one component. -dd if=/dev/random of=/dev/md${us1} count=$nblocks2 >/dev/null 2>&1 +dd if=/dev/random of=/dev/${us1} count=$nblocks2 >/dev/null 2>&1 # Provoke retaste of the rest components. 
-true > /dev/md${us0} -true > /dev/md${us2} +true > /dev/${us0} +true > /dev/${us2} sleep 1 dd if=/dev/raid3/${name} of=/dev/null bs=1k count=$nblocks3 >/dev/null 2>&1 @@ -39,8 +36,3 @@ if [ $ec -eq 0 ]; then else echo "ok 1" fi - -graid3 stop $name -mdconfig -d -u $us0 -mdconfig -d -u $us1 -mdconfig -d -u $us2 diff --git a/tools/regression/geom_raid3/test-2.t b/tools/regression/geom_raid3/test-2.t index 88daf782492..22ebd38f698 100644 --- a/tools/regression/geom_raid3/test-2.t +++ b/tools/regression/geom_raid3/test-2.t @@ -5,22 +5,19 @@ echo "1..1" -us0=45 -us1=`expr $us0 + 1` -us2=`expr $us0 + 2` ddbs=2048 nblocks1=1024 nblocks2=`expr $nblocks1 / \( $ddbs / 512 \)` -src=`mktemp /tmp/$base.XXXXXX` || exit 1 -dst=`mktemp /tmp/$base.XXXXXX` || exit 1 +src=`mktemp $base.XXXXXX` || exit 1 +dst=`mktemp $base.XXXXXX` || exit 1 + +us0=$(attach_md -t malloc -s $(expr $nblocks1 + 1)) || exit 1 +us1=$(attach_md -t malloc -s $(expr $nblocks1 + 1)) || exit 1 +us2=$(attach_md -t malloc -s $(expr $nblocks1 + 1)) || exit 1 dd if=/dev/random of=${src} bs=$ddbs count=$nblocks2 >/dev/null 2>&1 -mdconfig -a -t malloc -s `expr $nblocks1 + 1` -u $us0 || exit 1 -mdconfig -a -t malloc -s `expr $nblocks1 + 1` -u $us1 || exit 1 -mdconfig -a -t malloc -s `expr $nblocks1 + 1` -u $us2 || exit 1 - -graid3 label $name /dev/md${us0} /dev/md${us1} /dev/md${us2} || exit 1 +graid3 label $name /dev/${us0} /dev/${us1} /dev/${us2} || exit 1 devwait dd if=${src} of=/dev/raid3/${name} bs=$ddbs count=$nblocks2 >/dev/null 2>&1 @@ -32,8 +29,4 @@ else echo "ok 1" fi -graid3 stop $name -mdconfig -d -u $us0 -mdconfig -d -u $us1 -mdconfig -d -u $us2 rm -f ${src} ${dst} diff --git a/tools/regression/geom_raid3/test-3.t b/tools/regression/geom_raid3/test-3.t index d2c1a5f006b..f068b064242 100644 --- a/tools/regression/geom_raid3/test-3.t +++ b/tools/regression/geom_raid3/test-3.t @@ -5,22 +5,19 @@ echo "1..1" -us0=45 -us1=`expr $us0 + 1` -us2=`expr $us0 + 2` ddbs=2048 nblocks1=1024 nblocks2=`expr $nblocks1 / \( 
$ddbs / 512 \)` -src=`mktemp /tmp/$base.XXXXXX` || exit 1 -dst=`mktemp /tmp/$base.XXXXXX` || exit 1 +src=`mktemp $base.XXXXXX` || exit 1 +dst=`mktemp $base.XXXXXX` || exit 1 + +us0=$(attach_md -t malloc -s $(expr $nblocks1 + 1)) || exit 1 +us1=$(attach_md -t malloc -s $(expr $nblocks1 + 1)) || exit 1 +us2=$(attach_md -t malloc -s $(expr $nblocks1 + 1)) || exit 1 dd if=/dev/random of=${src} bs=$ddbs count=$nblocks2 >/dev/null 2>&1 -mdconfig -a -t malloc -s `expr $nblocks1 + 1` -u $us0 || exit 1 -mdconfig -a -t malloc -s `expr $nblocks1 + 1` -u $us1 || exit 1 -mdconfig -a -t malloc -s `expr $nblocks1 + 1` -u $us2 || exit 1 - -graid3 label $name /dev/md${us0} /dev/md${us1} /dev/md${us2} || exit 1 +graid3 label $name /dev/${us0} /dev/${us1} /dev/${us2} || exit 1 devwait dd if=${src} of=/dev/raid3/${name} bs=$ddbs count=$nblocks2 >/dev/null 2>&1 @@ -36,8 +33,4 @@ else echo "ok 1" fi -graid3 stop $name -mdconfig -d -u $us0 -mdconfig -d -u $us1 -mdconfig -d -u $us2 rm -f ${src} ${dst} diff --git a/tools/regression/geom_raid3/test-4.t b/tools/regression/geom_raid3/test-4.t index d437ec05246..810f13db406 100644 --- a/tools/regression/geom_raid3/test-4.t +++ b/tools/regression/geom_raid3/test-4.t @@ -5,22 +5,19 @@ echo "1..1" -us0=45 -us1=`expr $us0 + 1` -us2=`expr $us0 + 2` ddbs=2048 nblocks1=1024 nblocks2=`expr $nblocks1 / \( $ddbs / 512 \)` -src=`mktemp /tmp/$base.XXXXXX` || exit 1 -dst=`mktemp /tmp/$base.XXXXXX` || exit 1 +src=`mktemp $base.XXXXXX` || exit 1 +dst=`mktemp $base.XXXXXX` || exit 1 + +us0=$(attach_md -t malloc -s $(expr $nblocks1 + 1)) || exit 1 +us1=$(attach_md -t malloc -s $(expr $nblocks1 + 1)) || exit 1 +us2=$(attach_md -t malloc -s $(expr $nblocks1 + 1)) || exit 1 dd if=/dev/random of=${src} bs=$ddbs count=$nblocks2 >/dev/null 2>&1 -mdconfig -a -t malloc -s `expr $nblocks1 + 1` -u $us0 || exit 1 -mdconfig -a -t malloc -s `expr $nblocks1 + 1` -u $us1 || exit 1 -mdconfig -a -t malloc -s `expr $nblocks1 + 1` -u $us2 || exit 1 - -graid3 label $name 
/dev/md${us0} /dev/md${us1} /dev/md${us2} || exit 1 +graid3 label $name /dev/${us0} /dev/${us1} /dev/${us2} || exit 1 devwait # @@ -36,8 +33,4 @@ else echo "ok 1" fi -graid3 stop $name -mdconfig -d -u $us0 -mdconfig -d -u $us1 -mdconfig -d -u $us2 rm -f ${src} ${dst} diff --git a/tools/regression/geom_raid3/test-5.t b/tools/regression/geom_raid3/test-5.t index ffd85f6f15f..7bc8d42eb18 100644 --- a/tools/regression/geom_raid3/test-5.t +++ b/tools/regression/geom_raid3/test-5.t @@ -5,22 +5,19 @@ echo "1..1" -us0=45 -us1=`expr $us0 + 1` -us2=`expr $us0 + 2` ddbs=2048 nblocks1=1024 nblocks2=`expr $nblocks1 / \( $ddbs / 512 \)` -src=`mktemp /tmp/$base.XXXXXX` || exit 1 -dst=`mktemp /tmp/$base.XXXXXX` || exit 1 +src=`mktemp $base.XXXXXX` || exit 1 +dst=`mktemp $base.XXXXXX` || exit 1 + +us0=$(attach_md -t malloc -s $(expr $nblocks1 + 1)) || exit 1 +us1=$(attach_md -t malloc -s $(expr $nblocks1 + 1)) || exit 1 +us2=$(attach_md -t malloc -s $(expr $nblocks1 + 1)) || exit 1 dd if=/dev/random of=${src} bs=$ddbs count=$nblocks2 >/dev/null 2>&1 -mdconfig -a -t malloc -s `expr $nblocks1 + 1` -u $us0 || exit 1 -mdconfig -a -t malloc -s `expr $nblocks1 + 1` -u $us1 || exit 1 -mdconfig -a -t malloc -s `expr $nblocks1 + 1` -u $us2 || exit 1 - -graid3 label $name /dev/md${us0} /dev/md${us1} /dev/md${us2} || exit 1 +graid3 label $name /dev/${us0} /dev/${us1} /dev/${us2} || exit 1 devwait # @@ -36,8 +33,4 @@ else echo "ok 1" fi -graid3 stop $name -mdconfig -d -u $us0 -mdconfig -d -u $us1 -mdconfig -d -u $us2 rm -f ${src} ${dst} diff --git a/tools/regression/geom_raid3/test-6.t b/tools/regression/geom_raid3/test-6.t index 97b55036d73..20bf1922bca 100644 --- a/tools/regression/geom_raid3/test-6.t +++ b/tools/regression/geom_raid3/test-6.t @@ -5,22 +5,19 @@ echo "1..1" -us0=45 -us1=`expr $us0 + 1` -us2=`expr $us0 + 2` ddbs=2048 nblocks1=1024 nblocks2=`expr $nblocks1 / \( $ddbs / 512 \)` -src=`mktemp /tmp/$base.XXXXXX` || exit 1 -dst=`mktemp /tmp/$base.XXXXXX` || exit 1 +src=`mktemp 
$base.XXXXXX` || exit 1 +dst=`mktemp $base.XXXXXX` || exit 1 + +us0=$(attach_md -t malloc -s $(expr $nblocks1 + 1)) || exit 1 +us1=$(attach_md -t malloc -s $(expr $nblocks1 + 1)) || exit 1 +us2=$(attach_md -t malloc -s $(expr $nblocks1 + 1)) || exit 1 dd if=/dev/random of=${src} bs=$ddbs count=$nblocks2 >/dev/null 2>&1 -mdconfig -a -t malloc -s `expr $nblocks1 + 1` -u $us0 || exit 1 -mdconfig -a -t malloc -s `expr $nblocks1 + 1` -u $us1 || exit 1 -mdconfig -a -t malloc -s `expr $nblocks1 + 1` -u $us2 || exit 1 - -graid3 label $name /dev/md${us0} /dev/md${us1} /dev/md${us2} || exit 1 +graid3 label $name /dev/${us0} /dev/${us1} /dev/${us2} || exit 1 devwait dd if=${src} of=/dev/raid3/${name} bs=$ddbs count=$nblocks2 >/dev/null 2>&1 @@ -29,7 +26,7 @@ dd if=${src} of=/dev/raid3/${name} bs=$ddbs count=$nblocks2 >/dev/null 2>&1 # Rebuild of DATA component. # graid3 remove -n 1 $name -dd if=/dev/zero of=/dev/md${us1} bs=512 count=`expr $nblocks1 + 1` >/dev/null 2>&1 +dd if=/dev/zero of=/dev/${us1} bs=512 count=`expr $nblocks1 + 1` >/dev/null 2>&1 -graid3 insert -n 1 $name md${us1} +graid3 insert -n 1 $name ${us1} sleep 1 @@ -40,8 +37,4 @@ else echo "ok 1" fi -graid3 stop $name -mdconfig -d -u $us0 -mdconfig -d -u $us1 -mdconfig -d -u $us2 rm -f ${src} ${dst} diff --git a/tools/regression/geom_raid3/test-7.t b/tools/regression/geom_raid3/test-7.t index 3d89873f8e2..23666f8f4c5 100644 --- a/tools/regression/geom_raid3/test-7.t +++ b/tools/regression/geom_raid3/test-7.t @@ -5,22 +5,19 @@ echo "1..1" -us0=45 -us1=`expr $us0 + 1` -us2=`expr $us0 + 2` ddbs=2048 nblocks1=1024 nblocks2=`expr $nblocks1 / \( $ddbs / 512 \)` -src=`mktemp /tmp/$base.XXXXXX` || exit 1 -dst=`mktemp /tmp/$base.XXXXXX` || exit 1 +src=`mktemp $base.XXXXXX` || exit 1 +dst=`mktemp $base.XXXXXX` || exit 1 + +us0=$(attach_md -t malloc -s $(expr $nblocks1 + 1)) || exit 1 +us1=$(attach_md -t malloc -s $(expr $nblocks1 + 1)) || exit 1 +us2=$(attach_md -t malloc -s $(expr $nblocks1 + 1)) || exit 1 dd if=/dev/random of=${src} bs=$ddbs
count=$nblocks2 >/dev/null 2>&1 -mdconfig -a -t malloc -s `expr $nblocks1 + 1` -u $us0 || exit 1 -mdconfig -a -t malloc -s `expr $nblocks1 + 1` -u $us1 || exit 1 -mdconfig -a -t malloc -s `expr $nblocks1 + 1` -u $us2 || exit 1 - -graid3 label $name /dev/md${us0} /dev/md${us1} /dev/md${us2} || exit 1 +graid3 label $name /dev/${us0} /dev/${us1} /dev/${us2} || exit 1 devwait dd if=${src} of=/dev/raid3/${name} bs=$ddbs count=$nblocks2 >/dev/null 2>&1 @@ -29,12 +26,12 @@ dd if=${src} of=/dev/raid3/${name} bs=$ddbs count=$nblocks2 >/dev/null 2>&1 # Rebuild of PARITY component. # graid3 remove -n 2 $name -dd if=/dev/zero of=/dev/md${us2} bs=512 count=`expr $nblocks1 + 1` >/dev/null 2>&1 +dd if=/dev/zero of=/dev/${us2} bs=512 count=`expr $nblocks1 + 1` >/dev/null 2>&1 -graid3 insert -n 2 $name md${us2} +graid3 insert -n 2 $name ${us2} sleep 1 # Remove DATA component, so PARITY component can be used while reading. graid3 remove -n 1 $name -dd if=/dev/zero of=/dev/md${us1} bs=512 count=`expr $nblocks1 + 1` >/dev/null 2>&1 +dd if=/dev/zero of=/dev/${us1} bs=512 count=`expr $nblocks1 + 1` >/dev/null 2>&1 dd if=/dev/raid3/${name} of=${dst} bs=$ddbs count=$nblocks2 >/dev/null 2>&1 if [ `md5 -q ${src}` != `md5 -q ${dst}` ]; then @@ -43,8 +40,4 @@ else echo "ok 1" fi -graid3 stop $name -mdconfig -d -u $us0 -mdconfig -d -u $us1 -mdconfig -d -u $us2 rm -f ${src} ${dst} diff --git a/tools/regression/geom_raid3/test-8.t b/tools/regression/geom_raid3/test-8.t index b9621f43ef9..2eb9b1a7dc9 100644 --- a/tools/regression/geom_raid3/test-8.t +++ b/tools/regression/geom_raid3/test-8.t @@ -5,29 +5,26 @@ echo "1..1" -us0=45 -us1=`expr $us0 + 1` -us2=`expr $us0 + 2` ddbs=2048 nblocks1=1024 nblocks2=`expr $nblocks1 / \( $ddbs / 512 \)` -src=`mktemp /tmp/$base.XXXXXX` || exit 1 -dst=`mktemp /tmp/$base.XXXXXX` || exit 1 +src=`mktemp $base.XXXXXX` || exit 1 +dst=`mktemp $base.XXXXXX` || exit 1 + +us0=$(attach_md -t malloc -s $(expr $nblocks1 + 1)) || exit 1 +us1=$(attach_md -t malloc -s $(expr $nblocks1 + 1)) || exit 1
+us2=$(attach_md -t malloc -s $(expr $nblocks1 + 1)) || exit 1 dd if=/dev/random of=${src} bs=$ddbs count=$nblocks2 >/dev/null 2>&1 -mdconfig -a -t malloc -s `expr $nblocks1 + 1` -u $us0 || exit 1 -mdconfig -a -t malloc -s `expr $nblocks1 + 1` -u $us1 || exit 1 -mdconfig -a -t malloc -s `expr $nblocks1 + 1` -u $us2 || exit 1 - -graid3 label $name /dev/md${us0} /dev/md${us1} /dev/md${us2} || exit 1 +graid3 label $name /dev/${us0} /dev/${us1} /dev/${us2} || exit 1 devwait # # Writing without DATA component and rebuild of DATA component. # graid3 remove -n 1 $name -dd if=/dev/zero of=/dev/md${us1} bs=512 count=`expr $nblocks1 + 1` >/dev/null 2>&1 +dd if=/dev/zero of=/dev/${us1} bs=512 count=`expr $nblocks1 + 1` >/dev/null 2>&1 dd if=${src} of=/dev/raid3/${name} bs=$ddbs count=$nblocks2 >/dev/null 2>&1 -graid3 insert -n 1 $name md${us1} +graid3 insert -n 1 $name ${us1} sleep 1 @@ -39,8 +36,4 @@ else echo "ok 1" fi -graid3 stop $name -mdconfig -d -u $us0 -mdconfig -d -u $us1 -mdconfig -d -u $us2 rm -f ${src} ${dst} diff --git a/tools/regression/geom_raid3/test-9.t b/tools/regression/geom_raid3/test-9.t index 069501161dc..0ef010a00a7 100644 --- a/tools/regression/geom_raid3/test-9.t +++ b/tools/regression/geom_raid3/test-9.t @@ -5,35 +5,32 @@ echo "1..1" -us0=45 -us1=`expr $us0 + 1` -us2=`expr $us0 + 2` ddbs=2048 nblocks1=1024 nblocks2=`expr $nblocks1 / \( $ddbs / 512 \)` -src=`mktemp /tmp/$base.XXXXXX` || exit 1 -dst=`mktemp /tmp/$base.XXXXXX` || exit 1 +src=`mktemp $base.XXXXXX` || exit 1 +dst=`mktemp $base.XXXXXX` || exit 1 + +us0=$(attach_md -t malloc -s $(expr $nblocks1 + 1)) || exit 1 +us1=$(attach_md -t malloc -s $(expr $nblocks1 + 1)) || exit 1 +us2=$(attach_md -t malloc -s $(expr $nblocks1 + 1)) || exit 1 dd if=/dev/random of=${src} bs=$ddbs count=$nblocks2 >/dev/null 2>&1 -mdconfig -a -t malloc -s `expr $nblocks1 + 1` -u $us0 || exit 1 -mdconfig -a -t malloc -s `expr $nblocks1 + 1` -u $us1 || exit 1 -mdconfig -a -t malloc -s `expr $nblocks1 + 1` -u $us2 || exit 1 - -graid3 label $name
/dev/md${us0} /dev/md${us1} /dev/md${us2} || exit 1 +graid3 label $name /dev/${us0} /dev/${us1} /dev/${us2} || exit 1 devwait # # Writing without PARITY component and rebuild of PARITY component. # graid3 remove -n 2 $name -dd if=/dev/zero of=/dev/md${us2} bs=512 count=`expr $nblocks1 + 1` >/dev/null 2>&1 +dd if=/dev/zero of=/dev/${us2} bs=512 count=`expr $nblocks1 + 1` >/dev/null 2>&1 dd if=${src} of=/dev/raid3/${name} bs=$ddbs count=$nblocks2 >/dev/null 2>&1 -graid3 insert -n 2 $name md${us2} +graid3 insert -n 2 $name ${us2} sleep 1 # Remove DATA component, so PARITY component can be used while reading. graid3 remove -n 1 $name -dd if=/dev/zero of=/dev/md${us1} bs=512 count=`expr $nblocks1 + 1` >/dev/null 2>&1 +dd if=/dev/zero of=/dev/${us1} bs=512 count=`expr $nblocks1 + 1` >/dev/null 2>&1 dd if=/dev/raid3/${name} of=${dst} bs=$ddbs count=$nblocks2 >/dev/null 2>&1 if [ `md5 -q ${src}` != `md5 -q ${dst}` ]; then @@ -42,8 +39,4 @@ else echo "ok 1" fi -graid3 stop $name -mdconfig -d -u $us0 -mdconfig -d -u $us1 -mdconfig -d -u $us2 rm -f ${src} ${dst} From 2bab0c553588a7c2a8dd59a0f20b9b5ded528274 Mon Sep 17 00:00:00 2001 From: Gleb Smirnoff Date: Fri, 8 Jan 2016 20:34:57 +0000 Subject: [PATCH 07/67] New sendfile(2) syscall. A joint effort of NGINX and Netflix from 2013 and up to now. The new sendfile is the code that Netflix uses to send their multiple tens of gigabits of data per second. The new implementation features asynchronous I/O, when I/O operations are launched, but not awaited to be complete. An explanation of why such behavior is beneficial compared to old one is going to be too long for a commit message, so we will skip it here. Additional features of new syscall are extra flags, which provide an application more control over data sent. The SF_NOCACHE flag tells kernel that data shouldn't be cached after it was sent. The SF_READAHEAD() macro allows to specify readahead size in pages. The new syscall is a drop-in replacement. No modifications are required to applications.
One can take nginx binary for stable/10 and run it successfully on head. Although SF_NODISKIO lost its original sense, as now sendfile doesn't block, and now means something completely different (tm), using the new sendfile the old way is absolutely safe. Celebrates: Netflix global launch! Sponsored by: Nginx, Inc. Sponsored by: Netflix Relnotes: yes --- lib/libc/sys/sendfile.2 | 123 +++++-- sys/dev/ti/if_ti.c | 4 +- sys/kern/uipc_mbuf.c | 4 + sys/kern/uipc_syscalls.c | 688 ++++++++++++++++++++++++--------------- sys/sys/mbuf.h | 4 +- sys/sys/sf_buf.h | 7 + sys/sys/socket.h | 5 +- usr.bin/netstat/mbuf.c | 26 +- 8 files changed, 567 insertions(+), 294 deletions(-) diff --git a/lib/libc/sys/sendfile.2 b/lib/libc/sys/sendfile.2 index b363382af15..2b52dd9c883 100644 --- a/lib/libc/sys/sendfile.2 +++ b/lib/libc/sys/sendfile.2 @@ -25,7 +25,7 @@ .\" .\" $FreeBSD$ .\" -.Dd January 7, 2010 +.Dd January 7, 2016 .Dt SENDFILE 2 .Os .Sh NAME @@ -46,7 +46,7 @@ The .Fn sendfile system call -sends a regular file specified by descriptor +sends a regular file or shared memory object specified by descriptor .Fa fd out a stream socket specified by descriptor .Fa s . @@ -101,32 +101,55 @@ the system will write the total number of bytes sent on the socket to the variable pointed to by .Fa sbytes . .Pp -The +The least significant 16 bits of .Fa flags argument is a bitmap of these values: -.Bl -item -offset indent -.It -.Dv SF_NODISKIO . -This flag causes any -.Fn sendfile -call which would block on disk I/O to instead -return -.Er EBUSY . -Busy servers may benefit by transferring requests that would -block to a separate I/O worker thread. -.It -.Dv SF_MNOWAIT . -Do not wait for some kernel resource to become available, -in particular, -.Vt mbuf -and -.Vt sf_buf . -The flag does not make the -.Fn sendfile -syscall truly non-blocking, since other resources are still allocated -in a blocking fashion. -.It -.Dv SF_SYNC . 
+.Bl -tag -offset indent +.It Dv SF_NODISKIO +This flag causes +.Nm +to return +.Er EBUSY +instead of blocking when a busy page is encountered. +This rare situation can happen if some other process is now working +with the same region of the file. +It is advised to retry the operation after a short period. +.Pp +Note that in older +.Fx +versions the +.Dv SF_NODISKIO +had a slightly different notion. +The flag prevented +.Nm +from running I/O operations if an invalid (not cached) page was encountered, +thus avoiding blocking on I/O. +Starting with +.Fx 11 +.Nm +sending files off the +.Xr ffs 7 +filesystem doesn't block on I/O +(see +.Sx IMPLEMENTATION NOTES +), so the condition no longer applies. +However, it is safe if an application utilizes +.Dv SF_NODISKIO +and on +.Er EBUSY +performs the same action as it did in +older +.Fx +versions, e.g. +.Xr aio_read 2 , +.Xr read 2 +or +.Nm +in a different context. +.It Dv SF_NOCACHE +The data sent to socket will not be cached by the virtual memory system, +and will be freed directly to the pool of free pages. +.It Dv SF_SYNC .Nm sleeps until the network stack no longer references the VM pages of the file, making subsequent modifications to it safe. @@ -134,6 +157,22 @@ Please note that this is not a guarantee that the data has actually been sent. .El .Pp +The most significant 16 bits of +.Fa flags +specify the number of pages that +.Nm +may read ahead when reading the file. +A macro +.Fn SF_FLAGS +is provided to combine readahead amount and flags. +Example shows specifying readahead of 16 pages and +.Dv SF_NOCACHE +flag: +.Pp +.Bd -literal -offset indent -compact + SF_FLAGS(16, SF_NOCACHE) +.Ed +.Pp When using a socket marked for non-blocking I/O, .Fn sendfile may send fewer bytes than requested. @@ -149,6 +188,18 @@ The .Fx implementation of .Fn sendfile +doesn't block on disk I/O when it sends a file off the +.Xr ffs 7 +filesystem.
+The syscall returns success before the actual I/O completes, and data +is put into the socket later unattended. +However, the order of data in the socket is preserved, so it is safe +to do further writes to the socket. +.Pp +The +.Fx +implementation of +.Fn sendfile is "zero-copy", meaning that it has been optimized so that copying of the file data is avoided. .Sh TUNING On some architectures, this system call internally uses a special @@ -232,12 +283,10 @@ The argument is not a valid socket descriptor. .It Bq Er EBUSY -Completing the entire transfer would have required disk I/O, so -it was aborted. -Partial data may have been sent. -(This error can only occur when +A busy page was encountered and .Dv SF_NODISKIO -is specified.) +had been specified. +Partial data may have been sent. .It Bq Er EFAULT An invalid address was specified for an argument. .It Bq Er EINTR @@ -310,9 +359,19 @@ first appeared in .Fx 3.0 . This manual page first appeared in .Fx 3.1 . +In +.Fx 10 +support for sending shared memory descriptors had been introduced. +In +.Fx 11 +a non-blocking implementation had been introduced. .Sh AUTHORS -The +The initial implementation of .Fn sendfile system call and this manual page were written by .An David G. Lawrence Aq Mt dg@dglawrence.com . +The +.Fx 11 +implementation was written by +.An Gleb Smirnoff Aq Mt glebius@FreeBSD.org . 
diff --git a/sys/dev/ti/if_ti.c b/sys/dev/ti/if_ti.c index 9b9c4a173ac..eba7f7b0102 100644 --- a/sys/dev/ti/if_ti.c +++ b/sys/dev/ti/if_ti.c @@ -1634,7 +1634,7 @@ ti_newbuf_jumbo(struct ti_softc *sc, int idx, struct mbuf *m_old) m[i]->m_data = (void *)sf_buf_kva(sf[i]); m[i]->m_len = PAGE_SIZE; MEXTADD(m[i], sf_buf_kva(sf[i]), PAGE_SIZE, - sf_buf_mext, (void*)sf_buf_kva(sf[i]), sf[i], + sf_mext_free, (void*)sf_buf_kva(sf[i]), sf[i], 0, EXT_DISPOSABLE); m[i]->m_next = m[i+1]; } @@ -1699,7 +1699,7 @@ nobufs: if (m[i]) m_freem(m[i]); if (sf[i]) - sf_buf_mext((void *)sf_buf_kva(sf[i]), sf[i]); + sf_mext_free((void *)sf_buf_kva(sf[i]), sf[i]); } return (ENOBUFS); } diff --git a/sys/kern/uipc_mbuf.c b/sys/kern/uipc_mbuf.c index de6b313f1b4..0b0b6972531 100644 --- a/sys/kern/uipc_mbuf.c +++ b/sys/kern/uipc_mbuf.c @@ -338,6 +338,9 @@ mb_free_ext(struct mbuf *m) case EXT_SFBUF: sf_ext_free(m->m_ext.ext_arg1, m->m_ext.ext_arg2); break; + case EXT_SFBUF_NOCACHE: + sf_ext_free_nocache(m->m_ext.ext_arg1, m->m_ext.ext_arg2); + break; default: KASSERT(m->m_ext.ext_cnt != NULL, ("%s: no refcounting pointer on %p", __func__, m)); @@ -404,6 +407,7 @@ mb_dupcl(struct mbuf *n, const struct mbuf *m) switch (m->m_ext.ext_type) { case EXT_SFBUF: + case EXT_SFBUF_NOCACHE: sf_ext_ref(m->m_ext.ext_arg1, m->m_ext.ext_arg2); break; default: diff --git a/sys/kern/uipc_syscalls.c b/sys/kern/uipc_syscalls.c index c33a2cf30a2..cac698a97ca 100644 --- a/sys/kern/uipc_syscalls.c +++ b/sys/kern/uipc_syscalls.c @@ -113,15 +113,6 @@ static int getpeername1(struct thread *td, struct getpeername_args *uap, counter_u64_t sfstat[sizeof(struct sfstat) / sizeof(uint64_t)]; -/* - * sendfile(2)-related variables and associated sysctls - */ -static SYSCTL_NODE(_kern_ipc, OID_AUTO, sendfile, CTLFLAG_RW, 0, - "sendfile(2) tunables"); -static int sfreadahead = 1; -SYSCTL_INT(_kern_ipc_sendfile, OID_AUTO, readahead, CTLFLAG_RW, - &sfreadahead, 0, "Number of sendfile(2) read-ahead MAXBSIZE blocks"); - static void 
sfstat_init(const void *unused) { @@ -1858,13 +1849,12 @@ sf_ext_free(void *arg1, void *arg2) sf_buf_free(sf); vm_page_lock(pg); - vm_page_unwire(pg, PQ_INACTIVE); /* * Check for the object going away on us. This can * happen since we don't hold a reference to it. * If so, we're responsible for freeing the page. */ - if (pg->wire_count == 0 && pg->object == NULL) + if (vm_page_unwire(pg, PQ_INACTIVE) && pg->object == NULL) vm_page_free(pg); vm_page_unlock(pg); @@ -1877,6 +1867,43 @@ sf_ext_free(void *arg1, void *arg2) } } +/* + * Same as above, but forces the page to be detached from the object + * and go into free pool. + */ +void +sf_ext_free_nocache(void *arg1, void *arg2) +{ + struct sf_buf *sf = arg1; + struct sendfile_sync *sfs = arg2; + vm_page_t pg = sf_buf_page(sf); + + sf_buf_free(sf); + + vm_page_lock(pg); + if (vm_page_unwire(pg, PQ_NONE)) { + vm_object_t obj; + + /* Try to free the page, but only if it is cheap to. */ + if ((obj = pg->object) == NULL) + vm_page_free(pg); + else if (!vm_page_xbusied(pg) && VM_OBJECT_TRYWLOCK(obj)) { + vm_page_free(pg); + VM_OBJECT_WUNLOCK(obj); + } else + vm_page_deactivate(pg); + } + vm_page_unlock(pg); + + if (sfs != NULL) { + mtx_lock(&sfs->mtx); + KASSERT(sfs->count > 0, ("Sendfile sync botchup count == 0")); + if (--sfs->count == 0) + cv_signal(&sfs->cv); + mtx_unlock(&sfs->mtx); + } +} + /* * sendfile(2) * @@ -1974,103 +2001,252 @@ freebsd4_sendfile(struct thread *td, struct freebsd4_sendfile_args *uap) } #endif /* COMPAT_FREEBSD4 */ -static int -sendfile_readpage(vm_object_t obj, struct vnode *vp, int nd, - off_t off, int xfsize, int bsize, struct thread *td, vm_page_t *res) + /* + * How much data to put into page i of n. + * Only first and last pages are special. + */ +static inline off_t +xfsize(int i, int n, off_t off, off_t len) { - vm_page_t m; - vm_pindex_t pindex; - ssize_t resid; - int error, readahead, rv; - pindex = OFF_TO_IDX(off); - VM_OBJECT_WLOCK(obj); - m = vm_page_grab(obj, pindex, (vp != NULL ? 
VM_ALLOC_NOBUSY | - VM_ALLOC_IGN_SBUSY : 0) | VM_ALLOC_WIRED | VM_ALLOC_NORMAL); + if (i == 0) + return (omin(PAGE_SIZE - (off & PAGE_MASK), len)); - /* - * Check if page is valid for what we need, otherwise initiate I/O. - * - * The non-zero nd argument prevents disk I/O, instead we - * return the caller what he specified in nd. In particular, - * if we already turned some pages into mbufs, nd == EAGAIN - * and the main function send them the pages before we come - * here again and block. - */ - if (m->valid != 0 && vm_page_is_valid(m, off & PAGE_MASK, xfsize)) { - if (vp == NULL) - vm_page_xunbusy(m); - VM_OBJECT_WUNLOCK(obj); - *res = m; - return (0); - } else if (nd != 0) { - if (vp == NULL) - vm_page_xunbusy(m); - error = nd; - goto free_page; + if (i == n - 1 && ((off + len) & PAGE_MASK) > 0) + return ((off + len) & PAGE_MASK); + + return (PAGE_SIZE); +} + +/* + * Offset within object for i page. + */ +static inline vm_offset_t +vmoff(int i, off_t off) +{ + + if (i == 0) + return ((vm_offset_t)off); + + return (trunc_page(off + i * PAGE_SIZE)); +} + +/* + * Pretend as if we don't have enough space, subtract xfsize() of + * all pages that failed. + */ +static inline void +fixspace(int old, int new, off_t off, int *space) +{ + + KASSERT(old > new, ("%s: old %d new %d", __func__, old, new)); + + /* Subtract last one. */ + *space -= xfsize(old - 1, old, off, *space); + old--; + + if (new == old) + /* There was only one page. */ + return; + + /* Subtract first one. */ + if (new == 0) { + *space -= xfsize(0, old, off, *space); + new++; } - /* - * Get the page from backing store. - */ - error = 0; - if (vp != NULL) { - VM_OBJECT_WUNLOCK(obj); - readahead = sfreadahead * MAXBSIZE; + /* Rest of pages are full sized. */ + *space -= (old - new) * PAGE_SIZE; + + KASSERT(*space >= 0, ("%s: space went backwards", __func__)); +} + +/* + * Structure describing a single sendfile(2) I/O, which may consist of + * several underlying pager I/Os. 
+ * + * The syscall context allocates the structure and initializes 'nios' + * to 1. As sendfile_swapin() runs through pages and starts asynchronous + * paging operations, it increments 'nios'. + * + * Every I/O completion calls sf_iodone(), which decrements the 'nios', and + * the syscall also calls sf_iodone() after allocating all mbufs, linking them + * and sending to socket. Whoever reaches zero 'nios' is responsible to + * call pru_ready on the socket, to notify it of readiness of the data. + */ +struct sf_io { + volatile u_int nios; + u_int error; + int npages; + struct file *sock_fp; + struct mbuf *m; + vm_page_t pa[]; +}; + +static void +sf_iodone(void *arg, vm_page_t *pg, int count, int error) +{ + struct sf_io *sfio = arg; + struct socket *so; + + for (int i = 0; i < count; i++) + vm_page_xunbusy(pg[i]); + + if (error) + sfio->error = error; + + if (!refcount_release(&sfio->nios)) + return; + + so = sfio->sock_fp->f_data; + + if (sfio->error) { + struct mbuf *m; /* - * Use vn_rdwr() instead of the pager interface for - * the vnode, to allow the read-ahead. + * I/O operation failed. The state of data in the socket + * is now inconsistent, and all that we can do is to tear + * it down. Protocol abort method would tear down protocol + * state, free all ready mbufs and detach not ready ones. + * We will free the mbufs corresponding to this I/O manually. * - * XXXMAC: Because we don't have fp->f_cred here, we - * pass in NOCRED. This is probably wrong, but is - * consistent with our original implementation. + * The socket would be marked with EIO and made available + * for read, so that application receives EIO on next + * syscall and eventually closes the socket.
*/ - error = vn_rdwr(UIO_READ, vp, NULL, readahead, trunc_page(off), - UIO_NOCOPY, IO_NODELOCKED | IO_VMIO | ((readahead / - bsize) << IO_SEQSHIFT), td->td_ucred, NOCRED, &resid, td); - SFSTAT_INC(sf_iocnt); - VM_OBJECT_WLOCK(obj); + so->so_proto->pr_usrreqs->pru_abort(so); + so->so_error = EIO; + + m = sfio->m; + for (int i = 0; i < sfio->npages; i++) + m = m_free(m); } else { - if (vm_pager_has_page(obj, pindex, NULL, NULL)) { - rv = vm_pager_get_pages(obj, &m, 1, NULL, NULL); - SFSTAT_INC(sf_iocnt); - if (rv != VM_PAGER_OK) { - vm_page_lock(m); - vm_page_free(m); - vm_page_unlock(m); - m = NULL; - error = EIO; - } - } else { - pmap_zero_page(m); - m->valid = VM_PAGE_BITS_ALL; - m->dirty = 0; - } - if (m != NULL) - vm_page_xunbusy(m); + CURVNET_SET(so->so_vnet); + (void )(so->so_proto->pr_usrreqs->pru_ready)(so, sfio->m, + sfio->npages); + CURVNET_RESTORE(); } - if (error == 0) { - *res = m; - } else if (m != NULL) { -free_page: - vm_page_lock(m); - vm_page_unwire(m, PQ_INACTIVE); + + /* XXXGL: curthread */ + fdrop(sfio->sock_fp, curthread); + free(sfio, M_TEMP); +} + +/* + * Iterate through pages vector and request paging for non-valid pages. + */ +static int +sendfile_swapin(vm_object_t obj, struct sf_io *sfio, off_t off, off_t len, + int npages, int rhpages, int flags) +{ + vm_page_t *pa = sfio->pa; + int nios; + + nios = 0; + flags = (flags & SF_NODISKIO) ? VM_ALLOC_NOWAIT : 0; + + /* + * First grab all the pages and wire them. Note that we grab + * only required pages. Readahead pages are dealt with later. + */ + VM_OBJECT_WLOCK(obj); + for (int i = 0; i < npages; i++) { + pa[i] = vm_page_grab(obj, OFF_TO_IDX(vmoff(i, off)), + VM_ALLOC_WIRED | VM_ALLOC_NORMAL | flags); + if (pa[i] == NULL) { + npages = i; + rhpages = 0; + break; + } + } + + for (int i = 0; i < npages;) { + int j, a, count, rv; + + /* Skip valid pages. 
*/ + if (vm_page_is_valid(pa[i], vmoff(i, off) & PAGE_MASK, + xfsize(i, npages, off, len))) { + vm_page_xunbusy(pa[i]); + SFSTAT_INC(sf_pages_valid); + i++; + continue; + } /* - * See if anyone else might know about this page. If - * not and it is not valid, then free it. + * Now 'i' points to first invalid page, iterate further + * to make 'j' point at first valid after a bunch of + * invalid ones. */ - if (m->wire_count == 0 && m->valid == 0 && !vm_page_busied(m)) - vm_page_free(m); - vm_page_unlock(m); + for (j = i + 1; j < npages; j++) + if (vm_page_is_valid(pa[j], vmoff(j, off) & PAGE_MASK, + xfsize(j, npages, off, len))) { + SFSTAT_INC(sf_pages_valid); + break; + } + + /* + * Now we got region of invalid pages between 'i' and 'j'. + * Check that they belong to pager. They may not be there, + * which is a regular situation for shmem pager. For vnode + * pager this happens only in case of sparse file. + * + * Important feature of vm_pager_has_page() is the hint + * stored in 'a', about how many pages we can pagein after + * this page in a single I/O. + */ + while (!vm_pager_has_page(obj, OFF_TO_IDX(vmoff(i, off)), + NULL, &a) && i < j) { + pmap_zero_page(pa[i]); + pa[i]->valid = VM_PAGE_BITS_ALL; + pa[i]->dirty = 0; + vm_page_xunbusy(pa[i]); + i++; + } + if (i == j) + continue; + + /* + * We want to pagein as many pages as possible, limited only + * by the 'a' hint and actual request. + * + * We should not pagein into already valid page, thus if + * 'j' didn't reach last page, trim by that page. + * + * When the pagein fulfils the request, also specify readahead. + */ + if (j < npages) + a = min(a, j - i - 1); + count = min(a + 1, npages - i); + + refcount_acquire(&sfio->nios); + rv = vm_pager_get_pages_async(obj, pa + i, count, NULL, + i + count == npages ? 
&rhpages : NULL, + &sf_iodone, sfio); + KASSERT(rv == VM_PAGER_OK, ("%s: pager fail obj %p page %p", + __func__, obj, pa[i])); + + SFSTAT_INC(sf_iocnt); + SFSTAT_ADD(sf_pages_read, count); + if (i + count == npages) + SFSTAT_ADD(sf_rhpages_read, rhpages); + +#ifdef INVARIANTS + for (j = i; j < i + count && j < npages; j++) + KASSERT(pa[j] == vm_page_lookup(obj, + OFF_TO_IDX(vmoff(j, off))), + ("pa[j] %p lookup %p\n", pa[j], + vm_page_lookup(obj, OFF_TO_IDX(vmoff(j, off))))); +#endif + i += count; + nios++; } - KASSERT(error != 0 || (m->wire_count > 0 && - vm_page_is_valid(m, off & PAGE_MASK, xfsize)), - ("wrong page state m %p off %#jx xfsize %d", m, (uintmax_t)off, - xfsize)); + VM_OBJECT_WUNLOCK(obj); - return (error); + + if (nios == 0 && npages != 0) + SFSTAT_INC(sf_noiocnt); + + return (nios); } static int @@ -2178,80 +2354,65 @@ vn_sendfile(struct file *fp, int sockfd, struct uio *hdr_uio, struct vnode *vp; struct vm_object *obj; struct socket *so; - struct mbuf *m; + struct mbuf *m, *mh, *mhtail; struct sf_buf *sf; - struct vm_page *pg; struct shmfd *shmfd; struct sendfile_sync *sfs; struct vattr va; - off_t off, xfsize, fsbytes, sbytes, rem, obj_size; - int error, bsize, nd, hdrlen, mnw; + off_t off, sbytes, rem, obj_size; + int error, softerr, bsize, hdrlen; - pg = NULL; obj = NULL; so = NULL; - m = NULL; + m = mh = NULL; sfs = NULL; - fsbytes = sbytes = 0; - hdrlen = mnw = 0; - rem = nbytes; - obj_size = 0; + sbytes = 0; + softerr = 0; error = sendfile_getobj(td, fp, &obj, &vp, &shmfd, &obj_size, &bsize); if (error != 0) return (error); - if (rem == 0) - rem = obj_size; error = kern_sendfile_getsock(td, sockfd, &sock_fp, &so); if (error != 0) goto out; - /* - * Do not wait on memory allocations but return ENOMEM for - * caller to retry later. - * XXX: Experimental. 
- */ - if (flags & SF_MNOWAIT) - mnw = 1; - - if (flags & SF_SYNC) { - sfs = malloc(sizeof *sfs, M_TEMP, M_WAITOK | M_ZERO); - mtx_init(&sfs->mtx, "sendfile", NULL, MTX_DEF); - cv_init(&sfs->cv, "sendfile"); - } - #ifdef MAC error = mac_socket_check_send(td->td_ucred, so); if (error != 0) goto out; #endif + SFSTAT_INC(sf_syscalls); + SFSTAT_ADD(sf_rhpages_requested, SF_READAHEAD(flags)); + + if (flags & SF_SYNC) { + sfs = malloc(sizeof *sfs, M_TEMP, M_WAITOK | M_ZERO); + mtx_init(&sfs->mtx, "sendfile", NULL, MTX_DEF); + cv_init(&sfs->cv, "sendfile"); + } + /* If headers are specified copy them into mbufs. */ - if (hdr_uio != NULL) { + if (hdr_uio != NULL && hdr_uio->uio_resid > 0) { hdr_uio->uio_td = td; hdr_uio->uio_rw = UIO_WRITE; - if (hdr_uio->uio_resid > 0) { - /* - * In FBSD < 5.0 the nbytes to send also included - * the header. If compat is specified subtract the - * header size from nbytes. - */ - if (kflags & SFK_COMPAT) { - if (nbytes > hdr_uio->uio_resid) - nbytes -= hdr_uio->uio_resid; - else - nbytes = 0; - } - m = m_uiotombuf(hdr_uio, (mnw ? M_NOWAIT : M_WAITOK), - 0, 0, 0); - if (m == NULL) { - error = mnw ? EAGAIN : ENOBUFS; - goto out; - } - hdrlen = m_length(m, NULL); + /* + * In FBSD < 5.0 the nbytes to send also included + * the header. If compat is specified subtract the + * header size from nbytes. + */ + if (kflags & SFK_COMPAT) { + if (nbytes > hdr_uio->uio_resid) + nbytes -= hdr_uio->uio_resid; + else + nbytes = 0; } - } + mh = m_uiotombuf(hdr_uio, M_WAITOK, 0, 0, 0); + hdrlen = m_length(mh, &mhtail); + } else + hdrlen = 0; + + rem = nbytes ? omin(nbytes, obj_size - offset) : obj_size - offset; /* * Protect against multiple writers to the socket. @@ -2272,21 +2433,13 @@ vn_sendfile(struct file *fp, int sockfd, struct uio *hdr_uio, * The outer loop checks the state and available space of the socket * and takes care of the overall progress. 
*/ - for (off = offset; ; ) { + for (off = offset; rem > 0; ) { + struct sf_io *sfio; + vm_page_t *pa; struct mbuf *mtail; - int loopbytes; - int space; - int done; - - if ((nbytes != 0 && nbytes == fsbytes) || - (nbytes == 0 && obj_size == fsbytes)) - break; + int nios, space, npages, rhpages; mtail = NULL; - loopbytes = 0; - space = 0; - done = 0; - /* * Check the socket state for ongoing connection, * no errors and space in socket buffer. @@ -2362,49 +2515,58 @@ retry_space: VOP_UNLOCK(vp, 0); goto done; } - obj_size = va.va_size; + if (va.va_size != obj_size) { + if (nbytes == 0) + rem += va.va_size - obj_size; + else if (offset + nbytes > va.va_size) + rem -= (offset + nbytes - va.va_size); + obj_size = va.va_size; + } } + if (space > rem) + space = rem; + + npages = howmany(space + (off & PAGE_MASK), PAGE_SIZE); + + /* + * Calculate maximum allowed number of pages for readahead + * at this iteration. First, we allow readahead up to "rem". + * If application wants more, let it be, but there is no + * reason to go above MAXPHYS. Also check against "obj_size", + * since vm_pager_has_page() can hint beyond EOF. + */ + rhpages = howmany(rem + (off & PAGE_MASK), PAGE_SIZE) - npages; + rhpages += SF_READAHEAD(flags); + rhpages = min(howmany(MAXPHYS, PAGE_SIZE), rhpages); + rhpages = min(howmany(obj_size - trunc_page(off), PAGE_SIZE) - + npages, rhpages); + + sfio = malloc(sizeof(struct sf_io) + + npages * sizeof(vm_page_t), M_TEMP, M_WAITOK); + refcount_init(&sfio->nios, 1); + sfio->error = 0; + + nios = sendfile_swapin(obj, sfio, off, space, npages, rhpages, + flags); + /* * Loop and construct maximum sized mbuf chain to be bulk * dumped into socket buffer. */ - while (space > loopbytes) { - vm_offset_t pgoff; + pa = sfio->pa; + for (int i = 0; i < npages; i++) { struct mbuf *m0; /* - * Calculate the amount to transfer. - * Not to exceed a page, the EOF, - * or the passed in nbytes. + * If a page wasn't grabbed successfully, then + * trim the array. 
Can happen only with SF_NODISKIO. */ - pgoff = (vm_offset_t)(off & PAGE_MASK); - rem = obj_size - offset; - if (nbytes != 0) - rem = omin(rem, nbytes); - rem -= fsbytes + loopbytes; - xfsize = omin(PAGE_SIZE - pgoff, rem); - xfsize = omin(space - loopbytes, xfsize); - if (xfsize <= 0) { - done = 1; /* all data sent */ - break; - } - - /* - * Attempt to look up the page. Allocate - * if not found or wait and loop if busy. - */ - if (m != NULL) - nd = EAGAIN; /* send what we already got */ - else if ((flags & SF_NODISKIO) != 0) - nd = EBUSY; - else - nd = 0; - error = sendfile_readpage(obj, vp, nd, off, - xfsize, bsize, td, &pg); - if (error != 0) { - if (error == EAGAIN) - error = 0; /* not a real error */ + if (pa[i] == NULL) { + SFSTAT_INC(sf_busy); + fixspace(npages, i, off, &space); + npages = i; + softerr = EBUSY; break; } @@ -2417,56 +2579,59 @@ retry_space: * threads might exhaust the buffers and then * deadlock. */ - sf = sf_buf_alloc(pg, (mnw || m != NULL) ? SFB_NOWAIT : - SFB_CATCH); + sf = sf_buf_alloc(pa[i], + m != NULL ? SFB_NOWAIT : SFB_CATCH); if (sf == NULL) { SFSTAT_INC(sf_allocfail); - vm_page_lock(pg); - vm_page_unwire(pg, PQ_INACTIVE); - KASSERT(pg->object != NULL, - ("%s: object disappeared", __func__)); - vm_page_unlock(pg); + for (int j = i; j < npages; j++) { + vm_page_lock(pa[j]); + vm_page_unwire(pa[j], PQ_INACTIVE); + vm_page_unlock(pa[j]); + } if (m == NULL) - error = (mnw ? EAGAIN : EINTR); + softerr = ENOBUFS; + fixspace(npages, i, off, &space); + npages = i; break; } - /* - * Get an mbuf and set it up as having - * external storage. - */ - m0 = m_get((mnw ? M_NOWAIT : M_WAITOK), MT_DATA); - if (m0 == NULL) { - error = (mnw ? EAGAIN : ENOBUFS); - sf_ext_free(sf, NULL); - break; - } - /* - * Attach EXT_SFBUF external storage. 
- */ - m0->m_ext.ext_buf = (caddr_t )sf_buf_kva(sf); + m0 = m_get(M_WAITOK, MT_DATA); + m0->m_ext.ext_buf = (char *)sf_buf_kva(sf); m0->m_ext.ext_size = PAGE_SIZE; m0->m_ext.ext_arg1 = sf; m0->m_ext.ext_arg2 = sfs; - m0->m_ext.ext_type = EXT_SFBUF; + /* + * SF_NOCACHE sets the page as being freed upon send. + * However, we ignore it for the last page in 'space', + * if the page is truncated, and we got more data to + * send (rem > space), or if we have readahead + * configured (rhpages > 0). + */ + if ((flags & SF_NOCACHE) == 0 || + (i == npages - 1 && + ((off + space) & PAGE_MASK) && + (rem > space || rhpages > 0))) + m0->m_ext.ext_type = EXT_SFBUF; + else + m0->m_ext.ext_type = EXT_SFBUF_NOCACHE; m0->m_ext.ext_flags = 0; - m0->m_flags |= (M_EXT|M_RDONLY); - m0->m_data = (char *)sf_buf_kva(sf) + pgoff; - m0->m_len = xfsize; + m0->m_flags |= (M_EXT | M_RDONLY); + if (nios) + m0->m_flags |= M_NOTREADY; + m0->m_data = (char *)sf_buf_kva(sf) + + (vmoff(i, off) & PAGE_MASK); + m0->m_len = xfsize(i, npages, off, space); + + if (i == 0) + sfio->m = m0; /* Append to mbuf chain. */ if (mtail != NULL) mtail->m_next = m0; - else if (m != NULL) - m_last(m)->m_next = m0; else m = m0; mtail = m0; - /* Keep track of bits processed. */ - loopbytes += xfsize; - off += xfsize; - if (sfs != NULL) { mtx_lock(&sfs->mtx); sfs->count++; @@ -2477,49 +2642,60 @@ retry_space: if (vp != NULL) VOP_UNLOCK(vp, 0); - /* Add the buffer chain to the socket buffer. */ - if (m != NULL) { - int mlen, err; + /* Keep track of bytes processed. */ + off += space; + rem -= space; - mlen = m_length(m, NULL); - SOCKBUF_LOCK(&so->so_snd); - if (so->so_snd.sb_state & SBS_CANTSENDMORE) { - error = EPIPE; - SOCKBUF_UNLOCK(&so->so_snd); - goto done; - } - SOCKBUF_UNLOCK(&so->so_snd); - CURVNET_SET(so->so_vnet); - /* Avoid error aliasing. 
*/ - err = (*so->so_proto->pr_usrreqs->pru_send) - (so, 0, m, NULL, NULL, td); - CURVNET_RESTORE(); - if (err == 0) { - /* - * We need two counters to get the - * file offset and nbytes to send - * right: - * - sbytes contains the total amount - * of bytes sent, including headers. - * - fsbytes contains the total amount - * of bytes sent from the file. - */ - sbytes += mlen; - fsbytes += mlen; - if (hdrlen) { - fsbytes -= hdrlen; - hdrlen = 0; - } - } else if (error == 0) - error = err; - m = NULL; /* pru_send always consumes */ + /* Prepend header, if any. */ + if (hdrlen) { + mhtail->m_next = m; + m = mh; + mh = NULL; } - /* Quit outer loop on error or when we're done. */ - if (done) - break; - if (error != 0) + if (m == NULL) { + KASSERT(softerr, ("%s: m NULL, no error", __func__)); + error = softerr; + free(sfio, M_TEMP); goto done; + } + + /* Add the buffer chain to the socket buffer. */ + KASSERT(m_length(m, NULL) == space + hdrlen, + ("%s: mlen %u space %d hdrlen %d", + __func__, m_length(m, NULL), space, hdrlen)); + + CURVNET_SET(so->so_vnet); + if (nios == 0) { + /* + * If sendfile_swapin() didn't initiate any I/Os, + * which happens if all data is cached in VM, then + * we can send data right now without the + * PRUS_NOTREADY flag. 
+ */ + free(sfio, M_TEMP); + error = (*so->so_proto->pr_usrreqs->pru_send) + (so, 0, m, NULL, NULL, td); + } else { + sfio->sock_fp = sock_fp; + sfio->npages = npages; + fhold(sock_fp); + error = (*so->so_proto->pr_usrreqs->pru_send) + (so, PRUS_NOTREADY, m, NULL, NULL, td); + sf_iodone(sfio, NULL, 0, 0); + } + CURVNET_RESTORE(); + + m = NULL; /* pru_send always consumes */ + if (error) + goto done; + sbytes += space + hdrlen; + if (hdrlen) + hdrlen = 0; + if (softerr) { + error = softerr; + goto done; + } } /* @@ -2552,6 +2728,8 @@ out: fdrop(sock_fp, td); if (m) m_freem(m); + if (mh) + m_freem(mh); if (sfs != NULL) { mtx_lock(&sfs->mtx); diff --git a/sys/sys/mbuf.h b/sys/sys/mbuf.h index 8509a6ce49a..91dd8958c08 100644 --- a/sys/sys/mbuf.h +++ b/sys/sys/mbuf.h @@ -343,12 +343,13 @@ struct mbuf { * External mbuf storage buffer types. */ #define EXT_CLUSTER 1 /* mbuf cluster */ -#define EXT_SFBUF 2 /* sendfile(2)'s sf_bufs */ +#define EXT_SFBUF 2 /* sendfile(2)'s sf_buf */ #define EXT_JUMBOP 3 /* jumbo cluster page sized */ #define EXT_JUMBO9 4 /* jumbo cluster 9216 bytes */ #define EXT_JUMBO16 5 /* jumbo cluster 16184 bytes */ #define EXT_PACKET 6 /* mbuf+cluster from packet zone */ #define EXT_MBUF 7 /* external mbuf reference (M_IOVEC) */ +#define EXT_SFBUF_NOCACHE 8 /* sendfile(2)'s sf_buf not to be cached */ #define EXT_VENDOR1 224 /* for vendor-internal use */ #define EXT_VENDOR2 225 /* for vendor-internal use */ @@ -397,6 +398,7 @@ struct mbuf { */ void sf_ext_ref(void *, void *); void sf_ext_free(void *, void *); +void sf_ext_free_nocache(void *, void *); /* * Flags indicating checksum, segmentation and other offload work to be diff --git a/sys/sys/sf_buf.h b/sys/sys/sf_buf.h index d3bb037c244..b5970d95fd8 100644 --- a/sys/sys/sf_buf.h +++ b/sys/sys/sf_buf.h @@ -31,7 +31,14 @@ #define _SYS_SF_BUF_H_ struct sfstat { /* sendfile statistics */ + uint64_t sf_syscalls; /* times sendfile was called */ + uint64_t sf_noiocnt; /* times sendfile didn't require I/O */ 
uint64_t sf_iocnt; /* times sendfile had to do disk I/O */ + uint64_t sf_pages_read; /* pages read as part of a request */ + uint64_t sf_pages_valid; /* pages were valid for a request */ + uint64_t sf_rhpages_requested; /* readahead pages requested */ + uint64_t sf_rhpages_read; /* readahead pages read */ + uint64_t sf_busy; /* times aborted on a busy page */ uint64_t sf_allocfail; /* times sfbuf allocation failed */ uint64_t sf_allocwait; /* times sfbuf allocation had to wait */ }; diff --git a/sys/sys/socket.h b/sys/sys/socket.h index 18e2de10e10..ee621d9dd1e 100644 --- a/sys/sys/socket.h +++ b/sys/sys/socket.h @@ -587,11 +587,14 @@ struct sf_hdtr { * Sendfile-specific flag(s) */ #define SF_NODISKIO 0x00000001 -#define SF_MNOWAIT 0x00000002 +#define SF_MNOWAIT 0x00000002 /* obsolete */ #define SF_SYNC 0x00000004 +#define SF_NOCACHE 0x00000010 +#define SF_FLAGS(rh, flags) (((rh) << 16) | (flags)) #ifdef _KERNEL #define SFK_COMPAT 0x00000001 +#define SF_READAHEAD(flags) ((flags) >> 16) #endif /* _KERNEL */ #endif /* __BSD_VISIBLE */ diff --git a/usr.bin/netstat/mbuf.c b/usr.bin/netstat/mbuf.c index baf7a653639..8b95ddddd04 100644 --- a/usr.bin/netstat/mbuf.c +++ b/usr.bin/netstat/mbuf.c @@ -326,13 +326,33 @@ mbpr(void *kvmd, u_long mbaddr) kread_counters) != 0) goto out; + xo_emit("{:sendfile-syscalls/%ju} {N:sendfile syscalls}\n", + (uintmax_t)sfstat.sf_syscalls); + xo_emit("{:sendfile-no-io/%ju} " + "{N:sendfile syscalls completed without I\\/O request}\n", + (uintmax_t)sfstat.sf_noiocnt); + xo_emit("{:sendfile-io-count/%ju} " + "{N:requests for I\\/O initiated by sendfile}\n", + (uintmax_t)sfstat.sf_iocnt); + xo_emit("{:sendfile-pages-sent/%ju} " + "{N:pages read by sendfile as part of a request}\n", + (uintmax_t)sfstat.sf_pages_read); + xo_emit("{:sendfile-pages-valid/%ju} " + "{N:pages were valid at time of a sendfile request}\n", + (uintmax_t)sfstat.sf_pages_valid); + xo_emit("{:sendfile-requested-readahead/%ju} " + "{N:pages were requested for read ahead by 
applications}\n", + (uintmax_t)sfstat.sf_rhpages_requested); + xo_emit("{:sendfile-readahead/%ju} " + "{N:pages were read ahead by sendfile}\n", + (uintmax_t)sfstat.sf_rhpages_read); + xo_emit("{:sendfile-busy-encounters/%ju} " + "{N:times sendfile encountered an already busy page}\n", + (uintmax_t)sfstat.sf_busy); xo_emit("{:sfbufs-alloc-failed/%ju} {N:requests for sfbufs denied}\n", (uintmax_t)sfstat.sf_allocfail); xo_emit("{:sfbufs-alloc-wait/%ju} {N:requests for sfbufs delayed}\n", (uintmax_t)sfstat.sf_allocwait); - xo_emit("{:sfbufs-io-count/%ju} " - "{N:requests for I\\/O initiated by sendfile}\n", - (uintmax_t)sfstat.sf_iocnt); out: xo_close_container("mbuf-statistics"); memstat_mtl_free(mtlp); From 8dc7b23a026ddffb4103e5281eaa28571e22ad9c Mon Sep 17 00:00:00 2001 From: Bryan Drewery Date: Fri, 8 Jan 2016 21:07:34 +0000 Subject: [PATCH 08/67] Fix upgrading from OSVERSION 1000002-1000032 after r288829. r288829 states that lex requires the latest m4, but was not always building it. Move lex to the same logic as m4 since they are closely tied now. 
MFC after: 3 days Sponsored by: EMC / Isilon Storage Division Reported by: Slawa Olhovchenkov --- Makefile.inc1 | 10 +++------- 1 file changed, 3 insertions(+), 7 deletions(-) diff --git a/Makefile.inc1 b/Makefile.inc1 index f56fc71ddcf..a85f2e355c9 100644 --- a/Makefile.inc1 +++ b/Makefile.inc1 @@ -1425,11 +1425,13 @@ _vtfontcvt= usr.bin/vtfontcvt _sed= usr.bin/sed .endif -.if ${BOOTSTRAPPING} < 1000002 +.if ${BOOTSTRAPPING} < 1000033 _libopenbsd= lib/libopenbsd _m4= usr.bin/m4 +_lex= usr.bin/lex ${_bt}-usr.bin/m4: ${_bt}-lib/libopenbsd +${_bt}-usr.bin/lex: ${_bt}-usr.bin/m4 .endif .if ${BOOTSTRAPPING} < 1000026 @@ -1443,12 +1445,6 @@ ${_bt}-usr.sbin/nmtree: ${_bt}-lib/libnetbsd _cat= bin/cat .endif -.if ${BOOTSTRAPPING} < 1000033 -_lex= usr.bin/lex - -${_bt}-usr.bin/lex: ${_bt}-usr.bin/m4 -.endif - # r277259 crunchide: Correct 64-bit section header offset # r281674 crunchide: always include both 32- and 64-bit ELF support # r285986 crunchen: use STRIPBIN rather than STRIP From 67c3cb72dad16252adadca7be9fb77f005ef9fc5 Mon Sep 17 00:00:00 2001 From: Enji Cooper Date: Fri, 8 Jan 2016 21:25:27 +0000 Subject: [PATCH 09/67] - Add a geom_shsec specific cleanup function and trap on that function at exit so things are cleaned up properly - Use attach_md for attaching md(4) devices - Don't hardcode /tmp for temporary files, which violates the kyua sandbox MFC after: 3 weeks Sponsored by: EMC / Isilon Storage Division --- tools/regression/geom_shsec/conf.sh | 7 +++++++ tools/regression/geom_shsec/test-1.t | 17 ++++------------- tools/regression/geom_shsec/test-2.t | 25 +++++++++---------------- 3 files changed, 20 insertions(+), 29 deletions(-) diff --git a/tools/regression/geom_shsec/conf.sh b/tools/regression/geom_shsec/conf.sh index dc416db6434..7800eb7764a 100644 --- a/tools/regression/geom_shsec/conf.sh +++ b/tools/regression/geom_shsec/conf.sh @@ -5,4 +5,11 @@ name="$(mktemp -u shsec.XXXXXX)" class="shsec" base=`basename $0` +shsec_test_cleanup() +{ + [ -c 
/dev/$class/$name ] && gshsec stop $name + geom_test_cleanup +} +trap shsec_test_cleanup ABRT EXIT INT TERM + . `dirname $0`/../geom_subr.sh diff --git a/tools/regression/geom_shsec/test-1.t b/tools/regression/geom_shsec/test-1.t index 5cb5b4faca1..ab0bb37ea44 100644 --- a/tools/regression/geom_shsec/test-1.t +++ b/tools/regression/geom_shsec/test-1.t @@ -5,15 +5,11 @@ echo "1..2" -us0=45 -us1=`expr $us0 + 1` -us2=`expr $us0 + 2` +us0=$(attach_md -t malloc -s 1M) || exit 1 +us1=$(attach_md -t malloc -s 2M) || exit 1 +us2=$(attach_md -t malloc -s 3M) || exit 1 -mdconfig -a -t malloc -s 1M -u $us0 || exit 1 -mdconfig -a -t malloc -s 2M -u $us1 || exit 1 -mdconfig -a -t malloc -s 3M -u $us2 || exit 1 - -gshsec label $name /dev/md${us0} /dev/md${us1} /dev/md${us2} 2>/dev/null || exit 1 +gshsec label $name /dev/${us0} /dev/${us1} /dev/${us2} 2>/dev/null || exit 1 devwait # Size of created device should be 1MB - 512B. @@ -30,8 +26,3 @@ if [ $sectorsize -eq 512 ]; then else echo "not ok 2" fi - -gshsec stop $name -mdconfig -d -u $us0 -mdconfig -d -u $us1 -mdconfig -d -u $us2 diff --git a/tools/regression/geom_shsec/test-2.t b/tools/regression/geom_shsec/test-2.t index 19a4e0aa8a8..9dfe36ace48 100644 --- a/tools/regression/geom_shsec/test-2.t +++ b/tools/regression/geom_shsec/test-2.t @@ -5,21 +5,18 @@ echo "1..4" -us0=45 -us1=`expr $us0 + 1` -us2=`expr $us0 + 2` nblocks1=1024 nblocks2=`expr $nblocks1 + 1` -src=`mktemp /tmp/$base.XXXXXX` || exit 1 -dst=`mktemp /tmp/$base.XXXXXX` || exit 1 +src=`mktemp $base.XXXXXX` || exit 1 +dst=`mktemp $base.XXXXXX` || exit 1 dd if=/dev/random of=${src} count=$nblocks1 >/dev/null 2>&1 -mdconfig -a -t malloc -s $nblocks2 -u $us0 || exit 1 -mdconfig -a -t malloc -s $nblocks2 -u $us1 || exit 1 -mdconfig -a -t malloc -s $nblocks2 -u $us2 || exit 1 +us0=$(attach_md -t malloc -s $nblocks2) || exit 1 +us1=$(attach_md -t malloc -s $nblocks2) || exit 1 +us2=$(attach_md -t malloc -s $nblocks2) || exit 1 -gshsec label $name /dev/md${us0} 
/dev/md${us1} /dev/md${us2} || exit 1 +gshsec label $name /dev/$us0 /dev/$us1 /dev/$us2 || exit 1 devwait dd if=${src} of=/dev/shsec/${name} count=$nblocks1 >/dev/null 2>&1 @@ -31,29 +28,25 @@ else echo "ok 1" fi -dd if=/dev/md${us0} of=${dst} count=$nblocks1 >/dev/null 2>&1 +dd if=/dev/${us0} of=${dst} count=$nblocks1 >/dev/null 2>&1 if [ `md5 -q ${src}` = `md5 -q ${dst}` ]; then echo "not ok 2" else echo "ok 2" fi -dd if=/dev/md${us1} of=${dst} count=$nblocks1 >/dev/null 2>&1 +dd if=/dev/${us1} of=${dst} count=$nblocks1 >/dev/null 2>&1 if [ `md5 -q ${src}` = `md5 -q ${dst}` ]; then echo "not ok 3" else echo "ok 3" fi -dd if=/dev/md${us2} of=${dst} count=$nblocks1 >/dev/null 2>&1 +dd if=/dev/${us2} of=${dst} count=$nblocks1 >/dev/null 2>&1 if [ `md5 -q ${src}` = `md5 -q ${dst}` ]; then echo "not ok 4" else echo "ok 4" fi -gshsec stop $name -mdconfig -d -u $us0 -mdconfig -d -u $us1 -mdconfig -d -u $us2 rm -f ${src} ${dst} From b8338417d2b7c7b951257e10335b3da9646247a4 Mon Sep 17 00:00:00 2001 From: Enji Cooper Date: Fri, 8 Jan 2016 21:28:09 +0000 Subject: [PATCH 10/67] - Add a geom_stripe specific cleanup function and trap on that function at exit so things are cleaned up properly - Use attach_md for attaching md(4) devices - Don't hardcode /tmp for temporary files, which violates the kyua sandbox MFC after: 3 weeks Sponsored by: EMC / Isilon Storage Division --- tools/regression/geom_stripe/conf.sh | 7 +++++++ tools/regression/geom_stripe/test-1.t | 15 ++++----------- tools/regression/geom_stripe/test-2.t | 17 ++++++----------- 3 files changed, 17 insertions(+), 22 deletions(-) diff --git a/tools/regression/geom_stripe/conf.sh b/tools/regression/geom_stripe/conf.sh index 54a0c3646c2..fd0f41c6c5e 100644 --- a/tools/regression/geom_stripe/conf.sh +++ b/tools/regression/geom_stripe/conf.sh @@ -5,4 +5,11 @@ name="$(mktemp -u stripe.XXXXXX)" class="stripe" base=`basename $0` +gstripe_test_cleanup() +{ + [ -c /dev/$class/$name ] && gstripe destroy $name + 
geom_test_cleanup +} +trap gstripe_test_cleanup ABRT EXIT INT TERM + . `dirname $0`/../geom_subr.sh diff --git a/tools/regression/geom_stripe/test-1.t b/tools/regression/geom_stripe/test-1.t index 9b398f095e3..7923763a4c2 100644 --- a/tools/regression/geom_stripe/test-1.t +++ b/tools/regression/geom_stripe/test-1.t @@ -5,13 +5,11 @@ echo "1..1" -us=45 +us0=$(attach_md -t malloc -s 1M) || exit 1 +us1=$(attach_md -t malloc -s 2M) || exit 1 +us2=$(attach_md -t malloc -s 3M) || exit 1 -mdconfig -a -t malloc -s 1M -u $us || exit 1 -mdconfig -a -t malloc -s 2M -u `expr $us + 1` || exit 1 -mdconfig -a -t malloc -s 3M -u `expr $us + 2` || exit 1 - -gstripe create -s 16384 $name /dev/md${us} /dev/md`expr $us + 1` /dev/md`expr $us + 2` || exit 1 +gstripe create -s 16384 $name /dev/$us0 /dev/$us1 /dev/$us2 || exit 1 devwait # Size of created device should be 1MB * 3. @@ -23,8 +21,3 @@ if [ $size -eq 3145728 ]; then else echo "not ok 1" fi - -gstripe destroy $name -mdconfig -d -u $us -mdconfig -d -u `expr $us + 1` -mdconfig -d -u `expr $us + 2` diff --git a/tools/regression/geom_stripe/test-2.t b/tools/regression/geom_stripe/test-2.t index 73937f4e624..f6e11f5a582 100644 --- a/tools/regression/geom_stripe/test-2.t +++ b/tools/regression/geom_stripe/test-2.t @@ -5,18 +5,17 @@ echo "1..1" -us=45 tsize=3 -src=`mktemp /tmp/$base.XXXXXX` || exit 1 -dst=`mktemp /tmp/$base.XXXXXX` || exit 1 +src=`mktemp $base.XXXXXX` || exit 1 +dst=`mktemp $base.XXXXXX` || exit 1 dd if=/dev/random of=${src} bs=1m count=$tsize >/dev/null 2>&1 -mdconfig -a -t malloc -s 1M -u $us || exit 1 -mdconfig -a -t malloc -s 2M -u `expr $us + 1` || exit 1 -mdconfig -a -t malloc -s 3M -u `expr $us + 2` || exit 1 +us0=$(attach_md -t malloc -s 1M) || exit 1 +us1=$(attach_md -t malloc -s 2M) || exit 1 +us2=$(attach_md -t malloc -s 3M) || exit 1 -gstripe create -s 8192 $name /dev/md${us} /dev/md`expr $us + 1` /dev/md`expr $us + 2` || exit 1 +gstripe create -s 8192 $name /dev/$us0 /dev/$us1 /dev/$us2 || exit 1 devwait 
dd if=${src} of=/dev/stripe/${name} bs=1m count=$tsize >/dev/null 2>&1 @@ -28,8 +27,4 @@ else echo "ok 1" fi -gstripe destroy $name -mdconfig -d -u $us -mdconfig -d -u `expr $us + 1` -mdconfig -d -u `expr $us + 2` rm -f ${src} ${dst} From eb028f7b2defd65faba0fe2e2c94159c8ed4aebf Mon Sep 17 00:00:00 2001 From: Enji Cooper Date: Fri, 8 Jan 2016 21:38:26 +0000 Subject: [PATCH 11/67] - Make test-1.sh into a TAP testable testcase - Delete test-2.sh as it was an incomplete testcase, and the contents were basically a subset of test-1.sh - Add a conf.sh file for executing common functions with geom_uzip - Use attach_md for attaching md(4) devices - Don't hardcode /tmp for temporary files, which violates the kyua sandbox MFC after: 3 weeks Sponsored by: EMC / Isilon Storage Division --- tools/regression/geom_uzip/Makefile | 2 +- tools/regression/geom_uzip/conf.sh | 20 ++++++++++++++ tools/regression/geom_uzip/runtests.sh | 10 ------- tools/regression/geom_uzip/test-1.sh | 36 -------------------------- tools/regression/geom_uzip/test-1.t | 22 ++++++++++++++++ tools/regression/geom_uzip/test-2.sh | 15 ----------- 6 files changed, 43 insertions(+), 62 deletions(-) create mode 100755 tools/regression/geom_uzip/conf.sh delete mode 100644 tools/regression/geom_uzip/runtests.sh delete mode 100644 tools/regression/geom_uzip/test-1.sh create mode 100644 tools/regression/geom_uzip/test-1.t delete mode 100644 tools/regression/geom_uzip/test-2.sh diff --git a/tools/regression/geom_uzip/Makefile b/tools/regression/geom_uzip/Makefile index 6927ff10812..3186ef61d7b 100644 --- a/tools/regression/geom_uzip/Makefile +++ b/tools/regression/geom_uzip/Makefile @@ -9,7 +9,7 @@ ZIMAGE= ${IMAGE}.uzip UZIMAGE= ${ZIMAGE}.uue test: - @sh runtests.sh + prove -rv ./test-1.t image: makefs -s 1048576 ${IMAGE} etalon diff --git a/tools/regression/geom_uzip/conf.sh b/tools/regression/geom_uzip/conf.sh new file mode 100755 index 00000000000..9a22841fdbe --- /dev/null +++ b/tools/regression/geom_uzip/conf.sh 
@@ -0,0 +1,20 @@ +#!/bin/sh +# $FreeBSD$ + +class="uzip" +base=`basename $0` + +uzip_test_cleanup() +{ + if [ -n "$mntpoint" ]; then + umount $mntpoint + rmdir $mntpoint + fi + geom_test_cleanup +} +trap uzip_test_cleanup ABRT EXIT INT TERM + +. `dirname $0`/../geom_subr.sh + +# NOTE: make sure $TMPDIR has been set by geom_subr.sh if unset [by kyua, etc] +mntpoint=$(mktemp -d tmp.XXXXXX) || exit diff --git a/tools/regression/geom_uzip/runtests.sh b/tools/regression/geom_uzip/runtests.sh deleted file mode 100644 index 60e78b54074..00000000000 --- a/tools/regression/geom_uzip/runtests.sh +++ /dev/null @@ -1,10 +0,0 @@ -#!/bin/sh -# -# $FreeBSD$ -# - -dir=`dirname $0` - -for ts in `dirname $0`/test-*.sh; do - sh $ts -done diff --git a/tools/regression/geom_uzip/test-1.sh b/tools/regression/geom_uzip/test-1.sh deleted file mode 100644 index 7e8f16840b2..00000000000 --- a/tools/regression/geom_uzip/test-1.sh +++ /dev/null @@ -1,36 +0,0 @@ -#!/bin/sh -# -# $FreeBSD$ -# - -mntpoint="/mnt/test-1" - -# -# prepare -kldload geom_uzip -uudecode test-1.img.uzip.uue -num=`mdconfig -an -f test-1.img.uzip` || exit 1 -sleep 1 - -# -# mount -mkdir -p "${mntpoint}" -mount -o ro /dev/md${num}.uzip "${mntpoint}" || exit 1 - -# -# compare -#cat "${mntpoint}/etalon.txt" -diff -u etalon/etalon.txt "${mntpoint}/etalon.txt" -if [ $? -eq 0 ]; then - echo "PASS" -else - echo "FAIL" -fi - -# -# cleanup -umount "${mntpoint}" -rmdir "${mntpoint}" -mdconfig -d -u ${num} -sleep 1 -kldunload geom_uzip diff --git a/tools/regression/geom_uzip/test-1.t b/tools/regression/geom_uzip/test-1.t new file mode 100644 index 00000000000..b156c067456 --- /dev/null +++ b/tools/regression/geom_uzip/test-1.t @@ -0,0 +1,22 @@ +#!/bin/sh +# $FreeBSD$ + +testsdir=$(dirname $0) +. 
$testsdir/conf.sh + +echo "1..1" + +UUE=$testsdir/test-1.img.uzip.uue +uudecode $UUE +us0=$(attach_md -f $(basename $UUE .uue)) || exit 1 +sleep 1 + +mount -o ro /dev/${us0}.uzip "${mntpoint}" || exit 1 + +#cat "${mntpoint}/etalon.txt" +diff -I '\$FreeBSD.*\$' -u $testsdir/etalon/etalon.txt "${mntpoint}/etalon.txt" +if [ $? -eq 0 ]; then + echo "ok 1" +else + echo "not ok 1" +fi diff --git a/tools/regression/geom_uzip/test-2.sh b/tools/regression/geom_uzip/test-2.sh deleted file mode 100644 index 866282753e2..00000000000 --- a/tools/regression/geom_uzip/test-2.sh +++ /dev/null @@ -1,15 +0,0 @@ -#!/bin/sh -# -# $FreeBSD$ -# - -# -# prepare -kldload geom_uzip -uudecode test-1.img.uzip.uue -num=`mdconfig -an -f test-1.img.uzip` || exit 1 -sleep 1 - -# -# destroy -kldunload geom_uzip From f009f68a211b2ae25e8a5d5caec60267091bcada Mon Sep 17 00:00:00 2001 From: Enji Cooper Date: Fri, 8 Jan 2016 21:47:41 +0000 Subject: [PATCH 12/67] - Move functions that might be used in class-specific cleanup functions (geom_test_cleanup, etc) down so the testcases don't emit noise when bailing - Conform to the TAP protocol better when dealing with classes that can't be loaded and with temporary files that can't be allocated for tracking md(4) devices. MFC after: 2 weeks X-MFC with: r293028, r293029, r293048 Sponsored by: EMC / Isilon Storage Division --- tools/regression/geom_subr.sh | 43 ++++++++++++++++++++++------------- 1 file changed, 27 insertions(+), 16 deletions(-) diff --git a/tools/regression/geom_subr.sh b/tools/regression/geom_subr.sh index 58dc31dd5b4..b437183efed 100644 --- a/tools/regression/geom_subr.sh +++ b/tools/regression/geom_subr.sh @@ -1,13 +1,6 @@ #!/bin/sh # $FreeBSD$ -if [ $(id -u) -ne 0 ]; then - echo 'Tests must be run as root' - echo 'Bail out!' 
- exit 1 -fi -kldstat -q -m g_${class} || geom ${class} load || exit 1 - devwait() { while :; do @@ -18,13 +11,6 @@ devwait() done } -# Need to keep track of the test md devices to avoid the scenario where a test -# failing will cause the other tests to bomb out, or a test failing will leave -# a large number of md(4) devices lingering around -: ${TMPDIR=/tmp} -export TMPDIR -TEST_MDS_FILE=$(mktemp ${TMPDIR}/test_mds.XXXXXX) || exit 1 - attach_md() { local test_md @@ -38,12 +24,37 @@ geom_test_cleanup() { local test_md - if [ -f $TEST_MDS_FILE ]; then + if [ -f "$TEST_MDS_FILE" ]; then while read test_md; do # The "#" tells the TAP parser this is a comment echo "# Removing test memory disk: $test_md" mdconfig -d -u $test_md done < $TEST_MDS_FILE fi - rm -f $TEST_MDS_FILE + rm -f "$TEST_MDS_FILE" } + +if [ $(id -u) -ne 0 ]; then + echo 'Tests must be run as root' + echo 'Bail out!' + exit 1 +fi +# If the geom class isn't already loaded, try loading it. +if ! kldstat -q -m g_${class}; then + if ! geom ${class} load; then + echo "Could not load module for geom class=${class}" + echo 'Bail out!' + exit 1 + fi +fi + +# Need to keep track of the test md devices to avoid the scenario where a test +# failing will cause the other tests to bomb out, or a test failing will leave +# a large number of md(4) devices lingering around +: ${TMPDIR=/tmp} +export TMPDIR +if ! TEST_MDS_FILE=$(mktemp ${TMPDIR}/test_mds.XXXXXX); then + echo 'Failed to create temporary file for tracking the test md(4) devices' + echo 'Bail out!' + exit 1 +fi From 1f17dba48291d9081346b5b70b1d4d47baebfc21 Mon Sep 17 00:00:00 2001 From: Bryan Drewery Date: Fri, 8 Jan 2016 22:59:49 +0000 Subject: [PATCH 13/67] Chase r292622: Update path to ioctl.c for incremental build hack. 
Sponsored by: EMC / Isilon Storage Division --- Makefile.inc1 | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/Makefile.inc1 b/Makefile.inc1 index a85f2e355c9..3429fdaae8a 100644 --- a/Makefile.inc1 +++ b/Makefile.inc1 @@ -570,9 +570,8 @@ _worldtmp: .PHONY .else rm -rf ${WORLDTMP}/legacy/usr/include # XXX - These three can depend on any header file. - rm -f ${OBJTREE}${.CURDIR}/usr.bin/kdump/ioctl.c + rm -f ${OBJTREE}${.CURDIR}/lib/libsysdecode/ioctl.c rm -f ${OBJTREE}${.CURDIR}/usr.bin/kdump/kdump_subr.c - rm -f ${OBJTREE}${.CURDIR}/usr.bin/truss/ioctl.c .endif .for _dir in \ lib usr legacy/bin legacy/usr From b80f3546a8da40a0fe47415c8b6e107b21007684 Mon Sep 17 00:00:00 2001 From: Ed Maste Date: Sat, 9 Jan 2016 00:42:07 +0000 Subject: [PATCH 14/67] Support use of LLVM's libunwind for exception unwinding It is built in libgcc_s.so and libgcc_eh.a to simplify transition. It is enabled by default on arm64 (where we previously had no other unwinder) and may be enabled for testing on other platforms by setting WITH_LLVM_LIBUNWIND in src.conf(5). Also add compiler-rt's __gcc_personality_v0 implementation for use with the LLVM unwinder. 
Relnotes: Yes Sponsored by: The FreeBSD Foundation Differential Revision: https://reviews.freebsd.org/D4787 --- gnu/lib/libgcc/Makefile | 55 ++++++++++++++++++++-- share/mk/src.opts.mk | 4 +- tools/build/options/WITHOUT_LLVM_LIBUNWIND | 2 + tools/build/options/WITH_LLVM_LIBUNWIND | 2 + 4 files changed, 57 insertions(+), 6 deletions(-) create mode 100644 tools/build/options/WITHOUT_LLVM_LIBUNWIND create mode 100644 tools/build/options/WITH_LLVM_LIBUNWIND diff --git a/gnu/lib/libgcc/Makefile b/gnu/lib/libgcc/Makefile index 6972146dc9f..ad4e0f1fd38 100644 --- a/gnu/lib/libgcc/Makefile +++ b/gnu/lib/libgcc/Makefile @@ -2,6 +2,9 @@ GCCDIR= ${.CURDIR}/../../../contrib/gcc GCCLIB= ${.CURDIR}/../../../contrib/gcclibs +COMPILERRTDIR= ${.CURDIR}/../../../contrib/compiler-rt +UNWINDINCDIR= ${.CURDIR}/../../../contrib/llvm/projects/libunwind/include +UNWINDSRCDIR= ${.CURDIR}/../../../contrib/llvm/projects/libunwind/src SHLIB_NAME= libgcc_s.so.1 SHLIBDIR?= /lib @@ -67,8 +70,37 @@ LIB2ADD = $(LIB2FUNCS_EXTRA) LIB2ADD_ST = $(LIB2FUNCS_STATIC_EXTRA) # Additional sources to handle exceptions; overridden by targets as needed. 
+.if ${MK_LLVM_LIBUNWIND} != "no" + +.PATH: ${COMPILERRTDIR}/lib/builtins +.PATH: ${UNWINDSRCDIR} +LIB2ADDEH = gcc_personality_v0.c \ + int_util.c \ + Unwind-EHABI.cpp \ + Unwind-sjlj.c \ + UnwindLevel1-gcc-ext.c \ + UnwindLevel1.c \ + UnwindRegistersRestore.S \ + UnwindRegistersSave.S \ + libunwind.cpp + +CFLAGS+= -I${UNWINDINCDIR} -I${.CURDIR} +.if empty(CXXFLAGS:M-std=*) +CXXFLAGS+= -std=c++11 +.endif +CXXFLAGS+= -fno-rtti + +.else # MK_LLVM_LIBUNWIND + +.if ${TARGET_CPUARCH} == "arm" +LIB2ADDEH = unwind-arm.c libunwind.S pr-support.c unwind-c.c +.else LIB2ADDEH = unwind-dw2.c unwind-dw2-fde-glibc.c unwind-sjlj.c gthr-gnat.c \ unwind-c.c +.endif + +.endif # MK_LLVM_LIBUNWIND + LIB2ADDEHSTATIC = $(LIB2ADDEH) LIB2ADDEHSHARED = $(LIB2ADDEH) @@ -116,7 +148,6 @@ CFLAGS.clang+= -fheinous-gnu-extensions LIB1ASMSRC = lib1funcs.asm LIB1ASMFUNCS = _dvmd_tls _bb_init_func -LIB2ADDEH = unwind-arm.c libunwind.S pr-support.c unwind-c.c # Some compilers generate __aeabi_ functions libgcc_s is missing LIBADD+= compiler_rt .endif @@ -160,7 +191,10 @@ LIB2_DIVMOD_FUNCS:= ${LIB2_DIVMOD_FUNCS:S/${sym}//g} .endfor .endif -COMMONHDRS= tm.h tconfig.h options.h unwind.h gthr-default.h +COMMONHDRS= tm.h tconfig.h options.h gthr-default.h +.if ${MK_LLVM_LIBUNWIND} == no +COMMONHDRS+= unwind.h +.endif #----------------------------------------------------------------------- # @@ -170,6 +204,9 @@ HIDE = -fvisibility=hidden -DHIDE_EXPORTS CC_T = ${CC} -c ${CFLAGS} ${HIDE} -fPIC CC_P = ${CC} -c ${CFLAGS} ${HIDE} -p -fPIC CC_S = ${CC} -c ${CFLAGS} ${PICFLAG} -DSHARED +CXX_T = ${CXX} -c ${CXXFLAGS} ${HIDE} -fPIC +CXX_P = ${CXX} -c ${CXXFLAGS} ${HIDE} -p -fPIC +CXX_S = ${CXX} -c ${CXXFLAGS} ${PICFLAG} -DSHARED #----------------------------------------------------------------------- # @@ -284,16 +321,26 @@ EH_OBJS_S = ${LIB2ADDEHSHARED:R:S/$/.So/} EH_CFLAGS = -fexceptions -D__GLIBC__=3 -DElfW=__ElfN SOBJS += ${EH_OBJS_S} -.for _src in ${LIB2ADDEHSTATIC} +.for _src in ${LIB2ADDEHSTATIC:M*.c} 
${_src:R:S/$/.o/}: ${_src} ${COMMONHDRS} ${CC_T} ${EH_CFLAGS} -o ${.TARGET} ${.IMPSRC} ${_src:R:S/$/.po/}: ${_src} ${COMMONHDRS} ${CC_P} ${EH_CFLAGS} -o ${.TARGET} ${.IMPSRC} .endfor -.for _src in ${LIB2ADDEHSHARED} +.for _src in ${LIB2ADDEHSTATIC:M*.cpp} +${_src:R:S/$/.o/}: ${_src} ${COMMONHDRS} + ${CXX_T} ${EH_CFLAGS} -o ${.TARGET} ${.IMPSRC} +${_src:R:S/$/.po/}: ${_src} ${COMMONHDRS} + ${CXX_P} ${EH_CFLAGS} -o ${.TARGET} ${.IMPSRC} +.endfor +.for _src in ${LIB2ADDEHSHARED:M*.c} ${_src:R:S/$/.So/}: ${_src} ${COMMONHDRS} ${CC_S} ${EH_CFLAGS} -o ${.TARGET} ${.IMPSRC} .endfor +.for _src in ${LIB2ADDEHSHARED:M*.cpp} +${_src:R:S/$/.So/}: ${_src} ${COMMONHDRS} + ${CXX_S} ${EH_CFLAGS} -o ${.TARGET} ${.IMPSRC} +.endfor #----------------------------------------------------------------------- diff --git a/share/mk/src.opts.mk b/share/mk/src.opts.mk index 47a67e02e52..678c31ef054 100644 --- a/share/mk/src.opts.mk +++ b/share/mk/src.opts.mk @@ -231,9 +231,9 @@ __DEFAULT_NO_OPTIONS+=CLANG CLANG_BOOTSTRAP CLANG_FULL CLANG_IS_CC # In-tree binutils/gcc are older versions without modern architecture support. .if ${__T} == "aarch64" || ${__T} == "riscv64" BROKEN_OPTIONS+=BINUTILS BINUTILS_BOOTSTRAP GCC GCC_BOOTSTRAP GDB -__DEFAULT_YES_OPTIONS+=ELFCOPY_AS_OBJCOPY +__DEFAULT_YES_OPTIONS+=ELFCOPY_AS_OBJCOPY LLVM_LIBUNWIND .else -__DEFAULT_NO_OPTIONS+=ELFCOPY_AS_OBJCOPY +__DEFAULT_NO_OPTIONS+=ELFCOPY_AS_OBJCOPY LLVM_LIBUNWIND .endif .if ${__T} == "riscv64" BROKEN_OPTIONS+=PROFILE # "sorry, unimplemented: profiler support for RISC-V" diff --git a/tools/build/options/WITHOUT_LLVM_LIBUNWIND b/tools/build/options/WITHOUT_LLVM_LIBUNWIND new file mode 100644 index 00000000000..63a904230a2 --- /dev/null +++ b/tools/build/options/WITHOUT_LLVM_LIBUNWIND @@ -0,0 +1,2 @@ +.\" $FreeBSD$ +Set to use GCC's stack unwinder (instead of LLVM's libunwind). 
diff --git a/tools/build/options/WITH_LLVM_LIBUNWIND b/tools/build/options/WITH_LLVM_LIBUNWIND new file mode 100644 index 00000000000..ff63884af9e --- /dev/null +++ b/tools/build/options/WITH_LLVM_LIBUNWIND @@ -0,0 +1,2 @@ +.\" $FreeBSD$ +Set to use LLVM's libunwind stack unwinder (instead of GCC's unwinder). From eefa1c373f91ccbce0a1ab8ea391e3b31e61bea0 Mon Sep 17 00:00:00 2001 From: Bryan Drewery Date: Sat, 9 Jan 2016 00:43:11 +0000 Subject: [PATCH 15/67] Update dependencies. Sponsored by: EMC / Isilon Storage Division --- sys/boot/userboot/libstand/Makefile.depend | 1 - 1 file changed, 1 deletion(-) diff --git a/sys/boot/userboot/libstand/Makefile.depend b/sys/boot/userboot/libstand/Makefile.depend index dae5bcdc5da..57cd80c479e 100644 --- a/sys/boot/userboot/libstand/Makefile.depend +++ b/sys/boot/userboot/libstand/Makefile.depend @@ -6,7 +6,6 @@ DIRDEPS = \ include/arpa \ include/xlocale \ lib/libbz2 \ - lib/libstand \ .include From 41ea4cc219aad609536622471003db3183d4ddb0 Mon Sep 17 00:00:00 2001 From: Glen Barber Date: Sat, 9 Jan 2016 00:45:38 +0000 Subject: [PATCH 16/67] Set FORCE_PKG_REGISTER=1 when installing packages to avoid failures when re-using build chroot(8) environments. This is based on the patch in the PR referenced below, but instead of using 'reinstall' in two locations (one of which already uses FORCE_PKG_REGISTER=1), changes the non-embedded behavior. 
PR: 205998 Submitted by: ngie MFC after: 5 days Sponsored by: The FreeBSD Foundation --- release/release.sh | 1 + 1 file changed, 1 insertion(+) diff --git a/release/release.sh b/release/release.sh index 313a16c8bb5..caa6f267fbf 100755 --- a/release/release.sh +++ b/release/release.sh @@ -275,6 +275,7 @@ extra_chroot_setup() { PBUILD_FLAGS="${PBUILD_FLAGS} OSREL=${REVISION}" chroot ${CHROOTDIR} make -C /usr/ports/textproc/docproj \ ${PBUILD_FLAGS} OPTIONS_UNSET="FOP IGOR" \ + FORCE_PKG_REGISTER=1 \ install clean distclean fi fi From 1500eb213dd54c505d9860a3cd907049701c7854 Mon Sep 17 00:00:00 2001 From: Ed Maste Date: Sat, 9 Jan 2016 00:47:01 +0000 Subject: [PATCH 17/67] Regen after r293450 --- share/man/man5/src.conf.5 | 23 ++++++++++++++++++++++- 1 file changed, 22 insertions(+), 1 deletion(-) diff --git a/share/man/man5/src.conf.5 b/share/man/man5/src.conf.5 index 4994aab1813..60bf53a96c4 100644 --- a/share/man/man5/src.conf.5 +++ b/share/man/man5/src.conf.5 @@ -1,7 +1,7 @@ .\" DO NOT EDIT-- this file is automatically generated. .\" from FreeBSD: head/tools/build/options/makeman 292283 2015-12-15 18:42:30Z bdrewery .\" $FreeBSD$ -.Dd December 15, 2015 +.Dd January 9, 2016 .Dt SRC.CONF 5 .Os .Sh NAME @@ -948,9 +948,30 @@ Set to not build the .Nm libthr (1:1 threading) library. +.It Va WITHOUT_LLDB +.\" from FreeBSD: head/tools/build/options/WITHOUT_LLDB 289275 2015-10-14 00:23:31Z emaste +Set to not build the LLDB debugger. +.Pp +It is a default setting on +arm/arm, arm/armeb, arm/armv6, arm/armv6hf, i386/i386, mips/mipsel, mips/mips, mips/mips64el, mips/mips64, mips/mipsn32, pc98/i386, powerpc/powerpc, powerpc/powerpc64 and sparc64/sparc64. .It Va WITH_LLDB .\" from FreeBSD: head/tools/build/options/WITH_LLDB 255722 2013-09-20 01:52:02Z emaste Set to build the LLDB debugger. +.Pp +It is a default setting on +amd64/amd64 and arm64/aarch64. 
+.It Va WITHOUT_LLVM_LIBUNWIND +.\" from FreeBSD: head/tools/build/options/WITHOUT_LLVM_LIBUNWIND 293450 2016-01-09 00:42:07Z emaste +Set to use GCC's stack unwinder (instead of LLVM's libunwind). +.Pp +It is a default setting on +amd64/amd64, arm/arm, arm/armeb, arm/armv6, arm/armv6hf, i386/i386, mips/mipsel, mips/mips, mips/mips64el, mips/mips64, mips/mipsn32, pc98/i386, powerpc/powerpc, powerpc/powerpc64 and sparc64/sparc64. +.It Va WITH_LLVM_LIBUNWIND +.\" from FreeBSD: head/tools/build/options/WITH_LLVM_LIBUNWIND 293450 2016-01-09 00:42:07Z emaste +Set to use LLVM's libunwind stack unwinder (instead of GCC's unwinder). +.Pp +It is a default setting on +arm64/aarch64. .It Va WITHOUT_LOCALES .\" from FreeBSD: head/tools/build/options/WITHOUT_LOCALES 156932 2006-03-21 07:50:50Z ru Set to not build localization files; see From fce8d0e350a77330a34844fee43103cf163377bb Mon Sep 17 00:00:00 2001 From: Allan Jude Date: Sat, 9 Jan 2016 00:54:08 +0000 Subject: [PATCH 18/67] Only call init_zfs_bootenv() when the system was booted with ZFS Add a few other safeguards to ensure things do not break when the boot device cannot be determined Reported by: flo MFC after: 3 days Sponsored by: ScaleEngine Inc. 
--- sys/boot/i386/loader/main.c | 18 ++++++++++++------ sys/boot/userboot/userboot/main.c | 19 ++++++++++++------- sys/boot/zfs/zfs.c | 9 +++++++-- 3 files changed, 31 insertions(+), 15 deletions(-) diff --git a/sys/boot/i386/loader/main.c b/sys/boot/i386/loader/main.c index 73f3507fb8f..2b30b92161f 100644 --- a/sys/boot/i386/loader/main.c +++ b/sys/boot/i386/loader/main.c @@ -262,6 +262,7 @@ extract_currdev(void) new_currdev.d_kind.zfs.root_guid = 0; } new_currdev.d_dev = &zfs_dev; + init_zfs_bootenv(zfs_fmtdev(&new_currdev)); #endif } else if ((initial_bootdev & B_MAGICMASK) != B_DEVMAGIC) { /* The passed-in boot device is bad */ @@ -295,10 +296,6 @@ extract_currdev(void) new_currdev.d_unit = 0; } -#ifdef LOADER_ZFS_SUPPORT - init_zfs_bootenv(zfs_fmtdev(&new_currdev)); -#endif - env_setenv("currdev", EV_VOLATILE, i386_fmtdev(&new_currdev), i386_setcurrdev, env_nounset); env_setenv("loaddev", EV_VOLATILE, i386_fmtdev(&new_currdev), env_noset, @@ -311,9 +308,14 @@ init_zfs_bootenv(char *currdev) { char *beroot; + if (strlen(currdev) == 0) + return; + if(strncmp(currdev, "zfs:", 4) != 0) + return; /* Remove the trailing : */ currdev[strlen(currdev) - 1] = '\0'; setenv("zfs_be_active", currdev, 1); + setenv("zfs_be_currpage", "1", 1); /* Do not overwrite if already set */ setenv("vfs.root.mountfrom", currdev, 0); /* Forward past zfs: */ @@ -323,9 +325,7 @@ init_zfs_bootenv(char *currdev) beroot = strrchr(currdev, '/'); if (beroot != NULL) beroot[0] = '\0'; - beroot = currdev; - setenv("zfs_be_root", beroot, 1); } #endif @@ -394,6 +394,7 @@ static int command_reloadbe(int argc, char *argv[]) { int err; + char *root; if (argc > 2) { command_errmsg = "wrong number of arguments"; @@ -403,6 +404,11 @@ command_reloadbe(int argc, char *argv[]) if (argc == 2) { err = zfs_bootenv(argv[1]); } else { + root = getenv("zfs_be_root"); + if (root == NULL) { + /* There does not appear to be a ZFS pool here, exit without error */ + return (CMD_OK); + } err = 
zfs_bootenv(getenv("zfs_be_root")); } diff --git a/sys/boot/userboot/userboot/main.c b/sys/boot/userboot/userboot/main.c index 335c8fd05c1..a52550cbaea 100644 --- a/sys/boot/userboot/userboot/main.c +++ b/sys/boot/userboot/userboot/main.c @@ -168,6 +168,7 @@ extract_currdev(void) zdev.d_type = zdev.d_dev->dv_type; dev = *(struct disk_devdesc *)&zdev; + init_zfs_bootenv(zfs_fmtdev(&dev)); } else #endif @@ -191,10 +192,6 @@ extract_currdev(void) dev.d_unit = 0; } -#if defined(USERBOOT_ZFS_SUPPORT) - init_zfs_bootenv(zfs_fmtdev(&dev)); -#endif - env_setenv("currdev", EV_VOLATILE, userboot_fmtdev(&dev), userboot_setcurrdev, env_nounset); env_setenv("loaddev", EV_VOLATILE, userboot_fmtdev(&dev), @@ -207,9 +204,14 @@ init_zfs_bootenv(char *currdev) { char *beroot; + if (strlen(currdev) == 0) + return; + if(strncmp(currdev, "zfs:", 4) != 0) + return; /* Remove the trailing : */ currdev[strlen(currdev) - 1] = '\0'; setenv("zfs_be_active", currdev, 1); + setenv("zfs_be_currpage", "1", 1); /* Do not overwrite if already set */ setenv("vfs.root.mountfrom", currdev, 0); /* Forward past zfs: */ @@ -219,9 +221,7 @@ init_zfs_bootenv(char *currdev) beroot = strrchr(currdev, '/'); if (beroot != NULL) beroot[0] = '\0'; - beroot = currdev; - setenv("zfs_be_root", beroot, 1); } @@ -273,6 +273,7 @@ static int command_reloadbe(int argc, char *argv[]) { int err; + char *root; if (argc > 2) { command_errmsg = "wrong number of arguments"; @@ -282,7 +283,11 @@ command_reloadbe(int argc, char *argv[]) if (argc == 2) { err = zfs_bootenv(argv[1]); } else { - err = zfs_bootenv(getenv("zfs_be_root")); + root = getenv("zfs_be_root"); + if (root == NULL) { + return (CMD_OK); + } + err = zfs_bootenv(root); } if (err != 0) { diff --git a/sys/boot/zfs/zfs.c b/sys/boot/zfs/zfs.c index fdb79bb21c1..c339b2d7beb 100644 --- a/sys/boot/zfs/zfs.c +++ b/sys/boot/zfs/zfs.c @@ -712,13 +712,18 @@ zfs_list(const char *name) int zfs_bootenv(const char *name) { - static char poolname[ZFS_MAXNAMELEN], *dsname; + 
static char poolname[ZFS_MAXNAMELEN], *dsname, *root; char becount[4]; uint64_t objid; spa_t *spa; int len, rv, pages, perpage, currpage; - if (strcmp(name, getenv("zfs_be_root")) != 0) { + if (name == NULL) + return (EINVAL); + if ((root = getenv("zfs_be_root")) == NULL) + return (EINVAL); + + if (strcmp(name, root) != 0) { if (setenv("zfs_be_root", name, 1) != 0) return (ENOMEM); } From 11f9ca696ee12e1732dd2f15dd8660e0689194b6 Mon Sep 17 00:00:00 2001 From: Mark Johnston Date: Sat, 9 Jan 2016 01:56:46 +0000 Subject: [PATCH 19/67] Prevent cv_waiters wraparound. r282971 attempted to fix this problem by decrementing cv_waiters after waking up from sleeping on a condition variable, but this can result in a use-after-free if the CV is freed before all woken threads have had a chance to run. Instead, avoid incrementing cv_waiters past INT_MAX, and have cv_signal() explicitly check for sleeping threads once cv_waiters has reached this bound. Reviewed by: jhb MFC after: 2 weeks Sponsored by: EMC / Isilon Storage Division Differential Revision: https://reviews.freebsd.org/D4822 --- sys/kern/kern_condvar.c | 33 ++++++++++++++++++++++++++------- 1 file changed, 26 insertions(+), 7 deletions(-) diff --git a/sys/kern/kern_condvar.c b/sys/kern/kern_condvar.c index 2700a25d477..95a6d09e177 100644 --- a/sys/kern/kern_condvar.c +++ b/sys/kern/kern_condvar.c @@ -31,6 +31,7 @@ __FBSDID("$FreeBSD$"); #include #include +#include #include #include #include @@ -46,6 +47,17 @@ __FBSDID("$FreeBSD$"); #include #endif +/* + * A bound below which cv_waiters is valid. Once cv_waiters reaches this bound, + * cv_signal must manually check the wait queue for threads. + */ +#define CV_WAITERS_BOUND INT_MAX + +#define CV_WAITERS_INC(cvp) do { \ + if ((cvp)->cv_waiters < CV_WAITERS_BOUND) \ + (cvp)->cv_waiters++; \ +} while (0) + /* * Common sanity checks for cv_wait* functions. 
*/ @@ -122,7 +134,7 @@ _cv_wait(struct cv *cvp, struct lock_object *lock) sleepq_lock(cvp); - cvp->cv_waiters++; + CV_WAITERS_INC(cvp); if (lock == &Giant.lock_object) mtx_assert(&Giant, MA_OWNED); DROP_GIANT(); @@ -184,7 +196,7 @@ _cv_wait_unlock(struct cv *cvp, struct lock_object *lock) sleepq_lock(cvp); - cvp->cv_waiters++; + CV_WAITERS_INC(cvp); DROP_GIANT(); sleepq_add(cvp, lock, cvp->cv_description, SLEEPQ_CONDVAR, 0); @@ -240,7 +252,7 @@ _cv_wait_sig(struct cv *cvp, struct lock_object *lock) sleepq_lock(cvp); - cvp->cv_waiters++; + CV_WAITERS_INC(cvp); if (lock == &Giant.lock_object) mtx_assert(&Giant, MA_OWNED); DROP_GIANT(); @@ -307,7 +319,7 @@ _cv_timedwait_sbt(struct cv *cvp, struct lock_object *lock, sbintime_t sbt, sleepq_lock(cvp); - cvp->cv_waiters++; + CV_WAITERS_INC(cvp); if (lock == &Giant.lock_object) mtx_assert(&Giant, MA_OWNED); DROP_GIANT(); @@ -376,7 +388,7 @@ _cv_timedwait_sig_sbt(struct cv *cvp, struct lock_object *lock, sleepq_lock(cvp); - cvp->cv_waiters++; + CV_WAITERS_INC(cvp); if (lock == &Giant.lock_object) mtx_assert(&Giant, MA_OWNED); DROP_GIANT(); @@ -422,8 +434,15 @@ cv_signal(struct cv *cvp) wakeup_swapper = 0; sleepq_lock(cvp); if (cvp->cv_waiters > 0) { - cvp->cv_waiters--; - wakeup_swapper = sleepq_signal(cvp, SLEEPQ_CONDVAR, 0, 0); + if (cvp->cv_waiters == CV_WAITERS_BOUND && + sleepq_lookup(cvp) == NULL) { + cvp->cv_waiters = 0; + } else { + if (cvp->cv_waiters < CV_WAITERS_BOUND) + cvp->cv_waiters--; + wakeup_swapper = sleepq_signal(cvp, SLEEPQ_CONDVAR, 0, + 0); + } } sleepq_release(cvp); if (wakeup_swapper) From b60204754b74b5507e62229b03544529dc0a13d1 Mon Sep 17 00:00:00 2001 From: "George V. 
Neville-Neil" Date: Sat, 9 Jan 2016 03:08:21 +0000 Subject: [PATCH 20/67] Add netmap support for bhyve Submitted by: btw MFC after: 1 week Differential Revision: https://reviews.freebsd.org/D4826 --- usr.sbin/bhyve/pci_virtio_net.c | 305 ++++++++++++++++++++++++++++---- 1 file changed, 273 insertions(+), 32 deletions(-) diff --git a/usr.sbin/bhyve/pci_virtio_net.c b/usr.sbin/bhyve/pci_virtio_net.c index aa9b581d90b..a8c56210e92 100644 --- a/usr.sbin/bhyve/pci_virtio_net.c +++ b/usr.sbin/bhyve/pci_virtio_net.c @@ -36,6 +36,10 @@ __FBSDID("$FreeBSD$"); #include #include #include +#ifndef NETMAP_WITH_LIBS +#define NETMAP_WITH_LIBS +#endif +#include #include #include @@ -133,6 +137,8 @@ struct pci_vtnet_softc { struct mevent *vsc_mevp; int vsc_tapfd; + struct nm_desc *vsc_nmd; + int vsc_rx_ready; volatile int resetting; /* set and checked outside lock */ @@ -149,6 +155,10 @@ struct pci_vtnet_softc { pthread_mutex_t tx_mtx; pthread_cond_t tx_cond; int tx_in_progress; + + void (*pci_vtnet_rx)(struct pci_vtnet_softc *sc); + void (*pci_vtnet_tx)(struct pci_vtnet_softc *sc, struct iovec *iov, + int iovcnt, int len); }; static void pci_vtnet_reset(void *); @@ -371,14 +381,208 @@ pci_vtnet_tap_rx(struct pci_vtnet_softc *sc) vq_endchains(vq, 1); } +static int +pci_vtnet_netmap_writev(struct nm_desc *nmd, struct iovec *iov, int iovcnt) +{ + int r, i; + int len = 0; + + for (r = nmd->cur_tx_ring; ; ) { + struct netmap_ring *ring = NETMAP_TXRING(nmd->nifp, r); + uint32_t cur, idx; + char *buf; + + if (nm_ring_empty(ring)) { + r++; + if (r > nmd->last_tx_ring) + r = nmd->first_tx_ring; + if (r == nmd->cur_rx_ring) + break; + continue; + } + cur = ring->cur; + idx = ring->slot[cur].buf_idx; + buf = NETMAP_BUF(ring, idx); + + for (i = 0; i < iovcnt; i++) { + memcpy(&buf[len], iov[i].iov_base, iov[i].iov_len); + len += iov[i].iov_len; + } + ring->slot[cur].len = len; + ring->head = ring->cur = nm_ring_next(ring, cur); + nmd->cur_tx_ring = r; + ioctl(nmd->fd, NIOCTXSYNC, NULL); + 
break; + } + + return (len); +} + +static inline int +pci_vtnet_netmap_readv(struct nm_desc *nmd, struct iovec *iov, int iovcnt) +{ + int len = 0; + int i = 0; + int r; + + for (r = nmd->cur_rx_ring; ; ) { + struct netmap_ring *ring = NETMAP_RXRING(nmd->nifp, r); + uint32_t cur, idx; + char *buf; + size_t left; + + if (nm_ring_empty(ring)) { + r++; + if (r > nmd->last_rx_ring) + r = nmd->first_rx_ring; + if (r == nmd->cur_rx_ring) + break; + continue; + } + cur = ring->cur; + idx = ring->slot[cur].buf_idx; + buf = NETMAP_BUF(ring, idx); + left = ring->slot[cur].len; + + for (i = 0; i < iovcnt && left > 0; i++) { + if (iov[i].iov_len > left) + iov[i].iov_len = left; + memcpy(iov[i].iov_base, &buf[len], iov[i].iov_len); + len += iov[i].iov_len; + left -= iov[i].iov_len; + } + ring->head = ring->cur = nm_ring_next(ring, cur); + nmd->cur_rx_ring = r; + ioctl(nmd->fd, NIOCRXSYNC, NULL); + break; + } + for (; i < iovcnt; i++) + iov[i].iov_len = 0; + + return (len); +} + +/* + * Called to send a buffer chain out to the vale port + */ static void -pci_vtnet_tap_callback(int fd, enum ev_type type, void *param) +pci_vtnet_netmap_tx(struct pci_vtnet_softc *sc, struct iovec *iov, int iovcnt, + int len) +{ + static char pad[60]; /* all zero bytes */ + + if (sc->vsc_nmd == NULL) + return; + + /* + * If the length is < 60, pad out to that and add the + * extra zero'd segment to the iov. It is guaranteed that + * there is always an extra iov available by the caller. 
+ */ + if (len < 60) { + iov[iovcnt].iov_base = pad; + iov[iovcnt].iov_len = 60 - len; + iovcnt++; + } + (void) pci_vtnet_netmap_writev(sc->vsc_nmd, iov, iovcnt); +} + +static void +pci_vtnet_netmap_rx(struct pci_vtnet_softc *sc) +{ + struct iovec iov[VTNET_MAXSEGS], *riov; + struct vqueue_info *vq; + void *vrx; + int len, n; + uint16_t idx; + + /* + * Should never be called without a valid netmap descriptor + */ + assert(sc->vsc_nmd != NULL); + + /* + * But, will be called when the rx ring hasn't yet + * been set up or the guest is resetting the device. + */ + if (!sc->vsc_rx_ready || sc->resetting) { + /* + * Drop the packet and try later. + */ + (void) nm_nextpkt(sc->vsc_nmd, (void *)dummybuf); + return; + } + + /* + * Check for available rx buffers + */ + vq = &sc->vsc_queues[VTNET_RXQ]; + if (!vq_has_descs(vq)) { + /* + * Drop the packet and try later. Interrupt on + * empty, if that's negotiated. + */ + (void) nm_nextpkt(sc->vsc_nmd, (void *)dummybuf); + vq_endchains(vq, 1); + return; + } + + do { + /* + * Get descriptor chain. + */ + n = vq_getchain(vq, &idx, iov, VTNET_MAXSEGS, NULL); + assert(n >= 1 && n <= VTNET_MAXSEGS); + + /* + * Get a pointer to the rx header, and use the + * data immediately following it for the packet buffer. + */ + vrx = iov[0].iov_base; + riov = rx_iov_trim(iov, &n, sc->rx_vhdrlen); + + len = pci_vtnet_netmap_readv(sc->vsc_nmd, riov, n); + + if (len == 0) { + /* + * No more packets, but still some avail ring + * entries. Interrupt if needed/appropriate. + */ + vq_endchains(vq, 0); + return; + } + + /* + * The only valid field in the rx packet header is the + * number of buffers if merged rx bufs were negotiated. + */ + memset(vrx, 0, sc->rx_vhdrlen); + + if (sc->rx_merge) { + struct virtio_net_rxhdr *vrxh; + + vrxh = vrx; + vrxh->vrh_bufs = 1; + } + + /* + * Release this chain and handle more chains. 
+ */ + vq_relchain(vq, idx, len + sc->rx_vhdrlen); + } while (vq_has_descs(vq)); + + /* Interrupt if needed, including for NOTIFY_ON_EMPTY. */ + vq_endchains(vq, 1); +} + +static void +pci_vtnet_rx_callback(int fd, enum ev_type type, void *param) { struct pci_vtnet_softc *sc = param; pthread_mutex_lock(&sc->rx_mtx); sc->rx_in_progress = 1; - pci_vtnet_tap_rx(sc); + sc->pci_vtnet_rx(sc); sc->rx_in_progress = 0; pthread_mutex_unlock(&sc->rx_mtx); @@ -421,7 +625,7 @@ pci_vtnet_proctx(struct pci_vtnet_softc *sc, struct vqueue_info *vq) } DPRINTF(("virtio: packet send, %d bytes, %d segs\n\r", plen, n)); - pci_vtnet_tap_tx(sc, &iov[1], n - 1, plen); + sc->pci_vtnet_tx(sc, &iov[1], n - 1, plen); /* chain is processed, release it and set tlen */ vq_relchain(vq, idx, tlen); @@ -532,6 +736,67 @@ pci_vtnet_parsemac(char *mac_str, uint8_t *mac_addr) return (0); } +static void +pci_vtnet_tap_setup(struct pci_vtnet_softc *sc, char *devname) +{ + char tbuf[80]; + + strcpy(tbuf, "/dev/"); + strlcat(tbuf, devname, sizeof(tbuf)); + + sc->pci_vtnet_rx = pci_vtnet_tap_rx; + sc->pci_vtnet_tx = pci_vtnet_tap_tx; + + sc->vsc_tapfd = open(tbuf, O_RDWR); + if (sc->vsc_tapfd == -1) { + WPRINTF(("open of tap device %s failed\n", tbuf)); + return; + } + + /* + * Set non-blocking and register for read + * notifications with the event loop + */ + int opt = 1; + if (ioctl(sc->vsc_tapfd, FIONBIO, &opt) < 0) { + WPRINTF(("tap device O_NONBLOCK failed\n")); + close(sc->vsc_tapfd); + sc->vsc_tapfd = -1; + } + + sc->vsc_mevp = mevent_add(sc->vsc_tapfd, + EVF_READ, + pci_vtnet_rx_callback, + sc); + if (sc->vsc_mevp == NULL) { + WPRINTF(("Could not register event\n")); + close(sc->vsc_tapfd); + sc->vsc_tapfd = -1; + } +} + +static void +pci_vtnet_netmap_setup(struct pci_vtnet_softc *sc, char *ifname) +{ + sc->pci_vtnet_rx = pci_vtnet_netmap_rx; + sc->pci_vtnet_tx = pci_vtnet_netmap_tx; + + sc->vsc_nmd = nm_open(ifname, NULL, 0, 0); + if (sc->vsc_nmd == NULL) { + WPRINTF(("open of netmap device %s 
failed\n", ifname)); + return; + } + + sc->vsc_mevp = mevent_add(sc->vsc_nmd->fd, + EVF_READ, + pci_vtnet_rx_callback, + sc); + if (sc->vsc_mevp == NULL) { + WPRINTF(("Could not register event\n")); + nm_close(sc->vsc_nmd); + sc->vsc_nmd = NULL; + } +} static int pci_vtnet_init(struct vmctx *ctx, struct pci_devinst *pi, char *opts) @@ -567,8 +832,8 @@ pci_vtnet_init(struct vmctx *ctx, struct pci_devinst *pi, char *opts) */ mac_provided = 0; sc->vsc_tapfd = -1; + sc->vsc_nmd = NULL; if (opts != NULL) { - char tbuf[80]; int err; devname = vtopts = strdup(opts); @@ -583,36 +848,12 @@ pci_vtnet_init(struct vmctx *ctx, struct pci_devinst *pi, char *opts) mac_provided = 1; } - strcpy(tbuf, "/dev/"); - strlcat(tbuf, devname, sizeof(tbuf)); + if (strncmp(devname, "vale", 4) == 0) + pci_vtnet_netmap_setup(sc, devname); + if (strncmp(devname, "tap", 3) == 0) + pci_vtnet_tap_setup(sc, devname); free(devname); - - sc->vsc_tapfd = open(tbuf, O_RDWR); - if (sc->vsc_tapfd == -1) { - WPRINTF(("open of tap device %s failed\n", tbuf)); - } else { - /* - * Set non-blocking and register for read - * notifications with the event loop - */ - int opt = 1; - if (ioctl(sc->vsc_tapfd, FIONBIO, &opt) < 0) { - WPRINTF(("tap device O_NONBLOCK failed\n")); - close(sc->vsc_tapfd); - sc->vsc_tapfd = -1; - } - - sc->vsc_mevp = mevent_add(sc->vsc_tapfd, - EVF_READ, - pci_vtnet_tap_callback, - sc); - if (sc->vsc_mevp == NULL) { - WPRINTF(("Could not register event\n")); - close(sc->vsc_tapfd); - sc->vsc_tapfd = -1; - } - } } /* From 77e4457b19380efdbac7a54dfff7d7e6dc9dd6f2 Mon Sep 17 00:00:00 2001 From: Steven Hartland Date: Sat, 9 Jan 2016 03:20:01 +0000 Subject: [PATCH 21/67] Switch EFI boot1 to use libstand ARM and i386 already required libstand so switch to using it for all platforms, allowing the removal of custom print and memory methods. This is also a precursor to enabling WARNS which highlighted a number of issues with the removed methods. 
MFC after: 2 weeks X-MFC-With: r293268 Sponsored by: Multiplay --- sys/boot/efi/boot1/Makefile | 3 +- sys/boot/efi/boot1/boot1.c | 269 ++---------------------------------- 2 files changed, 10 insertions(+), 262 deletions(-) diff --git a/sys/boot/efi/boot1/Makefile b/sys/boot/efi/boot1/Makefile index 3642314eac4..6ac63ee87a0 100644 --- a/sys/boot/efi/boot1/Makefile +++ b/sys/boot/efi/boot1/Makefile @@ -41,14 +41,13 @@ CFLAGS+= -fPIC LDFLAGS+= -Wl,-znocombreloc .endif -.if ${MACHINE_CPUARCH} == "arm" || ${MACHINE_CPUARCH} == "i386" # # Add libstand for the runtime functions used by the compiler - for example # __aeabi_* (arm) or __divdi3 (i386). +# as well as required string and memory functions for all platforms. # DPADD+= ${LIBSTAND} LDADD+= -lstand -.endif DPADD+= ${LDSCRIPT} diff --git a/sys/boot/efi/boot1/boot1.c b/sys/boot/efi/boot1/boot1.c index e2e4c907dd0..458a73c2a3c 100644 --- a/sys/boot/efi/boot1/boot1.c +++ b/sys/boot/efi/boot1/boot1.c @@ -24,6 +24,7 @@ __FBSDID("$FreeBSD$"); #include #include #include +#include #include #include @@ -33,28 +34,8 @@ __FBSDID("$FreeBSD$"); #define BSIZEMAX 16384 -typedef int putc_func_t(char c, void *arg); - -struct sp_data { - char *sp_buf; - u_int sp_len; - u_int sp_size; -}; - -static const char digits[] = "0123456789abcdef"; - -static void panic(const char *fmt, ...) __dead2; -static int printf(const char *fmt, ...); -static int putchar(char c, void *arg); -static int vprintf(const char *fmt, va_list ap); -static int vsnprintf(char *str, size_t sz, const char *fmt, va_list ap); - -static int __printf(const char *fmt, putc_func_t *putc, void *arg, va_list ap); -static int __putc(char c, void *arg); -static int __puts(const char *s, putc_func_t *putc, void *arg); -static int __sputc(char c, void *arg); -static char *__uitoa(char *buf, u_int val, int base); -static char *__ultoa(char *buf, u_long val, int base); +void panic(const char *fmt, ...) 
__dead2; +void putchar(int c); static int domount(EFI_DEVICE_PATH *device, EFI_BLOCK_IO *blkio, int quiet); static void load(const char *fname); @@ -62,39 +43,6 @@ static void load(const char *fname); static EFI_SYSTEM_TABLE *systab; static EFI_HANDLE *image; -static void -bcopy(const void *src, void *dst, size_t len) -{ - const char *s = src; - char *d = dst; - - while (len-- != 0) - *d++ = *s++; -} - -static void -memcpy(void *dst, const void *src, size_t len) -{ - bcopy(src, dst, len); -} - -static void -bzero(void *b, size_t len) -{ - char *p = b; - - while (len-- != 0) - *p++ = 0; -} - -static int -strcmp(const char *s1, const char *s2) -{ - for (; *s1 == *s2 && *s1; s1++, s2++) - ; - return ((u_char)*s1 - (u_char)*s2); -} - static EFI_GUID BlockIoProtocolGUID = BLOCK_IO_PROTOCOL; static EFI_GUID DevicePathGUID = DEVICE_PATH_PROTOCOL; static EFI_GUID LoadedImageGUID = LOADED_IMAGE_PROTOCOL; @@ -346,38 +294,22 @@ load(const char *fname) EFI_ERROR_CODE(status)); } -static void +void panic(const char *fmt, ...) { - char buf[128]; va_list ap; + printf("panic: "); va_start(ap, fmt); - vsnprintf(buf, sizeof buf, fmt, ap); - printf("panic: %s\n", buf); + vprintf(fmt, ap); va_end(ap); + printf("\n"); while (1) {} } -static int -printf(const char *fmt, ...) 
-{ - va_list ap; - int ret; - - /* Don't annoy the user as we probe for partitions */ - if (strcmp(fmt,"Not ufs\n") == 0) - return 0; - - va_start(ap, fmt); - ret = vprintf(fmt, ap); - va_end(ap); - return (ret); -} - -static int -putchar(char c, void *arg) +void +putchar(int c) { CHAR16 buf[2]; @@ -389,187 +321,4 @@ putchar(char c, void *arg) buf[0] = c; buf[1] = 0; systab->ConOut->OutputString(systab->ConOut, buf); - return (1); -} - -static int -vprintf(const char *fmt, va_list ap) -{ - int ret; - - ret = __printf(fmt, putchar, 0, ap); - return (ret); -} - -static int -vsnprintf(char *str, size_t sz, const char *fmt, va_list ap) -{ - struct sp_data sp; - int ret; - - sp.sp_buf = str; - sp.sp_len = 0; - sp.sp_size = sz; - ret = __printf(fmt, __sputc, &sp, ap); - return (ret); -} - -static int -__printf(const char *fmt, putc_func_t *putc, void *arg, va_list ap) -{ - char buf[(sizeof(long) * 8) + 1]; - char *nbuf; - u_long ul; - u_int ui; - int lflag; - int sflag; - char *s; - int pad; - int ret; - int c; - - nbuf = &buf[sizeof buf - 1]; - ret = 0; - while ((c = *fmt++) != 0) { - if (c != '%') { - ret += putc(c, arg); - continue; - } - lflag = 0; - sflag = 0; - pad = 0; -reswitch: c = *fmt++; - switch (c) { - case '#': - sflag = 1; - goto reswitch; - case '%': - ret += putc('%', arg); - break; - case 'c': - c = va_arg(ap, int); - ret += putc(c, arg); - break; - case 'd': - if (lflag == 0) { - ui = (u_int)va_arg(ap, int); - if (ui < (int)ui) { - ui = -ui; - ret += putc('-', arg); - } - s = __uitoa(nbuf, ui, 10); - } else { - ul = (u_long)va_arg(ap, long); - if (ul < (long)ul) { - ul = -ul; - ret += putc('-', arg); - } - s = __ultoa(nbuf, ul, 10); - } - ret += __puts(s, putc, arg); - break; - case 'l': - lflag = 1; - goto reswitch; - case 'o': - if (lflag == 0) { - ui = (u_int)va_arg(ap, u_int); - s = __uitoa(nbuf, ui, 8); - } else { - ul = (u_long)va_arg(ap, u_long); - s = __ultoa(nbuf, ul, 8); - } - ret += __puts(s, putc, arg); - break; - case 'p': - ul = 
(u_long)va_arg(ap, void *); - s = __ultoa(nbuf, ul, 16); - ret += __puts("0x", putc, arg); - ret += __puts(s, putc, arg); - break; - case 's': - s = va_arg(ap, char *); - ret += __puts(s, putc, arg); - break; - case 'u': - if (lflag == 0) { - ui = va_arg(ap, u_int); - s = __uitoa(nbuf, ui, 10); - } else { - ul = va_arg(ap, u_long); - s = __ultoa(nbuf, ul, 10); - } - ret += __puts(s, putc, arg); - break; - case 'x': - if (lflag == 0) { - ui = va_arg(ap, u_int); - s = __uitoa(nbuf, ui, 16); - } else { - ul = va_arg(ap, u_long); - s = __ultoa(nbuf, ul, 16); - } - if (sflag) - ret += __puts("0x", putc, arg); - ret += __puts(s, putc, arg); - break; - case '0': case '1': case '2': case '3': case '4': - case '5': case '6': case '7': case '8': case '9': - pad = pad * 10 + c - '0'; - goto reswitch; - default: - break; - } - } - return (ret); -} - -static int -__sputc(char c, void *arg) -{ - struct sp_data *sp; - - sp = arg; - if (sp->sp_len < sp->sp_size) - sp->sp_buf[sp->sp_len++] = c; - sp->sp_buf[sp->sp_len] = '\0'; - return (1); -} - -static int -__puts(const char *s, putc_func_t *putc, void *arg) -{ - const char *p; - int ret; - - ret = 0; - for (p = s; *p != '\0'; p++) - ret += putc(*p, arg); - return (ret); -} - -static char * -__uitoa(char *buf, u_int ui, int base) -{ - char *p; - - p = buf; - *p = '\0'; - do - *--p = digits[ui % base]; - while ((ui /= base) != 0); - return (p); -} - -static char * -__ultoa(char *buf, u_long ul, int base) -{ - char *p; - - p = buf; - *p = '\0'; - do - *--p = digits[ul % base]; - while ((ul /= base) != 0); - return (p); } From 1b1632a2437c8422b4177b6bea2c11f21525e111 Mon Sep 17 00:00:00 2001 From: Steven Hartland Date: Sat, 9 Jan 2016 03:30:33 +0000 Subject: [PATCH 22/67] Remove hidden "Not ufs" printfs from boot code Remove the printf("Not ufs\n") from the boot code which was hidden by the local printf implementations, allowing these to have that code removed too. 
MFC after: 2 weeks X-MFC-With: r293268 Sponsored by: Multiplay --- sys/boot/common/ufsread.c | 1 - sys/boot/efi/boot1/boot1.c | 1 - sys/boot/powerpc/boot1.chrp/boot1.c | 4 ---- 3 files changed, 6 deletions(-) diff --git a/sys/boot/common/ufsread.c b/sys/boot/common/ufsread.c index c02010f5284..d0ca57a5775 100644 --- a/sys/boot/common/ufsread.c +++ b/sys/boot/common/ufsread.c @@ -211,7 +211,6 @@ fsread(ufs_ino_t inode, void *buf, size_t nbyte) break; } if (sblock_try[n] == -1) { - printf("Not ufs\n"); return -1; } dsk_meta++; diff --git a/sys/boot/efi/boot1/boot1.c b/sys/boot/efi/boot1/boot1.c index 458a73c2a3c..b7592676ad3 100644 --- a/sys/boot/efi/boot1/boot1.c +++ b/sys/boot/efi/boot1/boot1.c @@ -198,7 +198,6 @@ fsstat(ufs_ino_t inode) break; } if (sblock_try[n] == -1) { - printf("Not ufs\n"); return -1; } dsk_meta++; diff --git a/sys/boot/powerpc/boot1.chrp/boot1.c b/sys/boot/powerpc/boot1.chrp/boot1.c index 30e695b7e5a..af22488a11e 100644 --- a/sys/boot/powerpc/boot1.chrp/boot1.c +++ b/sys/boot/powerpc/boot1.chrp/boot1.c @@ -564,10 +564,6 @@ printf(const char *fmt, ...) va_list ap; int ret; - /* Don't annoy the user as we probe for partitions */ - if (strcmp(fmt,"Not ufs\n") == 0) - return 0; - va_start(ap, fmt); ret = vprintf(fmt, ap); va_end(ap); From 16703ea8115a814be9cc31239073a5f16efebab1 Mon Sep 17 00:00:00 2001 From: "Alexander V. Chernikov" Date: Sat, 9 Jan 2016 05:39:06 +0000 Subject: [PATCH 23/67] Please Coverity by removing unnecessary check (rt_key() is always set).
Coverity CID: 1347797 --- sys/net/route.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sys/net/route.c b/sys/net/route.c index e09cc23887b..4df96901218 100644 --- a/sys/net/route.c +++ b/sys/net/route.c @@ -858,7 +858,7 @@ rt_exportinfo(struct rtentry *rt, struct rt_addrinfo *info, int flags) src = rt_key(rt); dst = info->rti_info[RTAX_DST]; sa_len = src->sa_len; - if (src != NULL && dst != NULL) { + if (dst != NULL) { if (src->sa_len > dst->sa_len) return (ENOMEM); memcpy(dst, src, src->sa_len); From f2b2e77a4190c80d2a8b495b7b830b7ac7bef90f Mon Sep 17 00:00:00 2001 From: "Alexander V. Chernikov" Date: Sat, 9 Jan 2016 06:26:40 +0000 Subject: [PATCH 24/67] (Temporarily) remove route_redirect_event eventhandler. Such handler should pass different set of variables, instead of directly providing 2 locked route entries. Given that it hasn't been really used since at least 2012, remove current code. Will re-add it after finishing most major routing-related changes. Discussed with: np --- sys/net/route.c | 15 ++------------- sys/net/route.h | 2 -- sys/netinet/toecore.c | 15 --------------- 3 files changed, 2 insertions(+), 30 deletions(-) diff --git a/sys/net/route.c b/sys/net/route.c index 4df96901218..9698dd398ac 100644 --- a/sys/net/route.c +++ b/sys/net/route.c @@ -568,7 +568,7 @@ rtredirect_fib(struct sockaddr *dst, struct sockaddr *src, u_int fibnum) { - struct rtentry *rt, *rt0 = NULL; + struct rtentry *rt; int error = 0; short *stat = NULL; struct rt_addrinfo info; @@ -627,7 +627,7 @@ rtredirect_fib(struct sockaddr *dst, * Create new route, rather than smashing route to net. 
*/ create: - rt0 = rt; + RTFREE(rt); rt = NULL; flags |= RTF_DYNAMIC; @@ -637,21 +637,14 @@ rtredirect_fib(struct sockaddr *dst, info.rti_info[RTAX_NETMASK] = netmask; info.rti_ifa = ifa; info.rti_flags = flags; - if (rt0 != NULL) - RT_UNLOCK(rt0); /* drop lock to avoid LOR with RNH */ error = rtrequest1_fib(RTM_ADD, &info, &rt, fibnum); if (rt != NULL) { RT_LOCK(rt); - if (rt0 != NULL) - EVENTHANDLER_INVOKE(route_redirect_event, rt0, rt, dst); flags = rt->rt_flags; } - if (rt0 != NULL) - RTFREE(rt0); stat = &V_rtstat.rts_dynamic; } else { - struct rtentry *gwrt; /* * Smash the current notion of the gateway to @@ -669,11 +662,7 @@ rtredirect_fib(struct sockaddr *dst, RADIX_NODE_HEAD_LOCK(rnh); RT_LOCK(rt); rt_setgate(rt, rt_key(rt), gateway); - gwrt = rtalloc1(gateway, 1, RTF_RNH_LOCKED); RADIX_NODE_HEAD_UNLOCK(rnh); - EVENTHANDLER_INVOKE(route_redirect_event, rt, gwrt, dst); - if (gwrt) - RTFREE_LOCKED(gwrt); } } else error = EHOSTUNREACH; diff --git a/sys/net/route.h b/sys/net/route.h index 7c69e1c9a1c..473e402ecdd 100644 --- a/sys/net/route.h +++ b/sys/net/route.h @@ -468,8 +468,6 @@ int rib_lookup_info(uint32_t, const struct sockaddr *, uint32_t, uint32_t, void rib_free_info(struct rt_addrinfo *info); #include -typedef void (*rtevent_redirect_fn)(void *, struct rtentry *, struct rtentry *, struct sockaddr *); -EVENTHANDLER_DECLARE(route_redirect_event, rtevent_redirect_fn); #endif #endif diff --git a/sys/netinet/toecore.c b/sys/netinet/toecore.c index 6ed8eb02d26..cfa77e76a12 100644 --- a/sys/netinet/toecore.c +++ b/sys/netinet/toecore.c @@ -70,7 +70,6 @@ static TAILQ_HEAD(, toedev) toedev_list; static eventhandler_tag listen_start_eh; static eventhandler_tag listen_stop_eh; static eventhandler_tag lle_event_eh; -static eventhandler_tag route_redirect_eh; static int toedev_connect(struct toedev *tod __unused, struct socket *so __unused, @@ -437,17 +436,6 @@ toe_lle_event(void *arg __unused, struct llentry *lle, int evt) tod->tod_l2_update(tod, ifp, sa, lladdr, 
vtag); } -/* - * XXX: implement. - */ -static void -toe_route_redirect_event(void *arg __unused, struct rtentry *rt0, - struct rtentry *rt1, struct sockaddr *sa) -{ - - return; -} - /* * Returns 0 or EWOULDBLOCK on success (any other value is an error). 0 means * lladdr and vtag are valid on return, EWOULDBLOCK means the TOE driver's @@ -534,8 +522,6 @@ toecore_load(void) toe_listen_stop_event, NULL, EVENTHANDLER_PRI_ANY); lle_event_eh = EVENTHANDLER_REGISTER(lle_event, toe_lle_event, NULL, EVENTHANDLER_PRI_ANY); - route_redirect_eh = EVENTHANDLER_REGISTER(route_redirect_event, - toe_route_redirect_event, NULL, EVENTHANDLER_PRI_ANY); return (0); } @@ -553,7 +539,6 @@ toecore_unload(void) EVENTHANDLER_DEREGISTER(tcp_offload_listen_start, listen_start_eh); EVENTHANDLER_DEREGISTER(tcp_offload_listen_stop, listen_stop_eh); EVENTHANDLER_DEREGISTER(lle_event, lle_event_eh); - EVENTHANDLER_DEREGISTER(route_redirect_event, route_redirect_eh); mtx_unlock(&toedev_lock); mtx_destroy(&toedev_lock); From 06ddd0b7ac08ee057f969b2adf2c7ba8379ea6e1 Mon Sep 17 00:00:00 2001 From: "Andrey V. Elsukov" Date: Sat, 9 Jan 2016 08:02:35 +0000 Subject: [PATCH 25/67] Fix a typo. PR: 205722 --- lib/libstand/uuid_to_string.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/libstand/uuid_to_string.c b/lib/libstand/uuid_to_string.c index efe921c10b9..418e8dcc629 100644 --- a/lib/libstand/uuid_to_string.c +++ b/lib/libstand/uuid_to_string.c @@ -107,5 +107,5 @@ uuid_to_string(const uuid_t *u, char **s, uint32_t *status) tohex(&w, 2, u->node[3]); tohex(&w, 2, u->node[4]); tohex(&w, 2, u->node[5]); - *w++ - '\0'; + *w++ = '\0'; } From 6299675b9f53997ce5fe87e78f9637db4a95fa1a Mon Sep 17 00:00:00 2001 From: "Andrey V. Elsukov" Date: Sat, 9 Jan 2016 08:04:29 +0000 Subject: [PATCH 26/67] Make tohex() work as expected. 
--- lib/libstand/uuid_to_string.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/libstand/uuid_to_string.c b/lib/libstand/uuid_to_string.c index 418e8dcc629..d878af495ac 100644 --- a/lib/libstand/uuid_to_string.c +++ b/lib/libstand/uuid_to_string.c @@ -46,7 +46,7 @@ tohex(char **buf, int len, uint32_t val) char *walker = *buf; int i; - for (i = len - 1; i >= 0; i++) { + for (i = len - 1; i >= 0; i--) { walker[i] = hexstr[val & 0xf]; val >>= 4; } From 1c2b1cecf14f409ed48d52f914ddafe0609231e6 Mon Sep 17 00:00:00 2001 From: Xin LI Date: Sat, 9 Jan 2016 09:33:24 +0000 Subject: [PATCH 27/67] Fix version number. --- usr.sbin/ntp/scripts/mkver | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/usr.sbin/ntp/scripts/mkver b/usr.sbin/ntp/scripts/mkver index 2bc36b5354a..6a997560504 100755 --- a/usr.sbin/ntp/scripts/mkver +++ b/usr.sbin/ntp/scripts/mkver @@ -6,7 +6,7 @@ PROG=${1-UNKNOWN} ConfStr="$PROG" -ConfStr="$ConfStr 4.2.8p4" +ConfStr="$ConfStr 4.2.8p5" case "$CSET" in '') ;; From ea8d14925c6a7e96949493984f558411d0fa4380 Mon Sep 17 00:00:00 2001 From: "Alexander V. 
Chernikov" Date: Sat, 9 Jan 2016 09:34:39 +0000 Subject: [PATCH 28/67] Remove sys/eventhandler.h from net/route.h Reviewed by: ae --- sys/net/route.h | 1 - sys/net80211/ieee80211_freebsd.c | 1 + sys/netgraph/netflow/netflow.c | 1 + sys/netgraph/netflow/netflow_v9.c | 1 + sys/netgraph/netflow/ng_netflow.c | 1 + sys/netinet/in_pcb.c | 1 + sys/netinet/ip_encap.c | 2 ++ sys/netinet/ip_mroute.c | 1 + sys/netinet/raw_ip.c | 1 + sys/netinet/tcp_reass.c | 1 + sys/netinet/tcp_subr.c | 1 + sys/netinet6/frag6.c | 1 + sys/netpfil/pf/pf_if.c | 1 + 13 files changed, 13 insertions(+), 1 deletion(-) diff --git a/sys/net/route.h b/sys/net/route.h index 473e402ecdd..25d45f4ff06 100644 --- a/sys/net/route.h +++ b/sys/net/route.h @@ -467,7 +467,6 @@ int rib_lookup_info(uint32_t, const struct sockaddr *, uint32_t, uint32_t, struct rt_addrinfo *); void rib_free_info(struct rt_addrinfo *info); -#include #endif #endif diff --git a/sys/net80211/ieee80211_freebsd.c b/sys/net80211/ieee80211_freebsd.c index 7f2eae5dcfc..0ccf378fff6 100644 --- a/sys/net80211/ieee80211_freebsd.c +++ b/sys/net80211/ieee80211_freebsd.c @@ -34,6 +34,7 @@ __FBSDID("$FreeBSD$"); #include #include #include +#include #include #include #include diff --git a/sys/netgraph/netflow/netflow.c b/sys/netgraph/netflow/netflow.c index 644d46f7a71..87ff1890902 100644 --- a/sys/netgraph/netflow/netflow.c +++ b/sys/netgraph/netflow/netflow.c @@ -37,6 +37,7 @@ __FBSDID("$FreeBSD$"); #include #include #include +#include #include #include #include diff --git a/sys/netgraph/netflow/netflow_v9.c b/sys/netgraph/netflow/netflow_v9.c index fe85f42ebbe..2fc700d5d4f 100644 --- a/sys/netgraph/netflow/netflow_v9.c +++ b/sys/netgraph/netflow/netflow_v9.c @@ -35,6 +35,7 @@ __FBSDID("$FreeBSD$"); #include #include #include +#include #include #include #include diff --git a/sys/netgraph/netflow/ng_netflow.c b/sys/netgraph/netflow/ng_netflow.c index b5a82a41802..b524ca51fcb 100644 --- a/sys/netgraph/netflow/ng_netflow.c +++ 
b/sys/netgraph/netflow/ng_netflow.c @@ -38,6 +38,7 @@ __FBSDID("$FreeBSD$"); #include #include #include +#include #include #include #include diff --git a/sys/netinet/in_pcb.c b/sys/netinet/in_pcb.c index 94544265d24..ec42e67d594 100644 --- a/sys/netinet/in_pcb.c +++ b/sys/netinet/in_pcb.c @@ -51,6 +51,7 @@ __FBSDID("$FreeBSD$"); #include #include #include +#include #include #include #include diff --git a/sys/netinet/ip_encap.c b/sys/netinet/ip_encap.c index d3d011656ea..f186fe2f7a5 100644 --- a/sys/netinet/ip_encap.c +++ b/sys/netinet/ip_encap.c @@ -65,6 +65,8 @@ __FBSDID("$FreeBSD$"); #include #include +#include +#include #include #include #include diff --git a/sys/netinet/ip_mroute.c b/sys/netinet/ip_mroute.c index ffa88ae43ac..9b762d6a631 100644 --- a/sys/netinet/ip_mroute.c +++ b/sys/netinet/ip_mroute.c @@ -77,6 +77,7 @@ __FBSDID("$FreeBSD$"); #include #include #include +#include #include #include #include diff --git a/sys/netinet/raw_ip.c b/sys/netinet/raw_ip.c index 4e9fedaac81..2eecb95c7ba 100644 --- a/sys/netinet/raw_ip.c +++ b/sys/netinet/raw_ip.c @@ -40,6 +40,7 @@ __FBSDID("$FreeBSD$"); #include #include #include +#include #include #include #include diff --git a/sys/netinet/tcp_reass.c b/sys/netinet/tcp_reass.c index 46771545956..a22fb3081d6 100644 --- a/sys/netinet/tcp_reass.c +++ b/sys/netinet/tcp_reass.c @@ -38,6 +38,7 @@ __FBSDID("$FreeBSD$"); #include #include +#include #include #include #include diff --git a/sys/netinet/tcp_subr.c b/sys/netinet/tcp_subr.c index 9f21f116f16..d68a8a687d1 100644 --- a/sys/netinet/tcp_subr.c +++ b/sys/netinet/tcp_subr.c @@ -41,6 +41,7 @@ __FBSDID("$FreeBSD$"); #include #include #include +#include #include #include #include diff --git a/sys/netinet6/frag6.c b/sys/netinet6/frag6.c index 2f2b86997ad..a2f66353471 100644 --- a/sys/netinet6/frag6.c +++ b/sys/netinet6/frag6.c @@ -39,6 +39,7 @@ __FBSDID("$FreeBSD$"); #include #include #include +#include #include #include #include diff --git a/sys/netpfil/pf/pf_if.c 
b/sys/netpfil/pf/pf_if.c index a2c9c7eca1b..6b25f5fe840 100644 --- a/sys/netpfil/pf/pf_if.c +++ b/sys/netpfil/pf/pf_if.c @@ -41,6 +41,7 @@ __FBSDID("$FreeBSD$"); #include #include +#include #include #include #include From 5dba456c143612532ca3338dfc9edb3aed9a7934 Mon Sep 17 00:00:00 2001 From: "Alexander V. Chernikov" Date: Sat, 9 Jan 2016 11:41:37 +0000 Subject: [PATCH 29/67] Remove prefix check from in6_addroute(). This check was added in the initial(?) netinet6/ import back in 1999 (r53541). It effectively became unnecessary after 'address/prefix clean-ups' KAME commit 90ff8792e676132096a440dd787f99a5a5860ee8 (github) in 2001 (merged to FreeBSD in r78064) where prefix check was added to nd6_prefix_onlink(). Similar IPv4 check (in_addroute()) was added in r137628. Additionally, the right place for this (or similar) check is the prefix addition code (nd6_prefix_onlink(), nd6_prefix_onlink_rtrequest(), in_addprefix() or rtinit()), but not the generic radix insert routine. --- sys/netinet6/in6_rmx.c | 30 +----------------------------- 1 file changed, 1 insertion(+), 29 deletions(-) diff --git a/sys/netinet6/in6_rmx.c b/sys/netinet6/in6_rmx.c index 97e1216247a..93c786c7d7a 100644 --- a/sys/netinet6/in6_rmx.c +++ b/sys/netinet6/in6_rmx.c @@ -107,7 +107,6 @@ in6_addroute(void *v_arg, void *n_arg, struct radix_node_head *head, { struct rtentry *rt = (struct rtentry *)treenodes; struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *)rt_key(rt); - struct radix_node *ret; RADIX_NODE_HEAD_WLOCK_ASSERT(head); if (IN6_IS_ADDR_MULTICAST(&sin6->sin6_addr)) @@ -148,34 +147,7 @@ in6_addroute(void *v_arg, void *n_arg, struct radix_node_head *head, rt->rt_mtu = IN6_LINKMTU(rt->rt_ifp); } - ret = rn_addroute(v_arg, n_arg, head, treenodes); - if (ret == NULL) { - struct rtentry *rt2; - /* - * We are trying to add a net route, but can't.
- * The following case should be allowed, so we'll make a - * special check for this: - * Two IPv6 addresses with the same prefix is assigned - * to a single interrface. - * # ifconfig if0 inet6 3ffe:0501::1 prefix 64 alias (*1) - * # ifconfig if0 inet6 3ffe:0501::2 prefix 64 alias (*2) - * In this case, (*1) and (*2) want to add the same - * net route entry, 3ffe:0501:: -> if0. - * This case should not raise an error. - */ - rt2 = in6_rtalloc1((struct sockaddr *)sin6, 0, RTF_RNH_LOCKED, - rt->rt_fibnum); - if (rt2) { - if (((rt2->rt_flags & (RTF_HOST|RTF_GATEWAY)) == 0) - && rt2->rt_gateway - && rt2->rt_gateway->sa_family == AF_LINK - && rt2->rt_ifp == rt->rt_ifp) { - ret = rt2->rt_nodes; - } - RTFREE_LOCKED(rt2); - } - } - return (ret); + return (rn_addroute(v_arg, n_arg, head, treenodes)); } /* From ab707ec6681d72550f820bfc8039d3e4a595e0d0 Mon Sep 17 00:00:00 2001 From: Kevin Lo Date: Sat, 9 Jan 2016 14:53:23 +0000 Subject: [PATCH 30/67] - Add the definition of CHARCLASS_NAME_MAX, as per POSIX.1-2001. - Avoid namespace pollution and move definitions of _POSIX2_CHARCLASS_NAME_MAX and _POSIX2_COLL_WEIGHTS_MAX into the .2001 section. With input from bde. 
Submitted by bde --- include/limits.h | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/include/limits.h b/include/limits.h index 3910928315f..a1e1b982082 100644 --- a/include/limits.h +++ b/include/limits.h @@ -59,10 +59,12 @@ #define _POSIX_TZNAME_MAX 3 #endif +#if __POSIX_VISIBLE >= 200112 #define BC_BASE_MAX 99 /* max ibase/obase values in bc(1) */ #define BC_DIM_MAX 2048 /* max array elements in bc(1) */ #define BC_SCALE_MAX 99 /* max scale value in bc(1) */ #define BC_STRING_MAX 1000 /* max const string length in bc(1) */ +#define CHARCLASS_NAME_MAX 14 /* max character class name size */ #define COLL_WEIGHTS_MAX 10 /* max weights for order keyword */ #define EXPR_NEST_MAX 32 /* max expressions nested in expr(1) */ #define LINE_MAX 2048 /* max bytes in an input line */ @@ -72,11 +74,14 @@ #define _POSIX2_BC_DIM_MAX 2048 #define _POSIX2_BC_SCALE_MAX 99 #define _POSIX2_BC_STRING_MAX 1000 +#define _POSIX2_CHARCLASS_NAME_MAX 14 +#define _POSIX2_COLL_WEIGHTS_MAX 2 #define _POSIX2_EQUIV_CLASS_MAX 2 #define _POSIX2_EXPR_NEST_MAX 32 #define _POSIX2_LINE_MAX 2048 #define _POSIX2_RE_DUP_MAX 255 #endif +#endif #if __POSIX_VISIBLE >= 199309 #define _POSIX_AIO_LISTIO_MAX 2 @@ -110,8 +115,6 @@ #define _POSIX_TRACE_SYS_MAX 8 #define _POSIX_TRACE_USER_EVENT_MAX 32 #define _POSIX_TTY_NAME_MAX 9 -#define _POSIX2_CHARCLASS_NAME_MAX 14 -#define _POSIX2_COLL_WEIGHTS_MAX 2 #define _POSIX_RE_DUP_MAX _POSIX2_RE_DUP_MAX #endif From 36402a681f08433cc28c2fde1753b66805dc5cc0 Mon Sep 17 00:00:00 2001 From: "Alexander V. Chernikov" Date: Sat, 9 Jan 2016 16:34:37 +0000 Subject: [PATCH 31/67] Finish r275196: do not dereference rtentry in if_output() routines. The only piece of information that is required is rt_flags subset. In particular, if_loop() requires RTF_REJECT and RTF_BLACKHOLE flags to check if this particular mbuf needs to be dropped (and what error should be returned). 
Note that if_loop() will always return EHOSTUNREACH for "reject" routes regardless of RTF_HOST flag existence. This is due to upcoming routing changes where RTF_HOST value won't be available as lookup result. All other functions require RTF_GATEWAY flag to check if they need to return EHOSTUNREACH instead of EHOSTDOWN error. There are 11 places where non-zero 'struct route' is passed to if_output(). Most of the callers (forwarding, bpf, arp) do not care about the exact error value. In fact, the only place where this result is propagated is ip_output(). (ip6_output() passes NULL route to nd6_output_ifp()). Given that, add 3 new 'struct route' flags (RT_REJECT, RT_BLACKHOLE and RT_HAS_GW) and inline function (rt_update_ro_flags()) to copy necessary rte flags to ro_flags. Call this function in ip_output() after looking up/ verifying rte. Reviewed by: ae --- sys/net/if_arcsubr.c | 5 ++-- sys/net/if_ethersubr.c | 4 +--- sys/net/if_fddisubr.c | 5 ++-- sys/net/if_fwsubr.c | 9 ++------ sys/net/if_iso88025subr.c | 8 ++----- sys/net/if_loop.c | 8 ++----- sys/net/route.h | 23 ++++++++++++++++--- sys/netinet/ip_output.c | 1 + .../drivers/infiniband/ulp/ipoib/ipoib_main.c | 8 ++----- 9 files changed, 34 insertions(+), 37 deletions(-) diff --git a/sys/net/if_arcsubr.c b/sys/net/if_arcsubr.c index 4944e970e1c..16adba4e6cb 100644 --- a/sys/net/if_arcsubr.c +++ b/sys/net/if_arcsubr.c @@ -113,9 +113,8 @@ arc_output(struct ifnet *ifp, struct mbuf *m, const struct sockaddr *dst, error = 0; #if defined(INET) || defined(INET6) - if (ro != NULL && ro->ro_rt != NULL && - (ro->ro_rt->rt_flags & RTF_GATEWAY) != 0) - is_gw = 1; + if (ro != NULL) + is_gw = (ro->ro_flags & RT_HAS_GW) != 0; #endif switch (dst->sa_family) { diff --git a/sys/net/if_ethersubr.c b/sys/net/if_ethersubr.c index 2b82ecc2045..2d652ad8fc3 100644 --- a/sys/net/if_ethersubr.c +++ b/sys/net/if_ethersubr.c @@ -202,7 +202,6 @@ ether_resolve_addr(struct ifnet *ifp, struct mbuf *m, uint32_t *pflags) { struct ether_header *eh;
- struct rtentry *rt; uint32_t lleflags = 0; int error = 0; #if defined(INET) || defined(INET6) @@ -253,8 +252,7 @@ ether_resolve_addr(struct ifnet *ifp, struct mbuf *m, } if (error == EHOSTDOWN) { - rt = (ro != NULL) ? ro->ro_rt : NULL; - if (rt != NULL && (rt->rt_flags & RTF_GATEWAY) != 0) + if (ro != NULL && (ro->ro_flags & RT_HAS_GW) != 0) error = EHOSTUNREACH; } diff --git a/sys/net/if_fddisubr.c b/sys/net/if_fddisubr.c index 81b65a63466..84ee669ae25 100644 --- a/sys/net/if_fddisubr.c +++ b/sys/net/if_fddisubr.c @@ -119,9 +119,8 @@ fddi_output(struct ifnet *ifp, struct mbuf *m, const struct sockaddr *dst, getmicrotime(&ifp->if_lastchange); #if defined(INET) || defined(INET6) - if (ro != NULL && ro->ro_rt != NULL && - (ro->ro_rt->rt_flags & RTF_GATEWAY) != 0) - is_gw = 1; + if (ro != NULL) + is_gw = (ro->ro_flags & RT_HAS_GW) != 0; #endif switch (dst->sa_family) { diff --git a/sys/net/if_fwsubr.c b/sys/net/if_fwsubr.c index 626b1cb856f..a070f617602 100644 --- a/sys/net/if_fwsubr.c +++ b/sys/net/if_fwsubr.c @@ -106,9 +106,8 @@ firewire_output(struct ifnet *ifp, struct mbuf *m, const struct sockaddr *dst, } #if defined(INET) || defined(INET6) - if (ro != NULL && ro->ro_rt != NULL && - (ro->ro_rt->rt_flags & RTF_GATEWAY) != 0) - is_gw = 1; + if (ro != NULL) + is_gw = (ro->ro_flags & RT_HAS_GW) != 0; #endif /* * For unicast, we make a tag to store the lladdr of the @@ -145,10 +144,6 @@ firewire_output(struct ifnet *ifp, struct mbuf *m, const struct sockaddr *dst, * doesn't fit into the arp model. */ if (unicast) { - is_gw = 0; - if (ro != NULL && ro->ro_rt != NULL && - (ro->ro_rt->rt_flags & RTF_GATEWAY) != 0) - is_gw = 1; error = arpresolve(ifp, is_gw, m, dst, (u_char *) destfw, NULL); if (error) return (error == EWOULDBLOCK ? 
0 : error); diff --git a/sys/net/if_iso88025subr.c b/sys/net/if_iso88025subr.c index 7192998ac05..466784fb075 100644 --- a/sys/net/if_iso88025subr.c +++ b/sys/net/if_iso88025subr.c @@ -214,12 +214,8 @@ iso88025_output(struct ifnet *ifp, struct mbuf *m, const struct sockaddr *dst, struct rtentry *rt0 = NULL; int is_gw = 0; - if (ro != NULL) { - rt0 = ro->ro_rt; - if (rt0 != NULL && (rt0->rt_flags & RTF_GATEWAY) != 0) - is_gw = 1; - } - + if (ro != NULL) + is_gw = (ro->ro_flags & RT_HAS_GW) != 0; #ifdef MAC error = mac_ifnet_check_transmit(ifp, m); if (error) diff --git a/sys/net/if_loop.c b/sys/net/if_loop.c index f4ac8b4e729..1291f7b44d9 100644 --- a/sys/net/if_loop.c +++ b/sys/net/if_loop.c @@ -202,15 +202,12 @@ looutput(struct ifnet *ifp, struct mbuf *m, const struct sockaddr *dst, struct route *ro) { u_int32_t af; - struct rtentry *rt = NULL; #ifdef MAC int error; #endif M_ASSERTPKTHDR(m); /* check if we have the packet header */ - if (ro != NULL) - rt = ro->ro_rt; #ifdef MAC error = mac_ifnet_check_transmit(ifp, m); if (error) { @@ -219,10 +216,9 @@ looutput(struct ifnet *ifp, struct mbuf *m, const struct sockaddr *dst, } #endif - if (rt && rt->rt_flags & (RTF_REJECT|RTF_BLACKHOLE)) { + if (ro != NULL && ro->ro_flags & (RT_REJECT|RT_BLACKHOLE)) { m_freem(m); - return (rt->rt_flags & RTF_BLACKHOLE ? 0 : - rt->rt_flags & RTF_HOST ? EHOSTUNREACH : ENETUNREACH); + return (ro->ro_flags & RT_BLACKHOLE ? 
0 : EHOSTUNREACH); } if_inc_counter(ifp, IFCOUNTER_OPACKETS, 1); diff --git a/sys/net/route.h b/sys/net/route.h index 25d45f4ff06..ae59efa7aad 100644 --- a/sys/net/route.h +++ b/sys/net/route.h @@ -64,9 +64,13 @@ struct route { #define RT_CACHING_CONTEXT 0x1 /* XXX: not used anywhere */ #define RT_NORTREF 0x2 /* doesn't hold reference on ro_rt */ -#define RT_L2_ME (1 << RT_L2_ME_BIT) -#define RT_MAY_LOOP (1 << RT_MAY_LOOP_BIT) -#define RT_HAS_HEADER (1 << RT_HAS_HEADER_BIT) +#define RT_L2_ME (1 << RT_L2_ME_BIT) /* 0x0004 */ +#define RT_MAY_LOOP (1 << RT_MAY_LOOP_BIT) /* 0x0008 */ +#define RT_HAS_HEADER (1 << RT_HAS_HEADER_BIT) /* 0x0010 */ + +#define RT_REJECT 0x0020 /* Destination is reject */ +#define RT_BLACKHOLE 0x0040 /* Destination is blackhole */ +#define RT_HAS_GW 0x0080 /* Destination has GW */ struct rt_metrics { u_long rmx_locks; /* Kernel must leave these values alone */ @@ -215,6 +219,19 @@ fib_rte_to_nh_flags(int rt_flags) return (res); } +/* rte<>ro_flags translation */ +static inline void +rt_update_ro_flags(struct route *ro) +{ + int rt_flags = ro->ro_rt->rt_flags; + + ro->ro_flags &= ~ (RT_REJECT|RT_BLACKHOLE|RT_HAS_GW); + + ro->ro_flags = (rt_flags & RTF_REJECT) ? RT_REJECT : 0; + ro->ro_flags |= (rt_flags & RTF_BLACKHOLE) ? RT_BLACKHOLE : 0; + ro->ro_flags |= (rt_flags & RTF_GATEWAY) ? RT_HAS_GW : 0; +} + /* * Routing statistics. 
*/ diff --git a/sys/netinet/ip_output.c b/sys/netinet/ip_output.c index 7ad43a229db..fa225fc0c85 100644 --- a/sys/netinet/ip_output.c +++ b/sys/netinet/ip_output.c @@ -376,6 +376,7 @@ again: ia = ifatoia(rte->rt_ifa); ifp = rte->rt_ifp; counter_u64_add(rte->rt_pksent, 1); + rt_update_ro_flags(ro); if (rte->rt_flags & RTF_GATEWAY) gw = (struct sockaddr_in *)rte->rt_gateway; if (rte->rt_flags & RTF_HOST) diff --git a/sys/ofed/drivers/infiniband/ulp/ipoib/ipoib_main.c b/sys/ofed/drivers/infiniband/ulp/ipoib/ipoib_main.c index e1f2dc082d3..6326bd211e1 100644 --- a/sys/ofed/drivers/infiniband/ulp/ipoib/ipoib_main.c +++ b/sys/ofed/drivers/infiniband/ulp/ipoib/ipoib_main.c @@ -1260,16 +1260,12 @@ ipoib_output(struct ifnet *ifp, struct mbuf *m, #if defined(INET) || defined(INET6) struct llentry *lle = NULL; #endif - struct rtentry *rt0 = NULL; struct ipoib_header *eh; int error = 0, is_gw = 0; short type; - if (ro != NULL) { - rt0 = ro->ro_rt; - if (rt0 != NULL && (rt0->rt_flags & RTF_GATEWAY) != 0) - is_gw = 1; - } + if (ro != NULL) + is_gw = (ro->ro_flags & RT_HAS_GW) != 0; #ifdef MAC error = mac_ifnet_check_transmit(ifp, m); if (error) From 64e9493420da01b8b41aa5e58146b1ec0f15d980 Mon Sep 17 00:00:00 2001 From: "Alexander V. Chernikov" Date: Sat, 9 Jan 2016 18:42:12 +0000 Subject: [PATCH 32/67] Fix userland build broken by r293470. Pointy hat to: melifaro --- sys/net/route.h | 2 ++ 1 file changed, 2 insertions(+) diff --git a/sys/net/route.h b/sys/net/route.h index ae59efa7aad..97709f17247 100644 --- a/sys/net/route.h +++ b/sys/net/route.h @@ -219,6 +219,7 @@ fib_rte_to_nh_flags(int rt_flags) return (res); } +#ifdef _KERNEL /* rte<>ro_flags translation */ static inline void rt_update_ro_flags(struct route *ro) @@ -231,6 +232,7 @@ rt_update_ro_flags(struct route *ro) ro->ro_flags |= (rt_flags & RTF_BLACKHOLE) ? RT_BLACKHOLE : 0; ro->ro_flags |= (rt_flags & RTF_GATEWAY) ? RT_HAS_GW : 0; } +#endif /* * Routing statistics. 
From d036e72f4bd4f7be002de2218a542709b4f55763 Mon Sep 17 00:00:00 2001 From: Allan Jude Date: Sat, 9 Jan 2016 19:13:25 +0000 Subject: [PATCH 33/67] Return call to init_zfs_bootenv to its previous location When called too early, new_currdev->d_type was not yet set, so zfs_fmtdev() would then return null While here, guard call to init_zfs_bootenv with if d_type == DEVT_ZFS Reported by: tsoome at me.com MFC after: 3 days Sponsored by: ScaleEngine Inc. --- sys/boot/i386/loader/main.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/sys/boot/i386/loader/main.c b/sys/boot/i386/loader/main.c index 2b30b92161f..644747e8151 100644 --- a/sys/boot/i386/loader/main.c +++ b/sys/boot/i386/loader/main.c @@ -262,7 +262,6 @@ extract_currdev(void) new_currdev.d_kind.zfs.root_guid = 0; } new_currdev.d_dev = &zfs_dev; - init_zfs_bootenv(zfs_fmtdev(&new_currdev)); #endif } else if ((initial_bootdev & B_MAGICMASK) != B_DEVMAGIC) { /* The passed-in boot device is bad */ @@ -296,6 +295,11 @@ extract_currdev(void) new_currdev.d_unit = 0; } +#ifdef LOADER_ZFS_SUPPORT + if (new_currdev.d_type == DEVT_ZFS) + init_zfs_bootenv(zfs_fmtdev(&new_currdev)); +#endif + env_setenv("currdev", EV_VOLATILE, i386_fmtdev(&new_currdev), i386_setcurrdev, env_nounset); env_setenv("loaddev", EV_VOLATILE, i386_fmtdev(&new_currdev), env_noset, From 038c720553fa05b557a30973204b53b5202fa69c Mon Sep 17 00:00:00 2001 From: Dmitry Chagin Date: Sat, 9 Jan 2016 20:18:53 +0000 Subject: [PATCH 34/67] Implement vsyscall hack. Prior to 2.13 glibc uses vsyscall instead of vdso. An upcoming linux_base-c6 needs it.
Differential Revision: https://reviews.freebsd.org/D1090 Reviewed by: kib, trasz MFC after: 1 week --- sys/amd64/amd64/elf_machdep.c | 1 + sys/amd64/amd64/trap.c | 7 ++++ sys/amd64/linux/linux_sysvec.c | 51 ++++++++++++++++++++++++++++- sys/amd64/linux32/linux32_sysvec.c | 1 + sys/arm/arm/elf_machdep.c | 1 + sys/arm64/arm64/elf_machdep.c | 2 ++ sys/compat/ia32/ia32_sysvec.c | 1 + sys/compat/svr4/svr4_sysvec.c | 1 + sys/i386/i386/elf_machdep.c | 1 + sys/i386/ibcs2/ibcs2_sysvec.c | 1 + sys/i386/linux/linux_sysvec.c | 2 ++ sys/kern/imgact_aout.c | 1 + sys/kern/init_main.c | 1 + sys/mips/mips/elf_machdep.c | 2 ++ sys/mips/mips/freebsd32_machdep.c | 1 + sys/powerpc/powerpc/elf32_machdep.c | 1 + sys/powerpc/powerpc/elf64_machdep.c | 1 + sys/sparc64/sparc64/elf_machdep.c | 1 + sys/sys/sysent.h | 1 + 19 files changed, 77 insertions(+), 1 deletion(-) diff --git a/sys/amd64/amd64/elf_machdep.c b/sys/amd64/amd64/elf_machdep.c index c6520f37d23..ca07adc7cad 100644 --- a/sys/amd64/amd64/elf_machdep.c +++ b/sys/amd64/amd64/elf_machdep.c @@ -80,6 +80,7 @@ struct sysentvec elf64_freebsd_sysvec = { .sv_shared_page_len = PAGE_SIZE, .sv_schedtail = NULL, .sv_thread_detach = NULL, + .sv_trap = NULL, }; INIT_SYSENTVEC(elf64_sysvec, &elf64_freebsd_sysvec); diff --git a/sys/amd64/amd64/trap.c b/sys/amd64/amd64/trap.c index 776f90c6fb8..620a46185f6 100644 --- a/sys/amd64/amd64/trap.c +++ b/sys/amd64/amd64/trap.c @@ -322,6 +322,13 @@ trap(struct trapframe *frame) break; case T_PAGEFLT: /* page fault */ + /* + * Emulator can take care about this trap? 
+ */ + if (*p->p_sysent->sv_trap != NULL && + (*p->p_sysent->sv_trap)(td) == 0) + goto userout; + addr = frame->tf_addr; i = trap_pfault(frame, TRUE); if (i == -1) diff --git a/sys/amd64/linux/linux_sysvec.c b/sys/amd64/linux/linux_sysvec.c index 96428b840a4..d49ca79256d 100644 --- a/sys/amd64/linux/linux_sysvec.c +++ b/sys/amd64/linux/linux_sysvec.c @@ -129,6 +129,7 @@ static void linux_set_syscall_retval(struct thread *td, int error); static int linux_fetch_syscall_args(struct thread *td, struct syscall_args *sa); static void linux_exec_setregs(struct thread *td, struct image_params *imgp, u_long stack); +static int linux_vsyscall(struct thread *td); /* * Linux syscalls return negative errno's, we do positive and map them @@ -746,6 +747,53 @@ exec_linux_imgact_try(struct image_params *imgp) return(error); } +#define LINUX_VSYSCALL_START (-10UL << 20) +#define LINUX_VSYSCALL_SZ 1024 + +const unsigned long linux_vsyscall_vector[] = { + LINUX_SYS_gettimeofday, + LINUX_SYS_linux_time, + /* getcpu not implemented */ +}; + +static int +linux_vsyscall(struct thread *td) +{ + struct trapframe *frame; + uint64_t retqaddr; + int code, traced; + int error; + + frame = td->td_frame; + + /* Check %rip for vsyscall area */ + if (__predict_true(frame->tf_rip < LINUX_VSYSCALL_START)) + return (EINVAL); + if ((frame->tf_rip & (LINUX_VSYSCALL_SZ - 1)) != 0) + return (EINVAL); + code = (frame->tf_rip - LINUX_VSYSCALL_START) / LINUX_VSYSCALL_SZ; + if (code >= nitems(linux_vsyscall_vector)) + return (EINVAL); + + /* + * vsyscall called as callq *(%rax), so we must + * use return address from %rsp and also fixup %rsp + */ + error = copyin((void *)frame->tf_rsp, &retqaddr, sizeof(retqaddr)); + if (error) + return (error); + + frame->tf_rip = retqaddr; + frame->tf_rax = linux_vsyscall_vector[code]; + frame->tf_rsp += 8; + + traced = (frame->tf_flags & PSL_T); + + amd64_syscall(td, traced); + + return (0); +} + struct sysentvec elf_linux_sysvec = { .sv_size = LINUX_SYS_MAXSYSCALL, 
.sv_table = linux_sysent, @@ -778,7 +826,8 @@ struct sysentvec elf_linux_sysvec = { .sv_shared_page_base = SHAREDPAGE, .sv_shared_page_len = PAGE_SIZE, .sv_schedtail = linux_schedtail, - .sv_thread_detach = linux_thread_detach + .sv_thread_detach = linux_thread_detach, + .sv_trap = linux_vsyscall, }; static void diff --git a/sys/amd64/linux32/linux32_sysvec.c b/sys/amd64/linux32/linux32_sysvec.c index 693f0033cea..7d3615eb752 100644 --- a/sys/amd64/linux32/linux32_sysvec.c +++ b/sys/amd64/linux32/linux32_sysvec.c @@ -1040,6 +1040,7 @@ struct sysentvec elf_linux_sysvec = { .sv_shared_page_len = PAGE_SIZE, .sv_schedtail = linux_schedtail, .sv_thread_detach = linux_thread_detach, + .sv_trap = NULL, }; static void diff --git a/sys/arm/arm/elf_machdep.c b/sys/arm/arm/elf_machdep.c index 02f7128db49..84b87f7822f 100644 --- a/sys/arm/arm/elf_machdep.c +++ b/sys/arm/arm/elf_machdep.c @@ -86,6 +86,7 @@ struct sysentvec elf32_freebsd_sysvec = { .sv_shared_page_len = PAGE_SIZE, .sv_schedtail = NULL, .sv_thread_detach = NULL, + .sv_trap = NULL, }; INIT_SYSENTVEC(elf32_sysvec, &elf32_freebsd_sysvec); diff --git a/sys/arm64/arm64/elf_machdep.c b/sys/arm64/arm64/elf_machdep.c index 9ba2541432f..fb2c163a936 100644 --- a/sys/arm64/arm64/elf_machdep.c +++ b/sys/arm64/arm64/elf_machdep.c @@ -87,6 +87,8 @@ static struct sysentvec elf64_freebsd_sysvec = { .sv_shared_page_base = SHAREDPAGE, .sv_shared_page_len = PAGE_SIZE, .sv_schedtail = NULL, + .sv_thread_detach = NULL, + .sv_trap = NULL, }; INIT_SYSENTVEC(elf64_sysvec, &elf64_freebsd_sysvec); diff --git a/sys/compat/ia32/ia32_sysvec.c b/sys/compat/ia32/ia32_sysvec.c index 1a6dd72e4d9..f2015705fcf 100644 --- a/sys/compat/ia32/ia32_sysvec.c +++ b/sys/compat/ia32/ia32_sysvec.c @@ -134,6 +134,7 @@ struct sysentvec ia32_freebsd_sysvec = { .sv_shared_page_len = PAGE_SIZE, .sv_schedtail = NULL, .sv_thread_detach = NULL, + .sv_trap = NULL, }; INIT_SYSENTVEC(elf_ia32_sysvec, &ia32_freebsd_sysvec); diff --git a/sys/compat/svr4/svr4_sysvec.c 
b/sys/compat/svr4/svr4_sysvec.c index f37d8cb856b..c9ceca7d301 100644 --- a/sys/compat/svr4/svr4_sysvec.c +++ b/sys/compat/svr4/svr4_sysvec.c @@ -194,6 +194,7 @@ struct sysentvec svr4_sysvec = { .sv_syscallnames = NULL, .sv_schedtail = NULL, .sv_thread_detach = NULL, + .sv_trap = NULL, }; const char svr4_emul_path[] = "/compat/svr4"; diff --git a/sys/i386/i386/elf_machdep.c b/sys/i386/i386/elf_machdep.c index b11cb03d342..3c76ab2049b 100644 --- a/sys/i386/i386/elf_machdep.c +++ b/sys/i386/i386/elf_machdep.c @@ -87,6 +87,7 @@ struct sysentvec elf32_freebsd_sysvec = { .sv_shared_page_len = PAGE_SIZE, .sv_schedtail = NULL, .sv_thread_detach = NULL, + .sv_trap = NULL, }; INIT_SYSENTVEC(elf32_sysvec, &elf32_freebsd_sysvec); diff --git a/sys/i386/ibcs2/ibcs2_sysvec.c b/sys/i386/ibcs2/ibcs2_sysvec.c index 16507ee2afa..372e5ea1cc4 100644 --- a/sys/i386/ibcs2/ibcs2_sysvec.c +++ b/sys/i386/ibcs2/ibcs2_sysvec.c @@ -90,6 +90,7 @@ struct sysentvec ibcs2_svr3_sysvec = { .sv_syscallnames = NULL, .sv_schedtail = NULL, .sv_thread_detach = NULL, + .sv_trap = NULL, }; static int diff --git a/sys/i386/linux/linux_sysvec.c b/sys/i386/linux/linux_sysvec.c index 2a8b7d27d56..cc5fcaab394 100644 --- a/sys/i386/linux/linux_sysvec.c +++ b/sys/i386/linux/linux_sysvec.c @@ -985,6 +985,7 @@ struct sysentvec linux_sysvec = { .sv_shared_page_len = PAGE_SIZE, .sv_schedtail = linux_schedtail, .sv_thread_detach = linux_thread_detach, + .sv_trap = NULL, }; INIT_SYSENTVEC(aout_sysvec, &linux_sysvec); @@ -1021,6 +1022,7 @@ struct sysentvec elf_linux_sysvec = { .sv_shared_page_len = PAGE_SIZE, .sv_schedtail = linux_schedtail, .sv_thread_detach = linux_thread_detach, + .sv_trap = NULL, }; static void diff --git a/sys/kern/imgact_aout.c b/sys/kern/imgact_aout.c index a7db17e753f..cbde46b5acd 100644 --- a/sys/kern/imgact_aout.c +++ b/sys/kern/imgact_aout.c @@ -97,6 +97,7 @@ struct sysentvec aout_sysvec = { .sv_syscallnames = syscallnames, .sv_schedtail = NULL, .sv_thread_detach = NULL, + .sv_trap = NULL, 
}; #elif defined(__amd64__) diff --git a/sys/kern/init_main.c b/sys/kern/init_main.c index f0cd3c8f3ab..8d5580b6d04 100644 --- a/sys/kern/init_main.c +++ b/sys/kern/init_main.c @@ -414,6 +414,7 @@ struct sysentvec null_sysvec = { .sv_syscallnames = NULL, .sv_schedtail = NULL, .sv_thread_detach = NULL, + .sv_trap = NULL, }; /* diff --git a/sys/mips/mips/elf_machdep.c b/sys/mips/mips/elf_machdep.c index 2b50015f308..b332bf37d5d 100644 --- a/sys/mips/mips/elf_machdep.c +++ b/sys/mips/mips/elf_machdep.c @@ -81,6 +81,7 @@ struct sysentvec elf64_freebsd_sysvec = { .sv_syscallnames = syscallnames, .sv_schedtail = NULL, .sv_thread_detach = NULL, + .sv_trap = NULL, }; static Elf64_Brandinfo freebsd_brand_info = { @@ -135,6 +136,7 @@ struct sysentvec elf32_freebsd_sysvec = { .sv_syscallnames = syscallnames, .sv_schedtail = NULL, .sv_thread_detach = NULL, + .sv_trap = NULL, }; static Elf32_Brandinfo freebsd_brand_info = { diff --git a/sys/mips/mips/freebsd32_machdep.c b/sys/mips/mips/freebsd32_machdep.c index 7726ecd2de4..f4ace04d7e9 100644 --- a/sys/mips/mips/freebsd32_machdep.c +++ b/sys/mips/mips/freebsd32_machdep.c @@ -104,6 +104,7 @@ struct sysentvec elf32_freebsd_sysvec = { .sv_syscallnames = freebsd32_syscallnames, .sv_schedtail = NULL, .sv_thread_detach = NULL, + .sv_trap = NULL, }; INIT_SYSENTVEC(elf32_sysvec, &elf32_freebsd_sysvec); diff --git a/sys/powerpc/powerpc/elf32_machdep.c b/sys/powerpc/powerpc/elf32_machdep.c index 027105ddfd3..eed76c908ae 100644 --- a/sys/powerpc/powerpc/elf32_machdep.c +++ b/sys/powerpc/powerpc/elf32_machdep.c @@ -106,6 +106,7 @@ struct sysentvec elf32_freebsd_sysvec = { .sv_shared_page_len = PAGE_SIZE, .sv_schedtail = NULL, .sv_thread_detach = NULL, + .sv_trap = NULL, }; INIT_SYSENTVEC(elf32_sysvec, &elf32_freebsd_sysvec); diff --git a/sys/powerpc/powerpc/elf64_machdep.c b/sys/powerpc/powerpc/elf64_machdep.c index 261660a7be5..032728c67b6 100644 --- a/sys/powerpc/powerpc/elf64_machdep.c +++ b/sys/powerpc/powerpc/elf64_machdep.c @@ -85,6 
+85,7 @@ struct sysentvec elf64_freebsd_sysvec_v1 = { .sv_shared_page_len = PAGE_SIZE, .sv_schedtail = NULL, .sv_thread_detach = NULL, + .sv_trap = NULL, }; INIT_SYSENTVEC(elf64_sysvec_v1, &elf64_freebsd_sysvec_v1); diff --git a/sys/sparc64/sparc64/elf_machdep.c b/sys/sparc64/sparc64/elf_machdep.c index eea21a25778..c2b0d26c52f 100644 --- a/sys/sparc64/sparc64/elf_machdep.c +++ b/sys/sparc64/sparc64/elf_machdep.c @@ -85,6 +85,7 @@ static struct sysentvec elf64_freebsd_sysvec = { .sv_syscallnames = syscallnames, .sv_schedtail = NULL, .sv_thread_detach = NULL, + .sv_trap = NULL, }; static Elf64_Brandinfo freebsd_brand_info = { diff --git a/sys/sys/sysent.h b/sys/sys/sysent.h index d5b88ebe9eb..a79ff04bb23 100644 --- a/sys/sys/sysent.h +++ b/sys/sys/sysent.h @@ -129,6 +129,7 @@ struct sysentvec { void *sv_shared_page_obj; void (*sv_schedtail)(struct thread *); void (*sv_thread_detach)(struct thread *); + int (*sv_trap)(struct thread *); }; #define SV_ILP32 0x000100 /* 32-bit executable. */ From bb0455d7ddc57cdd19586ceb69a516887e1c2f46 Mon Sep 17 00:00:00 2001 From: Nathan Whitehorn Date: Sat, 9 Jan 2016 21:28:56 +0000 Subject: [PATCH 35/67] Make graphical consoles work under PowerKVM. Without using hypercalls, it is not possible to write the framebuffer before pmap is up. Solve this by deferring initialization until that happens, like on PS3. 
MFC after: 1 week --- sys/dev/vt/hw/ofwfb/ofwfb.c | 25 +++++++++++++++++++++++-- 1 file changed, 23 insertions(+), 2 deletions(-) diff --git a/sys/dev/vt/hw/ofwfb/ofwfb.c b/sys/dev/vt/hw/ofwfb/ofwfb.c index acad5d4d86c..c3ac185c910 100644 --- a/sys/dev/vt/hw/ofwfb/ofwfb.c +++ b/sys/dev/vt/hw/ofwfb/ofwfb.c @@ -57,6 +57,7 @@ struct ofwfb_softc { int iso_palette; }; +static void ofwfb_initialize(struct vt_device *vd); static vd_probe_t ofwfb_probe; static vd_init_t ofwfb_init; static vd_bitblt_text_t ofwfb_bitblt_text; @@ -124,6 +125,18 @@ ofwfb_bitblt_bitmap(struct vt_device *vd, const struct vt_window *vw, uint8_t c[4]; } ch1, ch2; +#ifdef __powerpc__ + /* Deal with unmapped framebuffers */ + if (sc->fb_flags & FB_FLAG_NOWRITE) { + if (pmap_bootstrapped) { + sc->fb_flags &= ~FB_FLAG_NOWRITE; + ofwfb_initialize(vd); + } else { + return; + } + } +#endif + fgc = sc->fb_cmap[fg]; bgc = sc->fb_cmap[bg]; b = m = 0; @@ -271,6 +284,11 @@ ofwfb_initialize(struct vt_device *vd) cell_t retval; uint32_t oldpix; + sc->fb.fb_cmsize = 16; + + if (sc->fb.fb_flags & FB_FLAG_NOWRITE) + return; + /* * Set up the color map */ @@ -318,8 +336,6 @@ ofwfb_initialize(struct vt_device *vd) panic("Unknown color space depth %d", sc->fb.fb_bpp); break; } - - sc->fb.fb_cmsize = 16; } static int @@ -466,6 +482,11 @@ ofwfb_init(struct vt_device *vd) #if defined(__powerpc__) OF_decode_addr(node, fb_phys, &sc->sc_memt, &sc->fb.fb_vbase); sc->fb.fb_pbase = sc->fb.fb_vbase; /* 1:1 mapped */ + #ifdef __powerpc64__ + /* Real mode under a hypervisor probably doesn't cover FB */ + if (!(mfmsr() & (PSL_HV | PSL_DR))) + sc->fb.fb_flags |= FB_FLAG_NOWRITE; + #endif #else /* No ability to interpret assigned-addresses otherwise */ return (CN_DEAD); From 950678b4889c08d6e96303b66ee5577c1a64f1b8 Mon Sep 17 00:00:00 2001 From: Andriy Voskoboinyk Date: Sat, 9 Jan 2016 21:45:21 +0000 Subject: [PATCH 36/67] rtwn: fix sequence number assignment (part of r290630) Reviewed by: kevlo Approved by: adrian (mentor) 
Differential Revision: https://reviews.freebsd.org/D4819 --- sys/dev/rtwn/if_rtwn.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sys/dev/rtwn/if_rtwn.c b/sys/dev/rtwn/if_rtwn.c index ec17cdc6572..927a8c88f38 100644 --- a/sys/dev/rtwn/if_rtwn.c +++ b/sys/dev/rtwn/if_rtwn.c @@ -1683,7 +1683,7 @@ rtwn_tx(struct rtwn_softc *sc, struct mbuf *m, struct ieee80211_node *ni) txd->txdw5 |= htole32(SM(R92C_TXDW5_DATARATE, 0)); } /* Set sequence number (already little endian). */ - txd->txdseq = *(uint16_t *)wh->i_seq; + txd->txdseq = htole16(M_SEQNO_GET(m) % IEEE80211_SEQ_RANGE); if (!qos) { /* Use HW sequence numbering for non-QoS frames. */ From 7cf0da249ac184f03100144fa2a795c184b16660 Mon Sep 17 00:00:00 2001 From: Devin Teske Date: Sat, 9 Jan 2016 23:13:43 +0000 Subject: [PATCH 37/67] Fix improper duration for f_dialog_pause() API MFC after: 3 days X-MFC-to: stable/10 --- usr.sbin/bsdconfig/share/dialog.subr | 1 - 1 file changed, 1 deletion(-) diff --git a/usr.sbin/bsdconfig/share/dialog.subr b/usr.sbin/bsdconfig/share/dialog.subr index d7c2d2c3dfb..1e63aec593f 100644 --- a/usr.sbin/bsdconfig/share/dialog.subr +++ b/usr.sbin/bsdconfig/share/dialog.subr @@ -1605,7 +1605,6 @@ f_dialog_pause() $height $width else [ $duration -gt 0 ] && duration=$(( $duration - 1 )) - [ $duration -gt 1 ] && duration=$(( $duration - 1 )) height=$(( $height + 3 )) # Add height for progress bar $DIALOG \ --title "$DIALOG_TITLE" \ From 0f0306933aa1feb3f5a342179b0128e8745be768 Mon Sep 17 00:00:00 2001 From: Enji Cooper Date: Sat, 9 Jan 2016 23:45:25 +0000 Subject: [PATCH 38/67] - Delete non-TAP testcases - Add a conf.sh file for executing common functions with geom_gate - Use attach_md for attaching md(4) devices - Don't hardcode /tmp for temporary files, which violates the kyua sandbox - Add/increase sleeps to try and improve synchronization - Add debug output for when checksums fail test-1.t: - Use pkill for killing ggated MFC after: 3 weeks Sponsored by: EMC / Isilon 
Storage Division --- tools/regression/geom_gate/runtests.sh | 8 --- tools/regression/geom_gate/test-1.sh | 36 ------------ tools/regression/geom_gate/test-1.t | 76 +++++++++++++++++--------- tools/regression/geom_gate/test-2.sh | 28 ---------- tools/regression/geom_gate/test-2.t | 47 +++++++++++----- tools/regression/geom_gate/test-3.sh | 31 ----------- tools/regression/geom_gate/test-3.t | 50 +++++++++++------ 7 files changed, 113 insertions(+), 163 deletions(-) delete mode 100644 tools/regression/geom_gate/runtests.sh delete mode 100644 tools/regression/geom_gate/test-1.sh delete mode 100644 tools/regression/geom_gate/test-2.sh delete mode 100644 tools/regression/geom_gate/test-3.sh diff --git a/tools/regression/geom_gate/runtests.sh b/tools/regression/geom_gate/runtests.sh deleted file mode 100644 index 38089c5daed..00000000000 --- a/tools/regression/geom_gate/runtests.sh +++ /dev/null @@ -1,8 +0,0 @@ -#!/bin/sh -# $FreeBSD$ - -dir=`dirname $0` - -for ts in `dirname $0`/test-*.sh; do - sh $ts -done diff --git a/tools/regression/geom_gate/test-1.sh b/tools/regression/geom_gate/test-1.sh deleted file mode 100644 index 44a49606ff4..00000000000 --- a/tools/regression/geom_gate/test-1.sh +++ /dev/null @@ -1,36 +0,0 @@ -#!/bin/sh -# $FreeBSD$ - -base=`basename $0` -us=45 -work="/dev/md${us}" -src="/dev/md`expr $us + 1`" -conf=`mktemp /tmp/$base.XXXXXX` || exit 1 - -mdconfig -a -t malloc -s 1M -u $us || exit 1 -mdconfig -a -t malloc -s 1M -u `expr $us + 1` || exit 1 -dd if=/dev/random of=$work bs=1m count=1 >/dev/null 2>&1 -dd if=/dev/random of=$src bs=1m count=1 >/dev/null 2>&1 -sum=`cat $src | md5 -q` - -echo "127.0.0.1 RW $work" > $conf -ggated $conf -ggatec create -u $us 127.0.0.1 $work - -dd if=${src} of=/dev/ggate${us} bs=1m count=1 >/dev/null 2>&1 - -if [ `cat $work | md5 -q` != $sum ]; then - echo "FAIL" -else - if [ `cat /dev/ggate${us} | md5 -q` != $sum ]; then - echo "FAIL" - else - echo "PASS" - fi -fi - -ggatec destroy -u $us -mdconfig -d -u $us -mdconfig 
-d -u `expr $us + 1` -pkill ggated $conf -rm -f $conf diff --git a/tools/regression/geom_gate/test-1.t b/tools/regression/geom_gate/test-1.t index 279e3163361..83f609602ed 100644 --- a/tools/regression/geom_gate/test-1.t +++ b/tools/regression/geom_gate/test-1.t @@ -1,40 +1,62 @@ #!/bin/sh # $FreeBSD$ -base=`basename $0` -us=45 -work="/dev/md${us}" -src="/dev/md`expr $us + 1`" -conf=`mktemp /tmp/$base.XXXXXX` || exit 1 - -mdconfig -a -t malloc -s 1M -u $us || exit 1 -mdconfig -a -t malloc -s 1M -u `expr $us + 1` || exit 1 -dd if=/dev/random of=$work bs=1m count=1 >/dev/null 2>&1 -dd if=/dev/random of=$src bs=1m count=1 >/dev/null 2>&1 -sum=`cat $src | md5 -q` - -echo "127.0.0.1 RW $work" > $conf -ggated $conf -ggatec create -u $us 127.0.0.1 $work - -dd if=${src} of=/dev/ggate${us} bs=1m count=1 >/dev/null 2>&1 +. `dirname $0`/conf.sh echo '1..2' -if [ `cat $work | md5 -q` != $sum ]; then - echo 'not ok 1 - md5 checksum' +base=`basename $0` +us=0 +while [ -c /dev/ggate${us} ]; do + : $(( us += 1 )) +done +conf=`mktemp $base.XXXXXX` || exit 1 +pidfile=/var/run/ggated.pid +port=33080 + +work=$(attach_md -t malloc -s 1M) +src=$(attach_md -t malloc -s 1M) + +test_cleanup() +{ + ggatec destroy -f -u $us + pkill -F $pidfile + geom_test_cleanup +} +trap test_cleanup ABRT EXIT INT TERM + +dd if=/dev/random of=/dev/$work bs=1m count=1 conv=sync +dd if=/dev/random of=/dev/$src bs=1m count=1 conv=sync +src_checksum=$(md5 -q /dev/$src) + +echo "127.0.0.1 RW /dev/$work" > $conf + +if ! ggated -p $port $conf; then + echo 'ggated failed to start' + echo 'Bail out!' + exit 1 +fi +sleep 1 +if ! ggatec create -p $port -u $us 127.0.0.1 /dev/$work; then + echo 'ggatec create failed' + echo 'Bail out!' 
+ exit 1 +fi + +dd if=/dev/${src} of=/dev/ggate${us} bs=1m count=1 +sleep 1 + +work_checksum=$(md5 -q /dev/$work) +if [ "$work_checksum" != "$src_checksum" ]; then + echo "not ok 1 - md5 checksums didn't match ($work_checksum != $src_checksum)" + echo "not ok 2 # SKIP" else echo 'ok 1 - md5 checksum' - if [ `cat /dev/ggate${us} | md5 -q` != $sum ]; then - echo 'not ok 2 - md5 checksum' + ggate_checksum=$(md5 -q /dev/ggate${us}) + if [ "$ggate_checksum" != "$src_checksum" ]; then + echo "not ok 2 - md5 checksums didn't match ($ggate_checksum != $src_checksum)" else echo 'ok 2 - md5 checksum' fi fi - -ggatec destroy -u $us -mdconfig -d -u $us -mdconfig -d -u `expr $us + 1` -pkill ggated $conf -rm -f $conf diff --git a/tools/regression/geom_gate/test-2.sh b/tools/regression/geom_gate/test-2.sh deleted file mode 100644 index 498ac45d005..00000000000 --- a/tools/regression/geom_gate/test-2.sh +++ /dev/null @@ -1,28 +0,0 @@ -#!/bin/sh -# $FreeBSD$ - -base=`basename $0` -us=45 -work=`mktemp /tmp/$base.XXXXXX` || exit 1 -src=`mktemp /tmp/$base.XXXXXX` || exit 1 - -dd if=/dev/random of=$work bs=1m count=1 >/dev/null 2>&1 -dd if=/dev/random of=$src bs=1m count=1 >/dev/null 2>&1 -sum=`md5 -q $src` - -ggatel create -u $us $work - -dd if=${src} of=/dev/ggate${us} bs=1m count=1 >/dev/null 2>&1 - -if [ `md5 -q $work` != $sum ]; then - echo "FAIL" -else - if [ `cat /dev/ggate${us} | md5 -q` != $sum ]; then - echo "FAIL" - else - echo "PASS" - fi -fi - -ggatel destroy -u $us -rm -f $work $src diff --git a/tools/regression/geom_gate/test-2.t b/tools/regression/geom_gate/test-2.t index 2e5e2544006..be89accfcb9 100644 --- a/tools/regression/geom_gate/test-2.t +++ b/tools/regression/geom_gate/test-2.t @@ -1,31 +1,48 @@ #!/bin/sh # $FreeBSD$ +. 
`dirname $0`/conf.sh + base=`basename $0` -us=45 -work=`mktemp /tmp/$base.XXXXXX` || exit 1 -src=`mktemp /tmp/$base.XXXXXX` || exit 1 +us=46 +work=`mktemp -u $base.XXXXXX` || exit 1 +src=`mktemp -u $base.XXXXXX` || exit 1 -dd if=/dev/random of=$work bs=1m count=1 >/dev/null 2>&1 -dd if=/dev/random of=$src bs=1m count=1 >/dev/null 2>&1 -sum=`md5 -q $src` +test_cleanup() +{ + ggatel destroy -f -u $us + rm -f $work $src -ggatel create -u $us $work + geom_test_cleanup +} +trap test_cleanup ABRT EXIT INT TERM -dd if=${src} of=/dev/ggate${us} bs=1m count=1 >/dev/null 2>&1 +dd if=/dev/random of=$work bs=1m count=1 conv=sync +dd if=/dev/random of=$src bs=1m count=1 conv=sync + +if ! ggatel create -u $us $work; then + echo 'ggatel create failed' + echo 'Bail out!' + exit 1 +fi + +dd if=${src} of=/dev/ggate${us} bs=1m count=1 +sleep 1 echo '1..2' -if [ `md5 -q $work` != $sum ]; then - echo 'not ok 1 - md5 checksum' +src_checksum=$(md5 -q $src) +work_checksum=$(md5 -q $work) +if [ "$work_checksum" != "$src_checksum" ]; then + echo "not ok 1 - md5 checksums didn't match ($work_checksum != $src_checksum) # TODO: bug 204616" + echo 'not ok 2 # SKIP' else echo 'ok 1 - md5 checksum' - if [ `cat /dev/ggate${us} | md5 -q` != $sum ]; then - echo 'not ok 2 - md5 checksum' + + ggate_checksum=$(md5 -q /dev/ggate${us}) + if [ "$ggate_checksum" != "$src_checksum" ]; then + echo "not ok 2 - md5 checksums didn't match ($ggate_checksum != $src_checksum)" else echo 'ok 2 - md5 checksum' fi fi - -ggatel destroy -u $us -rm -f $work $src diff --git a/tools/regression/geom_gate/test-3.sh b/tools/regression/geom_gate/test-3.sh deleted file mode 100644 index ca73a5a26ba..00000000000 --- a/tools/regression/geom_gate/test-3.sh +++ /dev/null @@ -1,31 +0,0 @@ -#!/bin/sh -# $FreeBSD$ - -base=`basename $0` -us=45 -work="/dev/md${us}" -src="/dev/md`expr $us + 1`" - -mdconfig -a -t malloc -s 1M -u $us || exit 1 -mdconfig -a -t malloc -s 1M -u `expr $us + 1` || exit 1 -dd if=/dev/random of=$work bs=1m 
count=1 >/dev/null 2>&1 -dd if=/dev/random of=$src bs=1m count=1 >/dev/null 2>&1 -sum=`cat $src | md5 -q` - -ggatel create -u $us $work - -dd if=${src} of=/dev/ggate${us} bs=1m count=1 >/dev/null 2>&1 - -if [ `cat $work | md5 -q` != $sum ]; then - echo "FAIL" -else - if [ `cat /dev/ggate${us} | md5 -q` != $sum ]; then - echo "FAIL" - else - echo "PASS" - fi -fi - -ggatel destroy -u $us -mdconfig -d -u $us -mdconfig -d -u `expr $us + 1` diff --git a/tools/regression/geom_gate/test-3.t b/tools/regression/geom_gate/test-3.t index ba2b3c7de34..8901aca6945 100644 --- a/tools/regression/geom_gate/test-3.t +++ b/tools/regression/geom_gate/test-3.t @@ -1,34 +1,48 @@ #!/bin/sh # $FreeBSD$ +. `dirname $0`/conf.sh + base=`basename $0` -us=45 -work="/dev/md${us}" -src="/dev/md`expr $us + 1`" +us=47 -mdconfig -a -t malloc -s 1M -u $us || exit 1 -mdconfig -a -t malloc -s 1M -u `expr $us + 1` || exit 1 -dd if=/dev/random of=$work bs=1m count=1 >/dev/null 2>&1 -dd if=/dev/random of=$src bs=1m count=1 >/dev/null 2>&1 -sum=`cat $src | md5 -q` +test_cleanup() +{ + ggatel destroy -f -u $us -ggatel create -u $us $work + geom_test_cleanup +} +trap test_cleanup ABRT EXIT INT TERM -dd if=${src} of=/dev/ggate${us} bs=1m count=1 >/dev/null 2>&1 +work=$(attach_md -t malloc -s 1M) +src=$(attach_md -t malloc -s 1M) + +dd if=/dev/random of=/dev/$work bs=1m count=1 conv=sync +dd if=/dev/random of=/dev/$src bs=1m count=1 conv=sync +src_checksum=$(md5 -q /dev/$src) + +if ! ggatel create -u $us /dev/$work; then + echo 'ggatel create failed' + echo 'Bail out!' 
+ exit 1 +fi + +dd if=/dev/${src} of=/dev/ggate${us} bs=1m count=1 conv=sync +sleep 1 echo '1..2' -if [ `cat $work | md5 -q` != $sum ]; then - echo 'not ok 1 - md5 checksum' +work_checksum=$(md5 -q /dev/$work) +if [ "$work_checksum" != "$src_checksum" ]; then + echo "not ok 1 - md5 checksums didn't match ($work_checksum != $src_checksum)" + echo 'not ok 2 # SKIP' else echo 'ok 1 - md5 checksum' - if [ `cat /dev/ggate${us} | md5 -q` != $sum ]; then - echo 'not ok 2 - md5 checksum' + + ggate_checksum=$(md5 -q /dev/ggate${us}) + if [ "$ggate_checksum" != "$src_checksum" ]; then + echo "not ok 2 - md5 checksums didn't match ($ggate_checksum != $src_checksum)" else echo 'ok 2 - md5 checksum' fi fi - -ggatel destroy -u $us -mdconfig -d -u $us -mdconfig -d -u `expr $us + 1` From 8084a52400c930a299601a05848cdee81085b286 Mon Sep 17 00:00:00 2001 From: Enji Cooper Date: Sat, 9 Jan 2016 23:46:52 +0000 Subject: [PATCH 39/67] Remove Makefile now that the testcases are all TAP based and prove -rv can be used on them MFC after: 3 weeks Sponsored by: EMC / Isilon Storage Division --- tools/regression/geom_gate/Makefile | 8 -------- 1 file changed, 8 deletions(-) delete mode 100644 tools/regression/geom_gate/Makefile diff --git a/tools/regression/geom_gate/Makefile b/tools/regression/geom_gate/Makefile deleted file mode 100644 index c5c24adaa4d..00000000000 --- a/tools/regression/geom_gate/Makefile +++ /dev/null @@ -1,8 +0,0 @@ -# -# $FreeBSD$ -# -# Regression tests for geom_gate. -# - -test: - @sh runtests.sh From 367382849026daee1c8e5990948a4b5eaeaf20c1 Mon Sep 17 00:00:00 2001 From: "Alexander V. Chernikov" Date: Sun, 10 Jan 2016 00:28:44 +0000 Subject: [PATCH 40/67] Use already pre-calculated number of entries instead of tc->count. 
--- sys/netpfil/ipfw/ip_fw_table.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sys/netpfil/ipfw/ip_fw_table.c b/sys/netpfil/ipfw/ip_fw_table.c index 71c96b278d3..a1ee5dd95c5 100644 --- a/sys/netpfil/ipfw/ip_fw_table.c +++ b/sys/netpfil/ipfw/ip_fw_table.c @@ -2097,7 +2097,7 @@ export_table_info(struct ip_fw_chain *ch, struct table_config *tc, i->count = table_get_count(ch, tc); i->limit = tc->limit; i->flags |= (tc->locked != 0) ? IPFW_TGFLAGS_LOCKED : 0; - i->size = tc->count * sizeof(ipfw_obj_tentry); + i->size = i->count * sizeof(ipfw_obj_tentry); i->size += sizeof(ipfw_obj_header) + sizeof(ipfw_xtable_info); strlcpy(i->tablename, tc->tablename, sizeof(i->tablename)); ti = KIDX_TO_TI(ch, tc->no.kidx); From 004d3e30a7d03fc7a6b57f002a2b0bdea99cefa7 Mon Sep 17 00:00:00 2001 From: "Alexander V. Chernikov" Date: Sun, 10 Jan 2016 06:43:43 +0000 Subject: [PATCH 41/67] Make ipfw addr:kfib lookup algo use new routing KPI. --- sys/netpfil/ipfw/ip_fw_table_algo.c | 121 +++++++++++++++++----------- 1 file changed, 72 insertions(+), 49 deletions(-) diff --git a/sys/netpfil/ipfw/ip_fw_table_algo.c b/sys/netpfil/ipfw/ip_fw_table_algo.c index 06a46410813..e6b7f6f53a1 100644 --- a/sys/netpfil/ipfw/ip_fw_table_algo.c +++ b/sys/netpfil/ipfw/ip_fw_table_algo.c @@ -53,8 +53,10 @@ __FBSDID("$FreeBSD$"); #include #include +#include #include /* struct ipfw_rule_ref */ #include +#include #include #include @@ -3778,7 +3780,6 @@ struct table_algo flow_hash = { * */ -static struct rtentry *lookup_kfib(void *key, int keylen, int fib); static int ta_lookup_kfib(struct table_info *ti, void *key, uint32_t keylen, uint32_t *val); static int kfib_parse_opts(int *pfib, char *data); @@ -3792,46 +3793,44 @@ static void ta_dump_kfib_tinfo(void *ta_state, struct table_info *ti, static int contigmask(uint8_t *p, int len); static int ta_dump_kfib_tentry(void *ta_state, struct table_info *ti, void *e, ipfw_obj_tentry *tent); +static int ta_dump_kfib_tentry_int(struct sockaddr 
*paddr, + struct sockaddr *pmask, ipfw_obj_tentry *tent); static int ta_find_kfib_tentry(void *ta_state, struct table_info *ti, ipfw_obj_tentry *tent); static void ta_foreach_kfib(void *ta_state, struct table_info *ti, ta_foreach_f *f, void *arg); -static struct rtentry * -lookup_kfib(void *key, int keylen, int fib) -{ - struct sockaddr *s; - - if (keylen == 4) { - struct sockaddr_in sin; - bzero(&sin, sizeof(sin)); - sin.sin_len = sizeof(struct sockaddr_in); - sin.sin_family = AF_INET; - sin.sin_addr.s_addr = *(in_addr_t *)key; - s = (struct sockaddr *)&sin; - } else { - struct sockaddr_in6 sin6; - bzero(&sin6, sizeof(sin6)); - sin6.sin6_len = sizeof(struct sockaddr_in6); - sin6.sin6_family = AF_INET6; - sin6.sin6_addr = *(struct in6_addr *)key; - s = (struct sockaddr *)&sin6; - } - - return (rtalloc1_fib(s, 0, 0, fib)); -} static int ta_lookup_kfib(struct table_info *ti, void *key, uint32_t keylen, uint32_t *val) { - struct rtentry *rte; +#ifdef INET + struct nhop4_basic nh4; + struct in_addr in; +#endif +#ifdef INET6 + struct nhop6_basic nh6; +#endif + int error; - if ((rte = lookup_kfib(key, keylen, ti->data)) == NULL) +#ifdef INET + if (keylen == 4) { + in.s_addr = *(in_addr_t *)key; + error = fib4_lookup_nh_basic(ti->data, + in, 0, 0, &nh4); + } +#endif +#ifdef INET6 + if (keylen == 6) + error = fib6_lookup_nh_basic(ti->data, + (struct in6_addr *)key, 0, 0, 0, &nh6); +#endif + + if (error != 0) return (0); *val = 0; - RTFREE_LOCKED(rte); return (1); } @@ -3940,6 +3939,16 @@ ta_dump_kfib_tentry(void *ta_state, struct table_info *ti, void *e, ipfw_obj_tentry *tent) { struct rtentry *rte; + + rte = (struct rtentry *)e; + + return ta_dump_kfib_tentry_int(rt_key(rte), rt_mask(rte), tent); +} + +static int +ta_dump_kfib_tentry_int(struct sockaddr *paddr, struct sockaddr *pmask, + ipfw_obj_tentry *tent) +{ #ifdef INET struct sockaddr_in *addr, *mask; #endif @@ -3948,14 +3957,13 @@ ta_dump_kfib_tentry(void *ta_state, struct table_info *ti, void *e, #endif int len; - 
rte = (struct rtentry *)e; - addr = (struct sockaddr_in *)rt_key(rte); - mask = (struct sockaddr_in *)rt_mask(rte); len = 0; /* Guess IPv4/IPv6 radix by sockaddr family */ #ifdef INET - if (addr->sin_family == AF_INET) { + if (paddr->sa_family == AF_INET) { + addr = (struct sockaddr_in *)paddr; + mask = (struct sockaddr_in *)pmask; tent->k.addr.s_addr = addr->sin_addr.s_addr; len = 32; if (mask != NULL) @@ -3968,9 +3976,9 @@ ta_dump_kfib_tentry(void *ta_state, struct table_info *ti, void *e, } #endif #ifdef INET6 - if (addr->sin_family == AF_INET6) { - addr6 = (struct sockaddr_in6 *)addr; - mask6 = (struct sockaddr_in6 *)mask; + if (paddr->sa_family == AF_INET6) { + addr6 = (struct sockaddr_in6 *)paddr; + mask6 = (struct sockaddr_in6 *)pmask; memcpy(&tent->k, &addr6->sin6_addr, sizeof(struct in6_addr)); len = 128; if (mask6 != NULL) @@ -3990,28 +3998,43 @@ static int ta_find_kfib_tentry(void *ta_state, struct table_info *ti, ipfw_obj_tentry *tent) { - struct rtentry *rte; - void *key; - int keylen; + struct rt_addrinfo info; + struct sockaddr_in6 key6, dst6, mask6; + struct sockaddr *dst, *key, *mask; + + /* Prepare sockaddr for prefix/mask and info */ + bzero(&dst6, sizeof(dst6)); + dst6.sin6_len = sizeof(dst6); + dst = (struct sockaddr *)&dst6; + bzero(&mask6, sizeof(mask6)); + mask6.sin6_len = sizeof(mask6); + mask = (struct sockaddr *)&mask6; + + bzero(&info, sizeof(info)); + info.rti_info[RTAX_DST] = dst; + info.rti_info[RTAX_NETMASK] = mask; + + /* Prepare the lookup key */ + bzero(&key6, sizeof(key6)); + key6.sin6_family = tent->subtype; + key = (struct sockaddr *)&key6; if (tent->subtype == AF_INET) { - key = &tent->k.addr; - keylen = sizeof(struct in_addr); + ((struct sockaddr_in *)&key6)->sin_addr = tent->k.addr; + key6.sin6_len = sizeof(struct sockaddr_in); } else { - key = &tent->k.addr6; - keylen = sizeof(struct in6_addr); + key6.sin6_addr = tent->k.addr6; + key6.sin6_len = sizeof(struct sockaddr_in6); } - if ((rte = lookup_kfib(key, keylen, ti->data)) 
== NULL) - return (0); + if (rib_lookup_info(ti->data, key, 0, 0, &info) != 0) + return (ENOENT); + if ((info.rti_addrs & RTA_NETMASK) == 0) + mask = NULL; - if (rte != NULL) { - ta_dump_kfib_tentry(ta_state, ti, rte, tent); - RTFREE_LOCKED(rte); - return (0); - } + ta_dump_kfib_tentry_int(dst, mask, tent); - return (ENOENT); + return (0); } static void From 6437b8e7d97fa919e28f905443f1136d3bf6f460 Mon Sep 17 00:00:00 2001 From: Dmitry Chagin Date: Sun, 10 Jan 2016 07:36:43 +0000 Subject: [PATCH 42/67] Unlock the process lock when returning an error from the getrobustlist call and add a forgotten dtrace probe when returning the same error. MFC after: 3 days XMFC with: r292743 --- sys/compat/linux/linux_futex.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/sys/compat/linux/linux_futex.c b/sys/compat/linux/linux_futex.c index e2aad790ce2..db8ab2c71eb 100644 --- a/sys/compat/linux/linux_futex.c +++ b/sys/compat/linux/linux_futex.c @@ -1099,8 +1099,12 @@ linux_get_robust_list(struct thread *td, struct linux_get_robust_list_args *args ESRCH); return (ESRCH); } - if (SV_PROC_ABI(td2->td_proc) != SV_ABI_LINUX) + if (SV_PROC_ABI(td2->td_proc) != SV_ABI_LINUX) { + LIN_SDT_PROBE1(futex, linux_get_robust_list, return, + EPERM); + PROC_UNLOCK(td2->td_proc); return (EPERM); + } em = em_find(td2); KASSERT(em != NULL, ("get_robust_list: emuldata notfound.\n")); From 5d86098e84494ce36b3fa4cfc1fb81a711b7d35c Mon Sep 17 00:00:00 2001 From: "Alexander V. Chernikov" Date: Sun, 10 Jan 2016 07:50:35 +0000 Subject: [PATCH 43/67] Convert ipfilter to the new routing KPI.
Differential Revision: D4764 --- sys/contrib/ipfilter/netinet/ip_fil_freebsd.c | 69 ++++++++----------- 1 file changed, 29 insertions(+), 40 deletions(-) diff --git a/sys/contrib/ipfilter/netinet/ip_fil_freebsd.c b/sys/contrib/ipfilter/netinet/ip_fil_freebsd.c index a5d3f44aea1..30ec46c11b9 100644 --- a/sys/contrib/ipfilter/netinet/ip_fil_freebsd.c +++ b/sys/contrib/ipfilter/netinet/ip_fil_freebsd.c @@ -50,6 +50,7 @@ static const char rcsid[] = "@(#)$Id$"; # include #include #include +#include #include #include #include @@ -712,17 +713,16 @@ ipf_fastroute(m0, mpp, fin, fdp) { register struct ip *ip, *mhip; register struct mbuf *m = *mpp; - register struct route *ro; int len, off, error = 0, hlen, code; struct ifnet *ifp, *sifp; - struct sockaddr_in *dst; - struct route iproute; + struct sockaddr_in dst; + struct nhop4_extended nh4; + int has_nhop = 0; + u_long fibnum = 0; u_short ip_off; frdest_t node; frentry_t *fr; - ro = NULL; - #ifdef M_WRITABLE /* * HOT FIX/KLUDGE: @@ -766,11 +766,10 @@ ipf_fastroute(m0, mpp, fin, fdp) /* * Route packet. 
*/ - ro = &iproute; - bzero(ro, sizeof (*ro)); - dst = (struct sockaddr_in *)&ro->ro_dst; - dst->sin_family = AF_INET; - dst->sin_addr = ip->ip_dst; + bzero(&dst, sizeof (dst)); + dst.sin_family = AF_INET; + dst.sin_addr = ip->ip_dst; + dst.sin_len = sizeof(dst); fr = fin->fin_fr; if ((fr != NULL) && !(fr->fr_flags & FR_KEEPSTATE) && (fdp != NULL) && @@ -790,25 +789,22 @@ ipf_fastroute(m0, mpp, fin, fdp) } if ((fdp != NULL) && (fdp->fd_ip.s_addr != 0)) - dst->sin_addr = fdp->fd_ip; + dst.sin_addr = fdp->fd_ip; - dst->sin_len = sizeof(*dst); - in_rtalloc(ro, M_GETFIB(m0)); - - if ((ifp == NULL) && (ro->ro_rt != NULL)) - ifp = ro->ro_rt->rt_ifp; - - if ((ro->ro_rt == NULL) || (ifp == NULL)) { + fibnum = M_GETFIB(m0); + if (fib4_lookup_nh_ext(fibnum, dst.sin_addr, NHR_REF, 0, &nh4) != 0) { if (in_localaddr(ip->ip_dst)) error = EHOSTUNREACH; else error = ENETUNREACH; goto bad; } - if (ro->ro_rt->rt_flags & RTF_GATEWAY) - dst = (struct sockaddr_in *)ro->ro_rt->rt_gateway; - if (ro->ro_rt) - counter_u64_add(ro->ro_rt->rt_pksent, 1); + + has_nhop = 1; + if (ifp == NULL) + ifp = nh4.nh_ifp; + if (nh4.nh_flags & NHF_GATEWAY) + dst.sin_addr = nh4.nh_addr; /* * For input packets which are being "fastrouted", they won't @@ -852,8 +848,8 @@ ipf_fastroute(m0, mpp, fin, fdp) if (ntohs(ip->ip_len) <= ifp->if_mtu) { if (!ip->ip_sum) ip->ip_sum = in_cksum(m, hlen); - error = (*ifp->if_output)(ifp, m, (struct sockaddr *)dst, - ro + error = (*ifp->if_output)(ifp, m, (struct sockaddr *)&dst, + NULL ); goto done; } @@ -935,8 +931,8 @@ sendorfree: m->m_act = 0; if (error == 0) error = (*ifp->if_output)(ifp, m, - (struct sockaddr *)dst, - ro + (struct sockaddr *)&dst, + NULL ); else FREE_MB_T(m); @@ -948,9 +944,9 @@ done: else ipfmain.ipf_frouteok[1]++; - if ((ro != NULL) && (ro->ro_rt != NULL)) { - RTFREE(ro->ro_rt); - } + if (has_nhop) + fib4_free_nh_ext(fibnum, &nh4); + return 0; bad: if (error == EMSGSIZE) { @@ -971,18 +967,11 @@ int ipf_verifysrc(fin) fr_info_t *fin; { - struct 
sockaddr_in *dst; - struct route iproute; + struct nhop4_basic nh4; - bzero((char *)&iproute, sizeof(iproute)); - dst = (struct sockaddr_in *)&iproute.ro_dst; - dst->sin_len = sizeof(*dst); - dst->sin_family = AF_INET; - dst->sin_addr = fin->fin_src; - in_rtalloc(&iproute, 0); - if (iproute.ro_rt == NULL) - return 0; - return (fin->fin_ifp == iproute.ro_rt->rt_ifp); + if (fib4_lookup_nh_basic(0, fin->fin_src, 0, 0, &nh4) != 0) + return (0); + return (fin->fin_ifp == nh4.nh_ifp); } From 60c274aaf8356315599a3d91339fc9540afa4247 Mon Sep 17 00:00:00 2001 From: "Bjoern A. Zeeb" Date: Sun, 10 Jan 2016 08:14:25 +0000 Subject: [PATCH 44/67] Initialize error after r293626 in case neither INET nor INET6 is compiled into the kernel. Ideally lots more code would just not be called (or compiled in) in that case but that requires a lot more surgery. For now try to make IP-less kernels compile again. --- sys/netpfil/ipfw/ip_fw_table_algo.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/sys/netpfil/ipfw/ip_fw_table_algo.c b/sys/netpfil/ipfw/ip_fw_table_algo.c index e6b7f6f53a1..75b2b670727 100644 --- a/sys/netpfil/ipfw/ip_fw_table_algo.c +++ b/sys/netpfil/ipfw/ip_fw_table_algo.c @@ -3826,6 +3826,9 @@ ta_lookup_kfib(struct table_info *ti, void *key, uint32_t keylen, error = fib6_lookup_nh_basic(ti->data, (struct in6_addr *)key, 0, 0, 0, &nh6); #endif +#if !defined(INET6) && !defined(INET) + error = ENOENT; +#endif if (error != 0) return (0); From 89fc126add0d6f5e253bd28c12a9bec6a06f75f0 Mon Sep 17 00:00:00 2001 From: "Alexander V. Chernikov" Date: Sun, 10 Jan 2016 08:37:00 +0000 Subject: [PATCH 45/67] Initialize error value ta_lookup_kfib() by default to please compiler. 
--- sys/netpfil/ipfw/ip_fw_table_algo.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/sys/netpfil/ipfw/ip_fw_table_algo.c b/sys/netpfil/ipfw/ip_fw_table_algo.c index 75b2b670727..2ce550ea841 100644 --- a/sys/netpfil/ipfw/ip_fw_table_algo.c +++ b/sys/netpfil/ipfw/ip_fw_table_algo.c @@ -3814,6 +3814,7 @@ ta_lookup_kfib(struct table_info *ti, void *key, uint32_t keylen, #endif int error; + error = ENOENT; #ifdef INET if (keylen == 4) { in.s_addr = *(in_addr_t *)key; @@ -3826,9 +3827,6 @@ ta_lookup_kfib(struct table_info *ti, void *key, uint32_t keylen, error = fib6_lookup_nh_basic(ti->data, (struct in6_addr *)key, 0, 0, 0, &nh6); #endif -#if !defined(INET6) && !defined(INET) - error = ENOENT; -#endif if (error != 0) return (0); From 290bc19b5b8f932f376d283a95b35c67c88ff2bc Mon Sep 17 00:00:00 2001 From: Sepherosa Ziehau Date: Sun, 10 Jan 2016 08:41:01 +0000 Subject: [PATCH 46/67] committer-src.dot: Add {adrian,delphij}->me Approved by: adrian (mentor) --- share/misc/committers-src.dot | 2 ++ 1 file changed, 2 insertions(+) diff --git a/share/misc/committers-src.dot b/share/misc/committers-src.dot index 24893fbd047..63012fded2b 100644 --- a/share/misc/committers-src.dot +++ b/share/misc/committers-src.dot @@ -344,6 +344,7 @@ adrian -> loos adrian -> monthadar adrian -> ray adrian -> rmh +adrian -> sephe ae -> melifaro @@ -404,6 +405,7 @@ das -> rodrigc delphij -> gabor delphij -> rafan +delphij -> sephe des -> anholt des -> hmp From ab861e6c065f7e2cba512b2206dba49208a0c031 Mon Sep 17 00:00:00 2001 From: "Alexander V. Chernikov" Date: Sun, 10 Jan 2016 11:59:55 +0000 Subject: [PATCH 47/67] Do not hold ifaddr reference for the whole icmp6_reflect() exec time. Copy source address, calculate hlim and release refcount instead. 
--- sys/netinet6/icmp6.c | 37 +++++++++++++++++++++---------------- 1 file changed, 21 insertions(+), 16 deletions(-) diff --git a/sys/netinet6/icmp6.c b/sys/netinet6/icmp6.c index 54d7e1eb651..2007a18547e 100644 --- a/sys/netinet6/icmp6.c +++ b/sys/netinet6/icmp6.c @@ -2114,13 +2114,13 @@ icmp6_rip6_input(struct mbuf **mp, int off) void icmp6_reflect(struct mbuf *m, size_t off) { - struct in6_addr src, *srcp = NULL; + struct in6_addr src6, *srcp; struct ip6_hdr *ip6; struct icmp6_hdr *icmp6; struct in6_ifaddr *ia = NULL; struct ifnet *outif = NULL; int plen; - int type, code; + int type, code, hlim; /* too short to reflect */ if (off < sizeof(struct ip6_hdr)) { @@ -2166,6 +2166,8 @@ icmp6_reflect(struct mbuf *m, size_t off) icmp6 = (struct icmp6_hdr *)(ip6 + 1); type = icmp6->icmp6_type; /* keep type for statistics */ code = icmp6->icmp6_code; /* ditto. */ + hlim = 0; + srcp = NULL; /* * If the incoming packet was addressed directly to us (i.e. unicast), @@ -2177,8 +2179,18 @@ icmp6_reflect(struct mbuf *m, size_t off) if (!IN6_IS_ADDR_MULTICAST(&ip6->ip6_dst)) { ia = in6ifa_ifwithaddr(&ip6->ip6_dst, 0 /* XXX */); if (ia != NULL && !(ia->ia6_flags & - (IN6_IFF_ANYCAST|IN6_IFF_NOTREADY))) - srcp = &ia->ia_addr.sin6_addr; + (IN6_IFF_ANYCAST|IN6_IFF_NOTREADY))) { + src6 = ia->ia_addr.sin6_addr; + srcp = &src6; + + if (m->m_pkthdr.rcvif != NULL) { + /* XXX: This may not be the outgoing interface */ + hlim = ND_IFINFO(m->m_pkthdr.rcvif)->chlim; + } else + hlim = V_ip6_defhlim; + } + if (ia != NULL) + ifa_free(&ia->ia_ifa); } if (srcp == NULL) { @@ -2195,16 +2207,16 @@ icmp6_reflect(struct mbuf *m, size_t off) sin6.sin6_len = sizeof(sin6); sin6.sin6_addr = ip6->ip6_dst; /* zone ID should be embedded */ - e = in6_selectsrc(&sin6, NULL, NULL, NULL, &outif, &src); + e = in6_selectsrc(&sin6, NULL, NULL, NULL, &outif, &src6); if (e) { char ip6buf[INET6_ADDRSTRLEN]; nd6log((LOG_DEBUG, "icmp6_reflect: source can't be determined: " "dst=%s, error=%d\n", - ip6_sprintf(ip6buf, 
&sin6.sin6_addr), e)); + ip6_sprintf(ip6buf, &ip6->ip6_dst), e)); goto bad; } - srcp = &src; + srcp = &src6; } /* * ip6_input() drops a packet if its src is multicast. @@ -2218,11 +2230,8 @@ icmp6_reflect(struct mbuf *m, size_t off) ip6->ip6_nxt = IPPROTO_ICMPV6; if (outif) ip6->ip6_hlim = ND_IFINFO(outif)->chlim; - else if (m->m_pkthdr.rcvif) { - /* XXX: This may not be the outgoing interface */ - ip6->ip6_hlim = ND_IFINFO(m->m_pkthdr.rcvif)->chlim; - } else - ip6->ip6_hlim = V_ip6_defhlim; + else + ip6->ip6_hlim = hlim; icmp6->icmp6_cksum = 0; icmp6->icmp6_cksum = in6_cksum(m, IPPROTO_ICMPV6, @@ -2238,13 +2247,9 @@ icmp6_reflect(struct mbuf *m, size_t off) if (outif) icmp6_ifoutstat_inc(outif, type, code); - if (ia != NULL) - ifa_free(&ia->ia_ifa); return; bad: - if (ia != NULL) - ifa_free(&ia->ia_ifa); m_freem(m); return; } From 601c0b8bccc98b4c9e91be509655f21e160d3a3e Mon Sep 17 00:00:00 2001 From: "Alexander V. Chernikov" Date: Sun, 10 Jan 2016 13:40:29 +0000 Subject: [PATCH 48/67] Split in6_selectsrc() into in6_selectsrc_addr() and in6_selectsrc_socket(). in6_selectsrc() has 2 classes of users: socket-based one (raw/udp/pcb/etc) and socket-less (ND code). The main reason for that change is the inability to specify non-default FIB for callers w/o socket since (internally) inpcb is used to determine fib. As a result, add 2 wrappers for in6_selectsrc() (making in6_selectsrc() static): 1) in6_selectsrc_socket() for the former class. Embed scope_ambiguous check along with returning hop limit when needed. 2) in6_selectsrc_addr() for the latter case. Add 'fibnum' argument and pass IPv6 address w/ explicitly specified scope as separate argument.
Reviewed by: ae (previous version) --- sys/netinet6/icmp6.c | 22 ++++------ sys/netinet6/in6_pcb.c | 12 ++---- sys/netinet6/in6_src.c | 84 ++++++++++++++++++++++++++++++++++++-- sys/netinet6/ip6_var.h | 7 ++-- sys/netinet6/nd6_nbr.c | 39 +++++++----------- sys/netinet6/raw_ip6.c | 33 ++++----------- sys/netinet6/udp6_usrreq.c | 10 +---- 7 files changed, 120 insertions(+), 87 deletions(-) diff --git a/sys/netinet6/icmp6.c b/sys/netinet6/icmp6.c index 2007a18547e..dd77527cb2f 100644 --- a/sys/netinet6/icmp6.c +++ b/sys/netinet6/icmp6.c @@ -2194,26 +2194,25 @@ icmp6_reflect(struct mbuf *m, size_t off) } if (srcp == NULL) { - int e; - struct sockaddr_in6 sin6; + int error; + struct in6_addr dst6; + uint32_t scopeid; /* * This case matches to multicasts, our anycast, or unicasts * that we do not own. Select a source address based on the * source address of the erroneous packet. */ - bzero(&sin6, sizeof(sin6)); - sin6.sin6_family = AF_INET6; - sin6.sin6_len = sizeof(sin6); - sin6.sin6_addr = ip6->ip6_dst; /* zone ID should be embedded */ + in6_splitscope(&ip6->ip6_dst, &dst6, &scopeid); + error = in6_selectsrc_addr(RT_DEFAULT_FIB, &dst6, + scopeid, NULL, &src6, &hlim); - e = in6_selectsrc(&sin6, NULL, NULL, NULL, &outif, &src6); - if (e) { + if (error) { char ip6buf[INET6_ADDRSTRLEN]; nd6log((LOG_DEBUG, "icmp6_reflect: source can't be determined: " "dst=%s, error=%d\n", - ip6_sprintf(ip6buf, &ip6->ip6_dst), e)); + ip6_sprintf(ip6buf, &ip6->ip6_dst), error)); goto bad; } srcp = &src6; @@ -2228,10 +2227,7 @@ icmp6_reflect(struct mbuf *m, size_t off) ip6->ip6_vfc &= ~IPV6_VERSION_MASK; ip6->ip6_vfc |= IPV6_VERSION; ip6->ip6_nxt = IPPROTO_ICMPV6; - if (outif) - ip6->ip6_hlim = ND_IFINFO(outif)->chlim; - else - ip6->ip6_hlim = hlim; + ip6->ip6_hlim = hlim; icmp6->icmp6_cksum = 0; icmp6->icmp6_cksum = in6_cksum(m, IPPROTO_ICMPV6, diff --git a/sys/netinet6/in6_pcb.c b/sys/netinet6/in6_pcb.c index 67ecb8ca0b6..a779b8dca88 100644 --- a/sys/netinet6/in6_pcb.c +++ 
b/sys/netinet6/in6_pcb.c @@ -328,7 +328,6 @@ in6_pcbladdr(register struct inpcb *inp, struct sockaddr *nam, { register struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *)nam; int error = 0; - struct ifnet *ifp = NULL; int scope_ambiguous = 0; struct in6_addr in6a; @@ -358,20 +357,15 @@ in6_pcbladdr(register struct inpcb *inp, struct sockaddr *nam, if ((error = prison_remote_ip6(inp->inp_cred, &sin6->sin6_addr)) != 0) return (error); - error = in6_selectsrc(sin6, inp->in6p_outputopts, - inp, inp->inp_cred, &ifp, &in6a); + error = in6_selectsrc_socket(sin6, inp->in6p_outputopts, + inp, inp->inp_cred, scope_ambiguous, &in6a, NULL); if (error) return (error); - if (ifp && scope_ambiguous && - (error = in6_setscope(&sin6->sin6_addr, ifp, NULL)) != 0) { - return(error); - } - /* * Do not update this earlier, in case we return with an error. * - * XXX: this in6_selectsrc result might replace the bound local + * XXX: this in6_selectsrc_socket result might replace the bound local * address with the address specified by setsockopt(IPV6_PKTINFO). * Is it the intended behavior? 
*/ diff --git a/sys/netinet6/in6_src.c b/sys/netinet6/in6_src.c index fb362e25013..402940217d6 100644 --- a/sys/netinet6/in6_src.c +++ b/sys/netinet6/in6_src.c @@ -136,6 +136,9 @@ static int selectroute(struct sockaddr_in6 *, struct ip6_pktopts *, static int in6_selectif(struct sockaddr_in6 *, struct ip6_pktopts *, struct ip6_moptions *, struct ifnet **, struct ifnet *, u_int); +static int in6_selectsrc(uint32_t, struct sockaddr_in6 *, + struct ip6_pktopts *, struct inpcb *, struct ucred *, + struct ifnet **, struct in6_addr *); static struct in6_addrpolicy *lookup_addrsel_policy(struct sockaddr_in6 *); @@ -175,9 +178,9 @@ static struct in6_addrpolicy *match_addrsel_policy(struct sockaddr_in6 *); goto out; /* XXX: we can't use 'break' here */ \ } while(0) -int -in6_selectsrc(struct sockaddr_in6 *dstsock, struct ip6_pktopts *opts, - struct inpcb *inp, struct ucred *cred, +static int +in6_selectsrc(uint32_t fibnum, struct sockaddr_in6 *dstsock, + struct ip6_pktopts *opts, struct inpcb *inp, struct ucred *cred, struct ifnet **ifpp, struct in6_addr *srcp) { struct rm_priotracker in6_ifa_tracker; @@ -228,7 +231,7 @@ in6_selectsrc(struct sockaddr_in6 *dstsock, struct ip6_pktopts *opts, /* get the outgoing interface */ if ((error = in6_selectif(dstsock, opts, mopts, &ifp, oifp, - (inp != NULL) ? inp->inp_inc.inc_fibnum : RT_DEFAULT_FIB)) + fibnum)) != 0) return (error); @@ -544,6 +547,79 @@ in6_selectsrc(struct sockaddr_in6 *dstsock, struct ip6_pktopts *opts, return (0); } +/* + * Select source address based on @inp, @dstsock and @opts. + * Stores selected address to @srcp. If @scope_ambiguous is set, + * embed scope from selected outgoing interface. If @hlim pointer + * is provided, stores calculated hop limit there. + * Returns 0 on success. 
+ */ +int +in6_selectsrc_socket(struct sockaddr_in6 *dstsock, struct ip6_pktopts *opts, + struct inpcb *inp, struct ucred *cred, int scope_ambiguous, + struct in6_addr *srcp, int *hlim) +{ + struct ifnet *retifp; + uint32_t fibnum; + int error; + + fibnum = (inp != NULL) ? inp->inp_inc.inc_fibnum : RT_DEFAULT_FIB; + retifp = NULL; + + error = in6_selectsrc(fibnum, dstsock, opts, inp, cred, &retifp, srcp); + if (error != 0) + return (error); + + if (hlim != NULL) + *hlim = in6_selecthlim(inp, retifp); + + if (retifp == NULL || scope_ambiguous == 0) + return (0); + + /* + * Application should provide a proper zone ID or the use of + * default zone IDs should be enabled. Unfortunately, some + * applications do not behave as it should, so we need a + * workaround. Even if an appropriate ID is not determined + * (when it's required), if we can determine the outgoing + * interface. determine the zone ID based on the interface. + */ + error = in6_setscope(&dstsock->sin6_addr, retifp, NULL); + + return (error); +} + +/* + * Select source address based on @fibnum, @dst and @scopeid. + * Stores selected address to @srcp. + * Returns 0 on success. 
+ * + * Used by non-socket based consumers (ND code mostly) + */ +int +in6_selectsrc_addr(uint32_t fibnum, const struct in6_addr *dst, + uint32_t scopeid, struct ifnet *ifp, struct in6_addr *srcp, + int *hlim) +{ + struct ifnet *retifp; + struct sockaddr_in6 dst_sa; + int error; + + retifp = ifp; + bzero(&dst_sa, sizeof(dst_sa)); + dst_sa.sin6_family = AF_INET6; + dst_sa.sin6_len = sizeof(dst_sa); + dst_sa.sin6_addr = *dst; + dst_sa.sin6_scope_id = scopeid; + sa6_embedscope(&dst_sa, 0); + + error = in6_selectsrc(fibnum, &dst_sa, NULL, NULL, NULL, &retifp, srcp); + if (hlim != NULL) + *hlim = in6_selecthlim(NULL, retifp); + + return (error); +} + /* * clone - meaningful only for bsdi and freebsd */ diff --git a/sys/netinet6/ip6_var.h b/sys/netinet6/ip6_var.h index f43a28c199b..ef86eca9b03 100644 --- a/sys/netinet6/ip6_var.h +++ b/sys/netinet6/ip6_var.h @@ -418,9 +418,10 @@ int rip6_usrreq(struct socket *, int dest6_input(struct mbuf **, int *, int); int none_input(struct mbuf **, int *, int); -int in6_selectsrc(struct sockaddr_in6 *, struct ip6_pktopts *, - struct inpcb *inp, struct ucred *cred, - struct ifnet **, struct in6_addr *); +int in6_selectsrc_socket(struct sockaddr_in6 *, struct ip6_pktopts *, + struct inpcb *, struct ucred *, int, struct in6_addr *, int *); +int in6_selectsrc_addr(uint32_t, const struct in6_addr *, + uint32_t, struct ifnet *, struct in6_addr *, int *); int in6_selectroute(struct sockaddr_in6 *, struct ip6_pktopts *, struct ip6_moptions *, struct route_in6 *, struct ifnet **, struct rtentry **); diff --git a/sys/netinet6/nd6_nbr.c b/sys/netinet6/nd6_nbr.c index b7ba0b205f4..3a4e8a0ba02 100644 --- a/sys/netinet6/nd6_nbr.c +++ b/sys/netinet6/nd6_nbr.c @@ -481,27 +481,21 @@ nd6_ns_output_fib(struct ifnet *ifp, const struct in6_addr *saddr6, ifa_free(ifa); } else { int error; - struct sockaddr_in6 dst_sa; - struct in6_addr src_in; - struct ifnet *oifp; + struct in6_addr dst6, src6; + uint32_t scopeid; - bzero(&dst_sa, sizeof(dst_sa)); - 
dst_sa.sin6_family = AF_INET6; - dst_sa.sin6_len = sizeof(dst_sa); - dst_sa.sin6_addr = ip6->ip6_dst; - - oifp = ifp; - error = in6_selectsrc(&dst_sa, NULL, - NULL, NULL, &oifp, &src_in); + in6_splitscope(&ip6->ip6_dst, &dst6, &scopeid); + error = in6_selectsrc_addr(RT_DEFAULT_FIB, &dst6, + scopeid, ifp, &src6, NULL); if (error) { char ip6buf[INET6_ADDRSTRLEN]; nd6log((LOG_DEBUG, "%s: source can't be " "determined: dst=%s, error=%d\n", __func__, - ip6_sprintf(ip6buf, &dst_sa.sin6_addr), + ip6_sprintf(ip6buf, &dst6), error)); goto bad; } - ip6->ip6_src = src_in; + ip6->ip6_src = src6; } } else { /* @@ -941,12 +935,12 @@ nd6_na_output_fib(struct ifnet *ifp, const struct in6_addr *daddr6_0, { struct mbuf *m; struct m_tag *mtag; - struct ifnet *oifp; struct ip6_hdr *ip6; struct nd_neighbor_advert *nd_na; struct ip6_moptions im6o; - struct in6_addr src, daddr6; - struct sockaddr_in6 dst_sa; + struct in6_addr daddr6, dst6, src6; + uint32_t scopeid; + int icmp6len, maxlen, error; caddr_t mac = NULL; @@ -998,24 +992,21 @@ nd6_na_output_fib(struct ifnet *ifp, const struct in6_addr *daddr6_0, flags &= ~ND_NA_FLAG_SOLICITED; } ip6->ip6_dst = daddr6; - bzero(&dst_sa, sizeof(struct sockaddr_in6)); - dst_sa.sin6_family = AF_INET6; - dst_sa.sin6_len = sizeof(struct sockaddr_in6); - dst_sa.sin6_addr = daddr6; /* * Select a source whose scope is the same as that of the dest. 
*/ - oifp = ifp; - error = in6_selectsrc(&dst_sa, NULL, NULL, NULL, &oifp, &src); + in6_splitscope(&daddr6, &dst6, &scopeid); + error = in6_selectsrc_addr(RT_DEFAULT_FIB, &dst6, + scopeid, ifp, &src6, NULL); if (error) { char ip6buf[INET6_ADDRSTRLEN]; nd6log((LOG_DEBUG, "nd6_na_output: source can't be " "determined: dst=%s, error=%d\n", - ip6_sprintf(ip6buf, &dst_sa.sin6_addr), error)); + ip6_sprintf(ip6buf, &daddr6), error)); goto bad; } - ip6->ip6_src = src; + ip6->ip6_src = src6; nd_na = (struct nd_neighbor_advert *)(ip6 + 1); nd_na->nd_na_type = ND_NEIGHBOR_ADVERT; nd_na->nd_na_code = 0; diff --git a/sys/netinet6/raw_ip6.c b/sys/netinet6/raw_ip6.c index 0725842cbe7..972eb9f1d9e 100644 --- a/sys/netinet6/raw_ip6.c +++ b/sys/netinet6/raw_ip6.c @@ -397,6 +397,7 @@ rip6_output(struct mbuf *m, struct socket *so, ...) int type = 0, code = 0; /* for ICMPv6 output statistics only */ int scope_ambiguous = 0; int use_defzone = 0; + int hlim = 0; struct in6_addr in6a; va_list ap; @@ -460,8 +461,9 @@ rip6_output(struct mbuf *m, struct socket *so, ...) /* * Source address selection. */ - error = in6_selectsrc(dstsock, optp, in6p, so->so_cred, - &oifp, &in6a); + error = in6_selectsrc_socket(dstsock, optp, in6p, so->so_cred, + scope_ambiguous, &in6a, &hlim); + if (error) goto bad; error = prison_check_ip6(in6p->inp_cred, &in6a); @@ -469,19 +471,6 @@ rip6_output(struct mbuf *m, struct socket *so, ...) goto bad; ip6->ip6_src = in6a; - if (oifp && scope_ambiguous) { - /* - * Application should provide a proper zone ID or the use of - * default zone IDs should be enabled. Unfortunately, some - * applications do not behave as it should, so we need a - * workaround. Even if an appropriate ID is not determined - * (when it's required), if we can determine the outgoing - * interface. determine the zone ID based on the interface. 
- */ - error = in6_setscope(&dstsock->sin6_addr, oifp, NULL); - if (error != 0) - goto bad; - } ip6->ip6_dst = dstsock->sin6_addr; /* @@ -496,7 +485,7 @@ rip6_output(struct mbuf *m, struct socket *so, ...) * ip6_plen will be filled in ip6_output, so not fill it here. */ ip6->ip6_nxt = in6p->inp_ip_p; - ip6->ip6_hlim = in6_selecthlim(in6p, oifp); + ip6->ip6_hlim = hlim; if (so->so_proto->pr_protocol == IPPROTO_ICMPV6 || in6p->in6p_cksum != -1) { @@ -784,7 +773,6 @@ rip6_connect(struct socket *so, struct sockaddr *nam, struct thread *td) struct inpcb *inp; struct sockaddr_in6 *addr = (struct sockaddr_in6 *)nam; struct in6_addr in6a; - struct ifnet *ifp = NULL; int error = 0, scope_ambiguous = 0; inp = sotoinpcb(so); @@ -813,21 +801,14 @@ rip6_connect(struct socket *so, struct sockaddr *nam, struct thread *td) INP_INFO_WLOCK(&V_ripcbinfo); INP_WLOCK(inp); /* Source address selection. XXX: need pcblookup? */ - error = in6_selectsrc(addr, inp->in6p_outputopts, - inp, so->so_cred, &ifp, &in6a); + error = in6_selectsrc_socket(addr, inp->in6p_outputopts, + inp, so->so_cred, scope_ambiguous, &in6a, NULL); if (error) { INP_WUNLOCK(inp); INP_INFO_WUNLOCK(&V_ripcbinfo); return (error); } - /* XXX: see above */ - if (ifp && scope_ambiguous && - (error = in6_setscope(&addr->sin6_addr, ifp, NULL)) != 0) { - INP_WUNLOCK(inp); - INP_INFO_WUNLOCK(&V_ripcbinfo); - return (error); - } inp->in6p_faddr = addr->sin6_addr; inp->in6p_laddr = in6a; soisconnected(so); diff --git a/sys/netinet6/udp6_usrreq.c b/sys/netinet6/udp6_usrreq.c index 353b260d33c..9768df58f6d 100644 --- a/sys/netinet6/udp6_usrreq.c +++ b/sys/netinet6/udp6_usrreq.c @@ -631,7 +631,6 @@ udp6_output(struct inpcb *inp, struct mbuf *m, struct sockaddr *addr6, struct udphdr *udp6; struct in6_addr *laddr, *faddr, in6a; struct sockaddr_in6 *sin6 = NULL; - struct ifnet *oifp = NULL; int cscov_partial = 0; int scope_ambiguous = 0; u_short fport; @@ -731,15 +730,10 @@ udp6_output(struct inpcb *inp, struct mbuf *m, struct sockaddr 
*addr6, } if (!IN6_IS_ADDR_V4MAPPED(faddr)) { - error = in6_selectsrc(sin6, optp, inp, - td->td_ucred, &oifp, &in6a); + error = in6_selectsrc_socket(sin6, optp, inp, + td->td_ucred, scope_ambiguous, &in6a, NULL); if (error) goto release; - if (oifp && scope_ambiguous && - (error = in6_setscope(&sin6->sin6_addr, - oifp, NULL))) { - goto release; - } laddr = &in6a; } else laddr = &inp->in6p_laddr; /* XXX */ From cf45f1240d75a92edf739e6e60bdd0ea6bba7a32 Mon Sep 17 00:00:00 2001 From: Jilles Tjoelker Date: Sun, 10 Jan 2016 16:31:28 +0000 Subject: [PATCH 49/67] sh: Update associated state when restoring locals while leaving a function. Some variables like PATH call a function when modified. Make sure to call this also when leaving a function where such a variable was made local. Make sure to restore local variables before shellparam, so getopts state is not clobbered. --- bin/sh/eval.c | 4 ++-- bin/sh/tests/builtins/Makefile | 1 + bin/sh/tests/builtins/local5.0 | 15 +++++++++++++++ bin/sh/var.c | 11 +++++++++++ 4 files changed, 29 insertions(+), 2 deletions(-) create mode 100644 bin/sh/tests/builtins/local5.0 diff --git a/bin/sh/eval.c b/bin/sh/eval.c index 5a3f8e71883..949157d1e6b 100644 --- a/bin/sh/eval.c +++ b/bin/sh/eval.c @@ -1039,12 +1039,12 @@ evalcommand(union node *cmd, int flags, struct backcmd *backcmd) reffunc(cmdentry.u.func); savehandler = handler; if (setjmp(jmploc.loc)) { - freeparam(&shellparam); - shellparam = saveparam; popredir(); unreffunc(cmdentry.u.func); poplocalvars(); localvars = savelocalvars; + freeparam(&shellparam); + shellparam = saveparam; funcnest--; handler = savehandler; longjmp(handler->loc, 1); diff --git a/bin/sh/tests/builtins/Makefile b/bin/sh/tests/builtins/Makefile index 11240ca7dda..1511f70eb86 100644 --- a/bin/sh/tests/builtins/Makefile +++ b/bin/sh/tests/builtins/Makefile @@ -111,6 +111,7 @@ FILES+= local1.0 FILES+= local2.0 FILES+= local3.0 FILES+= local4.0 +FILES+= local5.0 .if ${MK_NLS} != "no" FILES+= locale1.0 .endif 
diff --git a/bin/sh/tests/builtins/local5.0 b/bin/sh/tests/builtins/local5.0 new file mode 100644 index 00000000000..2f2a14e110a --- /dev/null +++ b/bin/sh/tests/builtins/local5.0 @@ -0,0 +1,15 @@ +# $FreeBSD$ + +f() { + local PATH IFS elem + IFS=: + for elem in ''$PATH''; do + PATH=/var/empty/$elem:$PATH + done + ls -d / >/dev/null +} + +p1=$(command -v ls) +f +p2=$(command -v ls) +[ "$p1" = "$p2" ] diff --git a/bin/sh/var.c b/bin/sh/var.c index d4013618ccd..3af7dbeca19 100644 --- a/bin/sh/var.c +++ b/bin/sh/var.c @@ -791,6 +791,7 @@ poplocalvars(void) { struct localvar *lvp; struct var *vp; + int islocalevar; INTOFF; while ((lvp = localvars) != NULL) { @@ -803,10 +804,20 @@ poplocalvars(void) } else if ((lvp->flags & (VUNSET|VSTRFIXED)) == VUNSET) { (void)unsetvar(vp->text); } else { + islocalevar = (vp->flags | lvp->flags) & VEXPORT && + localevar(lvp->text); if ((vp->flags & VTEXTFIXED) == 0) ckfree(vp->text); vp->flags = lvp->flags; vp->text = lvp->text; + if (vp->func) + (*vp->func)(vp->text + vp->name_len + 1); + if (islocalevar) { + change_env(vp->text, vp->flags & VEXPORT && + (vp->flags & VUNSET) == 0); + setlocale(LC_ALL, ""); + updatecharset(); + } } ckfree(lvp); } From a18c313e4a629099f2a747656e6307b6065616d5 Mon Sep 17 00:00:00 2001 From: Nathan Whitehorn Date: Sun, 10 Jan 2016 16:42:14 +0000 Subject: [PATCH 50/67] Use setjmp() instead of the identical-except-for-having-a-wrong-prototype setfault() when testing for faults. This should also help the compiler do the right thing with this complicated-to-optimize function. 
--- sys/powerpc/include/pcb.h | 4 +-- sys/powerpc/include/setjmp.h | 4 +++ sys/powerpc/ofw/rtas.c | 7 ++-- sys/powerpc/powermac/grackle.c | 7 ++-- sys/powerpc/powerpc/copyinout.c | 62 ++++++++++++++++++-------------- sys/powerpc/powerpc/mp_machdep.c | 1 - sys/powerpc/powerpc/trap.c | 22 ++++++------ 7 files changed, 60 insertions(+), 47 deletions(-) diff --git a/sys/powerpc/include/pcb.h b/sys/powerpc/include/pcb.h index 6caf5d2d661..094949ed948 100644 --- a/sys/powerpc/include/pcb.h +++ b/sys/powerpc/include/pcb.h @@ -35,7 +35,7 @@ #ifndef _MACHINE_PCB_H_ #define _MACHINE_PCB_H_ -typedef register_t faultbuf[25]; +#include struct pcb { register_t pcb_context[20]; /* non-volatile r14-r31 */ @@ -44,7 +44,7 @@ struct pcb { register_t pcb_toc; /* toc pointer */ register_t pcb_lr; /* link register */ struct pmap *pcb_pm; /* pmap of our vmspace */ - faultbuf *pcb_onfault; /* For use during + jmp_buf *pcb_onfault; /* For use during copyin/copyout */ int pcb_flags; #define PCB_FPU 1 /* Process uses FPU */ diff --git a/sys/powerpc/include/setjmp.h b/sys/powerpc/include/setjmp.h index e453044dbf3..b95a1115be9 100644 --- a/sys/powerpc/include/setjmp.h +++ b/sys/powerpc/include/setjmp.h @@ -8,7 +8,11 @@ #include +#ifdef _KERNEL +#define _JBLEN 25 /* Kernel doesn't save FP and Altivec regs */ +#else #define _JBLEN 100 +#endif /* * jmp_buf and sigjmp_buf are encapsulated in different structs to force diff --git a/sys/powerpc/ofw/rtas.c b/sys/powerpc/ofw/rtas.c index 15cb58e8353..5dff8efcdac 100644 --- a/sys/powerpc/ofw/rtas.c +++ b/sys/powerpc/ofw/rtas.c @@ -62,8 +62,6 @@ int rtascall(vm_offset_t callbuffer, uintptr_t rtas_privdat); extern uintptr_t rtas_entry; extern register_t rtasmsr; -int setfault(faultbuf); /* defined in locore.S */ - /* * After the VM is up, allocate RTAS memory and instantiate it */ @@ -203,7 +201,7 @@ int rtas_call_method(cell_t token, int nargs, int nreturns, ...) 
{ vm_offset_t argsptr; - faultbuf env, *oldfaultbuf; + jmp_buf env, *oldfaultbuf; va_list ap; struct { cell_t token; @@ -233,7 +231,8 @@ rtas_call_method(cell_t token, int nargs, int nreturns, ...) /* Get rid of any stale machine checks that have been waiting. */ __asm __volatile ("sync; isync"); oldfaultbuf = curthread->td_pcb->pcb_onfault; - if (!setfault(env)) { + curthread->td_pcb->pcb_onfault = &env; + if (!setjmp(env)) { __asm __volatile ("sync"); result = rtascall(argsptr, rtas_private_data); __asm __volatile ("sync; isync"); diff --git a/sys/powerpc/powermac/grackle.c b/sys/powerpc/powermac/grackle.c index b4a8a3b5e03..95d59a1ca74 100644 --- a/sys/powerpc/powermac/grackle.c +++ b/sys/powerpc/powermac/grackle.c @@ -82,8 +82,6 @@ static int grackle_enable_config(struct grackle_softc *, u_int, static void grackle_disable_config(struct grackle_softc *); static int badaddr(void *, size_t); -int setfault(faultbuf); /* defined in locore.S */ - /* * Driver methods. */ @@ -244,7 +242,7 @@ static int badaddr(void *addr, size_t size) { struct thread *td; - faultbuf env, *oldfaultbuf; + jmp_buf env, *oldfaultbuf; int x; /* Get rid of any stale machine checks that have been waiting. */ @@ -253,7 +251,8 @@ badaddr(void *addr, size_t size) td = curthread; oldfaultbuf = td->td_pcb->pcb_onfault; - if (setfault(env)) { + td->td_pcb->pcb_onfault = &env; + if (setjmp(env)) { td->td_pcb->pcb_onfault = oldfaultbuf; __asm __volatile ("sync"); return 1; diff --git a/sys/powerpc/powerpc/copyinout.c b/sys/powerpc/powerpc/copyinout.c index fdee51da056..29c4561ef41 100644 --- a/sys/powerpc/powerpc/copyinout.c +++ b/sys/powerpc/powerpc/copyinout.c @@ -71,8 +71,6 @@ __FBSDID("$FreeBSD$"); #include #include -int setfault(faultbuf); /* defined in locore.S */ - #ifdef AIM /* * Makes sure that the right segment of userspace is mapped in. 
@@ -176,7 +174,7 @@ copyout(const void *kaddr, void *udaddr, size_t len) { struct thread *td; pmap_t pm; - faultbuf env; + jmp_buf env; const char *kp; char *up, *p; size_t l; @@ -184,7 +182,8 @@ copyout(const void *kaddr, void *udaddr, size_t len) td = curthread; pm = &td->td_proc->p_vmspace->vm_pmap; - if (setfault(env)) { + td->td_pcb->pcb_onfault = &env; + if (setjmp(env)) { td->td_pcb->pcb_onfault = NULL; return (EFAULT); } @@ -214,7 +213,7 @@ copyin(const void *udaddr, void *kaddr, size_t len) { struct thread *td; pmap_t pm; - faultbuf env; + jmp_buf env; const char *up; char *kp, *p; size_t l; @@ -222,7 +221,8 @@ copyin(const void *udaddr, void *kaddr, size_t len) td = curthread; pm = &td->td_proc->p_vmspace->vm_pmap; - if (setfault(env)) { + td->td_pcb->pcb_onfault = &env; + if (setjmp(env)) { td->td_pcb->pcb_onfault = NULL; return (EFAULT); } @@ -285,13 +285,14 @@ subyte(volatile void *addr, int byte) { struct thread *td; pmap_t pm; - faultbuf env; + jmp_buf env; char *p; td = curthread; pm = &td->td_proc->p_vmspace->vm_pmap; - if (setfault(env)) { + td->td_pcb->pcb_onfault = &env; + if (setjmp(env)) { td->td_pcb->pcb_onfault = NULL; return (-1); } @@ -313,13 +314,14 @@ suword32(volatile void *addr, int word) { struct thread *td; pmap_t pm; - faultbuf env; + jmp_buf env; int *p; td = curthread; pm = &td->td_proc->p_vmspace->vm_pmap; - if (setfault(env)) { + td->td_pcb->pcb_onfault = &env; + if (setjmp(env)) { td->td_pcb->pcb_onfault = NULL; return (-1); } @@ -341,13 +343,14 @@ suword(volatile void *addr, long word) { struct thread *td; pmap_t pm; - faultbuf env; + jmp_buf env; long *p; td = curthread; pm = &td->td_proc->p_vmspace->vm_pmap; - if (setfault(env)) { + td->td_pcb->pcb_onfault = &env; + if (setjmp(env)) { td->td_pcb->pcb_onfault = NULL; return (-1); } @@ -382,14 +385,15 @@ fubyte(volatile const void *addr) { struct thread *td; pmap_t pm; - faultbuf env; + jmp_buf env; u_char *p; int val; td = curthread; pm = &td->td_proc->p_vmspace->vm_pmap; - 
if (setfault(env)) { + td->td_pcb->pcb_onfault = &env; + if (setjmp(env)) { td->td_pcb->pcb_onfault = NULL; return (-1); } @@ -410,13 +414,14 @@ fuword16(volatile const void *addr) { struct thread *td; pmap_t pm; - faultbuf env; + jmp_buf env; uint16_t *p, val; td = curthread; pm = &td->td_proc->p_vmspace->vm_pmap; - if (setfault(env)) { + td->td_pcb->pcb_onfault = &env; + if (setjmp(env)) { td->td_pcb->pcb_onfault = NULL; return (-1); } @@ -437,13 +442,14 @@ fueword32(volatile const void *addr, int32_t *val) { struct thread *td; pmap_t pm; - faultbuf env; + jmp_buf env; int32_t *p; td = curthread; pm = &td->td_proc->p_vmspace->vm_pmap; - if (setfault(env)) { + td->td_pcb->pcb_onfault = &env; + if (setjmp(env)) { td->td_pcb->pcb_onfault = NULL; return (-1); } @@ -465,13 +471,14 @@ fueword64(volatile const void *addr, int64_t *val) { struct thread *td; pmap_t pm; - faultbuf env; + jmp_buf env; int64_t *p; td = curthread; pm = &td->td_proc->p_vmspace->vm_pmap; - if (setfault(env)) { + td->td_pcb->pcb_onfault = &env; + if (setjmp(env)) { td->td_pcb->pcb_onfault = NULL; return (-1); } @@ -493,13 +500,14 @@ fueword(volatile const void *addr, long *val) { struct thread *td; pmap_t pm; - faultbuf env; + jmp_buf env; long *p; td = curthread; pm = &td->td_proc->p_vmspace->vm_pmap; - if (setfault(env)) { + td->td_pcb->pcb_onfault = &env; + if (setjmp(env)) { td->td_pcb->pcb_onfault = NULL; return (-1); } @@ -521,13 +529,14 @@ casueword32(volatile uint32_t *addr, uint32_t old, uint32_t *oldvalp, { struct thread *td; pmap_t pm; - faultbuf env; + jmp_buf env; uint32_t *p, val; td = curthread; pm = &td->td_proc->p_vmspace->vm_pmap; - if (setfault(env)) { + td->td_pcb->pcb_onfault = &env; + if (setjmp(env)) { td->td_pcb->pcb_onfault = NULL; return (-1); } @@ -572,13 +581,14 @@ casueword(volatile u_long *addr, u_long old, u_long *oldvalp, u_long new) { struct thread *td; pmap_t pm; - faultbuf env; + jmp_buf env; u_long *p, val; td = curthread; pm = 
&td->td_proc->p_vmspace->vm_pmap; - if (setfault(env)) { + td->td_pcb->pcb_onfault = &env; + if (setjmp(env)) { td->td_pcb->pcb_onfault = NULL; return (-1); } diff --git a/sys/powerpc/powerpc/mp_machdep.c b/sys/powerpc/powerpc/mp_machdep.c index 9d26e958103..2f20b71a3b7 100644 --- a/sys/powerpc/powerpc/mp_machdep.c +++ b/sys/powerpc/powerpc/mp_machdep.c @@ -67,7 +67,6 @@ volatile static u_quad_t ap_timebase; static u_int ipi_msg_cnt[32]; static struct mtx ap_boot_mtx; struct pcb stoppcbs[MAXCPU]; -int longfault(faultbuf, int); void machdep_ap_bootstrap(void) diff --git a/sys/powerpc/powerpc/trap.c b/sys/powerpc/powerpc/trap.c index 4c9735a5255..d4dac2ee278 100644 --- a/sys/powerpc/powerpc/trap.c +++ b/sys/powerpc/powerpc/trap.c @@ -74,11 +74,12 @@ __FBSDID("$FreeBSD$"); #include #include -#define FAULTBUF_LR 0 +/* Below matches setjmp.S */ +#define FAULTBUF_LR 21 #define FAULTBUF_R1 1 #define FAULTBUF_R2 2 -#define FAULTBUF_CR 3 -#define FAULTBUF_R13 4 +#define FAULTBUF_CR 22 +#define FAULTBUF_R14 3 static void trap_fatal(struct trapframe *frame); static void printtrap(u_int vector, struct trapframe *frame, int isfatal, @@ -462,18 +463,19 @@ static int handle_onfault(struct trapframe *frame) { struct thread *td; - faultbuf *fb; + jmp_buf *fb; td = curthread; fb = td->td_pcb->pcb_onfault; if (fb != NULL) { - frame->srr0 = (*fb)[FAULTBUF_LR]; - frame->fixreg[1] = (*fb)[FAULTBUF_R1]; - frame->fixreg[2] = (*fb)[FAULTBUF_R2]; + frame->srr0 = (*fb)->_jb[FAULTBUF_LR]; + frame->fixreg[1] = (*fb)->_jb[FAULTBUF_R1]; + frame->fixreg[2] = (*fb)->_jb[FAULTBUF_R2]; frame->fixreg[3] = 1; - frame->cr = (*fb)[FAULTBUF_CR]; - bcopy(&(*fb)[FAULTBUF_R13], &frame->fixreg[13], - 19 * sizeof(register_t)); + frame->cr = (*fb)->_jb[FAULTBUF_CR]; + bcopy(&(*fb)->_jb[FAULTBUF_R14], &frame->fixreg[14], + 18 * sizeof(register_t)); + td->td_pcb->pcb_onfault = NULL; /* Returns twice, not thrice */ return (1); } return (0); From 54de56f3b2dc3ea39ab3c86c601c4728a8a3a8cd Mon Sep 17 00:00:00 2001 
From: Adrian Chadd Date: Sun, 10 Jan 2016 17:53:43 +0000 Subject: [PATCH 51/67] Fix the domain iterator to not try the first-touch / fixed domain more than once when doing round-robin. This led to a panic because the iterator was trying the same domain twice and not trying one of the other domains. Reported by: pho Tested by: pho --- sys/vm/vm_domain.c | 16 +++++++++++++--- 1 file changed, 13 insertions(+), 3 deletions(-) diff --git a/sys/vm/vm_domain.c b/sys/vm/vm_domain.c index 83814d516d1..c042aa7694d 100644 --- a/sys/vm/vm_domain.c +++ b/sys/vm/vm_domain.c @@ -62,7 +62,7 @@ __FBSDID("$FreeBSD$"); #include static __inline int -vm_domain_rr_selectdomain(void) +vm_domain_rr_selectdomain(int skip_domain) { #if MAXMEMDOM > 1 struct thread *td; @@ -71,6 +71,16 @@ vm_domain_rr_selectdomain(void) td->td_dom_rr_idx++; td->td_dom_rr_idx %= vm_ndomains; + + /* + * If skip_domain is provided then skip over that + * domain. This is intended for round robin variants + * which first try a fixed domain. + */ + if ((skip_domain > -1) && (td->td_dom_rr_idx == skip_domain)) { + td->td_dom_rr_idx++; + td->td_dom_rr_idx %= vm_ndomains; + } return (td->td_dom_rr_idx); #else return (0); @@ -339,12 +349,12 @@ vm_domain_iterator_run(struct vm_domain_iterator *vi, int *domain) if (vi->n == vm_ndomains) *domain = vi->domain; else - *domain = vm_domain_rr_selectdomain(); + *domain = vm_domain_rr_selectdomain(vi->domain); vi->n--; break; case VM_POLICY_ROUND_ROBIN: default: - *domain = vm_domain_rr_selectdomain(); + *domain = vm_domain_rr_selectdomain(-1); vi->n--; break; } From ca496abd5a671f3bc8dc55ad8cb511c309f523f7 Mon Sep 17 00:00:00 2001 From: Nathan Whitehorn Date: Sun, 10 Jan 2016 18:00:01 +0000 Subject: [PATCH 52/67] Remove dead code and dead comments, most notably the implementation of the now-obsolete setfault(). No NetBSD code exists in the AIM locore files, so update the copyrights there.
--- sys/powerpc/aim/locore32.S | 71 ++-------------------- sys/powerpc/aim/locore64.S | 99 ++----------------------------- sys/powerpc/aim/trap_subr64.S | 9 ++- sys/powerpc/booke/booke_machdep.c | 3 +- sys/powerpc/booke/locore.S | 23 ------- sys/powerpc/powerpc/setjmp.S | 1 - 6 files changed, 18 insertions(+), 188 deletions(-) diff --git a/sys/powerpc/aim/locore32.S b/sys/powerpc/aim/locore32.S index 477ae88114e..b14215cb58d 100644 --- a/sys/powerpc/aim/locore32.S +++ b/sys/powerpc/aim/locore32.S @@ -1,8 +1,7 @@ /* $FreeBSD$ */ -/* $NetBSD: locore.S,v 1.24 2000/05/31 05:09:17 thorpej Exp $ */ /*- - * Copyright (C) 2001 Benno Rice + * Copyright (C) 2010-2016 Nathan Whitehorn * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -14,37 +13,7 @@ * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * - * THIS SOFTWARE IS PROVIDED BY Benno Rice ``AS IS'' AND ANY EXPRESS OR - * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES - * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. - * IN NO EVENT SHALL TOOLS GMBH BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; - * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, - * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR - * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF - * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - */ -/*- - * Copyright (C) 1995, 1996 Wolfgang Solfrank. - * Copyright (C) 1995, 1996 TooLs GmbH. - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. 
Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * 3. All advertising materials mentioning features or use of this software - * must display the following acknowledgement: - * This product includes software developed by TooLs GmbH. - * 4. The name of TooLs GmbH may not be used to endorse or promote products - * derived from this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY TOOLS GMBH ``AS IS'' AND ANY EXPRESS OR + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. * IN NO EVENT SHALL TOOLS GMBH BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, @@ -95,15 +64,7 @@ GLOBAL(tmpstk) btext: /* - * This symbol is here for the benefit of kvm_mkdb, and is supposed to - * mark the start of kernel text. - */ - .globl kernel_text -kernel_text: - -/* - * Startup entry. Note, this must be the first thing in the text - * segment! + * Main kernel entry point. */ .text .globl __start @@ -148,29 +109,7 @@ __start: stw %r3, 0(%r1) bl mi_startup - /* If mi_startup somehow returns, exit. This would be bad. */ - b OF_exit - -/* - * int setfault() - * - * Similar to setjmp to setup for handling faults on accesses to user memory. - * Any routine using this may only call bcopy, either the form below, - * or the (currently used) C code optimized, so it doesn't use any non-volatile - * registers. 
- */ - .globl setfault -setfault: - mflr 0 - mfcr 12 - mfsprg 4,0 - lwz 4,TD_PCB(2) /* curthread = r2 */ - stw 3,PCB_ONFAULT(4) - stw 0,0(3) - stw 1,4(3) - stw 2,8(3) - stmw 12,12(3) - xor 3,3,3 - blr + /* mi_startup() does not return */ + b . #include diff --git a/sys/powerpc/aim/locore64.S b/sys/powerpc/aim/locore64.S index 7f9379341db..cd64b5f81cf 100644 --- a/sys/powerpc/aim/locore64.S +++ b/sys/powerpc/aim/locore64.S @@ -1,8 +1,7 @@ /* $FreeBSD$ */ -/* $NetBSD: locore.S,v 1.24 2000/05/31 05:09:17 thorpej Exp $ */ /*- - * Copyright (C) 2001 Benno Rice + * Copyright (C) 2010-2016 Nathan Whitehorn * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -14,7 +13,7 @@ * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * - * THIS SOFTWARE IS PROVIDED BY Benno Rice ``AS IS'' AND ANY EXPRESS OR + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. * IN NO EVENT SHALL TOOLS GMBH BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, @@ -24,36 +23,8 @@ * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - */ -/*- - * Copyright (C) 1995, 1996 Wolfgang Solfrank. - * Copyright (C) 1995, 1996 TooLs GmbH. - * All rights reserved. * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. 
Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * 3. All advertising materials mentioning features or use of this software - * must display the following acknowledgement: - * This product includes software developed by TooLs GmbH. - * 4. The name of TooLs GmbH may not be used to endorse or promote products - * derived from this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY TOOLS GMBH ``AS IS'' AND ANY EXPRESS OR - * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES - * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. - * IN NO EVENT SHALL TOOLS GMBH BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; - * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, - * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR - * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF - * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * $FreeBSD$ */ #include "assym.s" @@ -69,13 +40,6 @@ .abiversion _CALL_ELF #endif -/* Locate the per-CPU data structure */ -#define GET_CPUINFO(r) \ - mfsprg0 r -#define GET_TOCBASE(r) \ - li r,TRAP_TOCBASE; /* Magic address for TOC */ \ - ld r,0(r) - /* Glue for linker script */ .globl kernbase .set kernbase, KERNBASE @@ -102,15 +66,7 @@ TOC_ENTRY(tmpstk) btext: /* - * This symbol is here for the benefit of kvm_mkdb, and is supposed to - * mark the start of kernel text. - */ - .globl kernel_text -kernel_text: - -/* - * Startup entry. Note, this must be the first thing in the text - * segment! + * Main kernel entry point. 
* * Calling convention: * r3: Flattened Device Tree pointer (or zero) @@ -169,50 +125,7 @@ ASENTRY_NOPROF(__start) bl mi_startup nop - /* If this returns (it won't), go back to firmware */ - b OF_exit - nop - -/* - * int setfault() - * - * Similar to setjmp to setup for handling faults on accesses to user memory. - * Any routine using this may only call bcopy, either the form below, - * or the (currently used) C code optimized, so it doesn't use any non-volatile - * registers. - */ -ASENTRY_NOPROF(setfault) - mflr 0 - mfcr 12 - mfsprg 4,0 - ld 4,TD_PCB(13) /* curthread = r13 */ - std 3,PCB_ONFAULT(4) - std 0,0(3) - std 1,8(3) - std 2,16(3) - - std %r12,24(%r3) /* Save the non-volatile GP regs. */ - std %r13,24+1*8(%r3) - std %r14,24+2*8(%r3) - std %r15,24+3*8(%r3) - std %r16,24+4*8(%r3) - std %r17,24+5*8(%r3) - std %r18,24+6*8(%r3) - std %r19,24+7*8(%r3) - std %r20,24+8*8(%r3) - std %r21,24+9*8(%r3) - std %r22,24+10*8(%r3) - std %r23,24+11*8(%r3) - std %r24,24+12*8(%r3) - std %r25,24+13*8(%r3) - std %r26,24+14*8(%r3) - std %r27,24+15*8(%r3) - std %r28,24+16*8(%r3) - std %r29,24+17*8(%r3) - std %r30,24+18*8(%r3) - std %r31,24+19*8(%r3) - - xor 3,3,3 - blr + /* Unreachable */ + b . #include diff --git a/sys/powerpc/aim/trap_subr64.S b/sys/powerpc/aim/trap_subr64.S index 7ad0a7039ea..587086e0eea 100644 --- a/sys/powerpc/aim/trap_subr64.S +++ b/sys/powerpc/aim/trap_subr64.S @@ -39,9 +39,12 @@ * #include */ -/* - * Save/restore segment registers - */ +/* Locate the per-CPU data structure */ +#define GET_CPUINFO(r) \ + mfsprg0 r +#define GET_TOCBASE(r) \ + li r,TRAP_TOCBASE; /* Magic address for TOC */ \ + ld r,0(r) /* * Restore SRs for a pmap diff --git a/sys/powerpc/booke/booke_machdep.c b/sys/powerpc/booke/booke_machdep.c index a019597e842..78ca12fb111 100644 --- a/sys/powerpc/booke/booke_machdep.c +++ b/sys/powerpc/booke/booke_machdep.c @@ -156,7 +156,6 @@ __FBSDID("$FreeBSD$"); #define debugf(fmt, args...) 
#endif -extern unsigned char kernel_text[]; extern unsigned char _etext[]; extern unsigned char _edata[]; extern unsigned char __bss_start[]; @@ -311,7 +310,7 @@ booke_init(u_long arg1, u_long arg2) end += fdt_totalsize((void *)dtbp); __endkernel = end; mdp = NULL; - } else if (arg1 > (uintptr_t)kernel_text) /* FreeBSD loader */ + } else if (arg1 > (uintptr_t)btext) /* FreeBSD loader */ mdp = (void *)arg1; else /* U-Boot */ mdp = NULL; diff --git a/sys/powerpc/booke/locore.S b/sys/powerpc/booke/locore.S index 2d6c66d8bc6..a49d79675b4 100644 --- a/sys/powerpc/booke/locore.S +++ b/sys/powerpc/booke/locore.S @@ -837,29 +837,6 @@ ENTRY(dataloss_erratum_access) blr -/* - * int setfault() - * - * Similar to setjmp to setup for handling faults on accesses to user memory. - * Any routine using this may only call bcopy, either the form below, - * or the (currently used) C code optimized, so it doesn't use any non-volatile - * registers. - */ - .globl setfault -setfault: - mflr %r0 - mfsprg0 %r4 - lwz %r4, TD_PCB(%r2) - stw %r3, PCB_ONFAULT(%r4) - mfcr %r4 - stw %r0, 0(%r3) - stw %r1, 4(%r3) - stw %r2, 8(%r3) - stw %r4, 12(%r3) - stmw %r13, 16(%r3) /* store CR, CTR, XER, [r13 .. r31] */ - li %r3, 0 /* return FALSE */ - blr - /************************************************************************/ /* Data section */ /************************************************************************/ diff --git a/sys/powerpc/powerpc/setjmp.S b/sys/powerpc/powerpc/setjmp.S index 3884b114274..910f58bf667 100644 --- a/sys/powerpc/powerpc/setjmp.S +++ b/sys/powerpc/powerpc/setjmp.S @@ -40,7 +40,6 @@ #define JMP_cr 22*REGWIDTH #define JMP_ctr 23*REGWIDTH #define JMP_xer 24*REGWIDTH -#define JMP_sig 25*REGWIDTH ASENTRY_NOPROF(setjmp) ST_REG 31, JMP_r31(3) From 3deebd539b0a4a8cc136dc9adedf22ab84a9bc19 Mon Sep 17 00:00:00 2001 From: Marius Strobl Date: Sun, 10 Jan 2016 18:11:23 +0000 Subject: [PATCH 53/67] - Add support for Advantech PCI-1602 Rev. B1 and PCI-1603 cards. 
[1] - Add a description of Advantech PCI-1602 Rev. A boards. [1] - Properly set up REG_ACR also for PCI-1602 Rev. A based on what the Advantech-supplied Linux driver does. - Additionally use the macros of <dev/ic/ns16550.h> to replace existing magic values and get rid of trivial comments. - Fix the style of some comments. PR: 205359 [1] Submitted by: Jan Mikkelsen (original patch) [1] --- sys/dev/ic/ns16550.h | 2 + sys/dev/puc/pucdata.c | 154 ++++++++++++++++++++++++++++++++++-------- 2 files changed, 128 insertions(+), 28 deletions(-) diff --git a/sys/dev/ic/ns16550.h b/sys/dev/ic/ns16550.h index 33a7dd14b82..e4c714f2146 100644 --- a/sys/dev/ic/ns16550.h +++ b/sys/dev/ic/ns16550.h @@ -205,6 +205,7 @@ * requires ACR[6]. */ #define com_icr 5 /* index control register (R/W) */ +#define REG_ICR com_icr /* * 16950 register #7. It is the same as com_scr except it has a different @@ -220,6 +221,7 @@ */ #define com_acr 0 /* additional control register (R/W) */ +#define REG_ACR com_acr #define ACR_ASE 0x80 /* ASR/RFL/TFL enable */ #define ACR_ICRE 0x40 /* ICR enable */ #define ACR_TLE 0x20 /* TTL/RTL enable */ diff --git a/sys/dev/puc/pucdata.c b/sys/dev/puc/pucdata.c index b953146b0c4..d8ca2584ab6 100644 --- a/sys/dev/puc/pucdata.c +++ b/sys/dev/puc/pucdata.c @@ -42,12 +42,16 @@ __FBSDID("$FreeBSD$"); #include #include +#include + +#include #include #include #include #include +static puc_config_f puc_config_advantech; static puc_config_f puc_config_amc; static puc_config_f puc_config_diva; static puc_config_f puc_config_exar; @@ -691,10 +695,25 @@ const struct puc_cfg puc_pci_devices[] = { .config_function = puc_config_exar_pcie }, + /* + * The Advantech PCI-1602 Rev. A uses the first two ports of an Oxford + * Semiconductor OXuPCI954. Note these boards have a hardware bug in + * that they drive the RS-422/485 transmitters after power-on until a + * driver initializes the UARTs. + */ { 0x13fe, 0x1600, 0x1602, 0x0002, - "Advantech PCI-1602", + "Advantech PCI-1602 Rev.
A", DEFAULT_RCLK * 8, PUC_PORT_2S, 0x10, 0, 8, + .config_function = puc_config_advantech + }, + + /* Advantech PCI-1602 Rev. B1/PCI-1603 are also based on OXuPCI952. */ + { 0x13fe, 0xa102, 0x13fe, 0xa102, + "Advantech 2-port PCI (PCI-1602 Rev. B1/PCI-1603)", + DEFAULT_RCLK * 8, + PUC_PORT_2S, 0x10, 4, 0, + .config_function = puc_config_advantech }, { 0x1407, 0x0100, 0xffff, 0, @@ -1255,6 +1274,92 @@ const struct puc_cfg puc_pci_devices[] = { { 0xffff, 0, 0xffff, 0, NULL, 0 } }; +static int +puc_config_advantech(struct puc_softc *sc, enum puc_cfg_cmd cmd, int port, + intptr_t *res __unused) +{ + const struct puc_cfg *cfg; + struct resource *cres; + struct puc_bar *bar; + device_t cdev, dev; + bus_size_t off; + int base, crtype, fixed, high, i, oxpcie; + uint8_t acr, func, mask; + + if (cmd != PUC_CFG_SETUP) + return (ENXIO); + + base = fixed = oxpcie = 0; + crtype = SYS_RES_IOPORT; + acr = mask = 0x0; + func = high = 1; + off = 0x60; + + cfg = sc->sc_cfg; + switch (cfg->subvendor) { + case 0x13fe: + switch (cfg->device) { + case 0xa102: + high = 0; + break; + default: + break; + } + default: + break; + } + if (fixed == 1) + goto setup; + + dev = sc->sc_dev; + cdev = pci_find_dbsf(pci_get_domain(dev), pci_get_bus(dev), + pci_get_slot(dev), func); + if (cdev == NULL) { + device_printf(dev, "could not find config function\n"); + return (ENXIO); + } + + i = PCIR_BAR(0); + cres = bus_alloc_resource_any(cdev, crtype, &i, RF_ACTIVE); + if (cres == NULL) { + device_printf(dev, "could not allocate config resource\n"); + return (ENXIO); + } + + if (oxpcie == 0) { + mask = bus_read_1(cres, off); + if (pci_get_function(dev) == 1) + base = 4; + } + + setup: + for (i = 0; i < sc->sc_nports; ++i) { + device_printf(dev, "port %d: ", i); + bar = puc_get_bar(sc, cfg->rid + i * cfg->d_rid); + if (bar == NULL) { + printf("could not get BAR\n"); + continue; + } + + if (fixed == 0) { + if ((mask & (1 << (base + i))) == 0) { + acr = 0; + printf("RS-232\n"); + } else { + acr = (high == 1 ? 
0x18 : 0x10); + printf("RS-422/RS-485, active-%s auto-DTR\n", + high == 1 ? "high" : "low"); + } + } + + bus_write_1(bar->b_res, REG_SPR, REG_ACR); + bus_write_1(bar->b_res, REG_ICR, acr); + } + + bus_release_resource(cdev, crtype, rman_get_rid(cres), cres); + return (0); +} + static int puc_config_amc(struct puc_softc *sc __unused, enum puc_cfg_cmd cmd, int port, intptr_t *res) @@ -1360,24 +1465,17 @@ puc_config_quatech(struct puc_softc *sc, enum puc_cfg_cmd cmd, bar = puc_get_bar(sc, cfg->rid); if (bar == NULL) return (ENXIO); - /* Set DLAB in the LCR register of UART 0. */ - bus_write_1(bar->b_res, 3, 0x80); - /* Write 0 to the SPR register of UART 0. */ - bus_write_1(bar->b_res, 7, 0); - /* Read back the contents of the SPR register of UART 0. */ - v0 = bus_read_1(bar->b_res, 7); - /* Write a specific value to the SPR register of UART 0. */ - bus_write_1(bar->b_res, 7, 0x80 + -cfg->clock); - /* Read back the contents of the SPR register of UART 0. */ - v1 = bus_read_1(bar->b_res, 7); - /* Clear DLAB in the LCR register of UART 0. */ - bus_write_1(bar->b_res, 3, 0); - /* Save the two values read-back from the SPR register. */ + bus_write_1(bar->b_res, REG_LCR, LCR_DLAB); + bus_write_1(bar->b_res, REG_SPR, 0); + v0 = bus_read_1(bar->b_res, REG_SPR); + bus_write_1(bar->b_res, REG_SPR, 0x80 + -cfg->clock); + v1 = bus_read_1(bar->b_res, REG_SPR); + bus_write_1(bar->b_res, REG_LCR, 0); sc->sc_cfg_data = (v0 << 8) | v1; if (v0 == 0 && v1 == 0x80 + -cfg->clock) { /* * The SPR register echoed the two values written - * by us. This means that the SPAD jumper is set. + * by us. This means that the SPAD jumper is set. */ device_printf(sc->sc_dev, "warning: extra features " "not usable -- SPAD compatibility enabled\n"); @@ -1385,7 +1483,7 @@ puc_config_quatech(struct puc_softc *sc, enum puc_cfg_cmd cmd, } if (v0 != 0) { /* - * The first value doesn't match. This can only mean + * The first value doesn't match. 
This can only mean * that the SPAD jumper is not set and that a non- * standard fixed clock multiplier jumper is set. */ @@ -1399,8 +1497,8 @@ puc_config_quatech(struct puc_softc *sc, enum puc_cfg_cmd cmd, return (0); } /* - * The first value matched, but the second didn't. We know - * that the SPAD jumper is not set. We also know that the + * The first value matched, but the second didn't. We know + * that the SPAD jumper is not set. We also know that the * clock rate multiplier is software controlled *and* that * we just programmed it to the maximum allowed. */ @@ -1415,8 +1513,8 @@ puc_config_quatech(struct puc_softc *sc, enum puc_cfg_cmd cmd, /* * XXX With the SPAD jumper applied, there's no * easy way of knowing if there's also a clock - * rate multiplier jumper installed. Let's hope - * not... + * rate multiplier jumper installed. Let's hope + * not ... */ *res = DEFAULT_RCLK; } else if (v0 == 0) { @@ -1678,15 +1776,15 @@ puc_config_oxford_pcie(struct puc_softc *sc, enum puc_cfg_cmd cmd, int port, case PUC_CFG_GET_NPORTS: /* * Check if we are being called from puc_bfe_attach() - * or puc_bfe_probe(). If puc_bfe_probe(), we cannot - * puc_get_bar(), so we return a value of 16. This has cosmetic - * side-effects at worst; in PUC_CFG_GET_DESC, - * (int)sc->sc_cfg_data will not contain the true number of - * ports in PUC_CFG_GET_DESC, but we are not implementing that - * call for this device family anyway. + * or puc_bfe_probe(). If puc_bfe_probe(), we cannot + * puc_get_bar(), so we return a value of 16. This has + * cosmetic side-effects at worst; in PUC_CFG_GET_DESC, + * sc->sc_cfg_data will not contain the true number of + * ports in PUC_CFG_GET_DESC, but we are not implementing + * that call for this device family anyway. * - * The check is for initialisation of sc->sc_bar[idx], which is - * only done in puc_bfe_attach(). + * The check is for initialization of sc->sc_bar[idx], + * which is only done in puc_bfe_attach(). 
*/ idx = 0; do { From 5ffa1d26c000f28662e2485a7297131093c9460d Mon Sep 17 00:00:00 2001 From: Gleb Smirnoff Date: Sun, 10 Jan 2016 20:10:34 +0000 Subject: [PATCH 54/67] Fix bhyve(1) operation on vmnet devices, broken in r293459. --- usr.sbin/bhyve/pci_virtio_net.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/usr.sbin/bhyve/pci_virtio_net.c b/usr.sbin/bhyve/pci_virtio_net.c index a8c56210e92..6f264a73788 100644 --- a/usr.sbin/bhyve/pci_virtio_net.c +++ b/usr.sbin/bhyve/pci_virtio_net.c @@ -850,7 +850,8 @@ pci_vtnet_init(struct vmctx *ctx, struct pci_devinst *pi, char *opts) if (strncmp(devname, "vale", 4) == 0) pci_vtnet_netmap_setup(sc, devname); - if (strncmp(devname, "tap", 3) == 0) + if (strncmp(devname, "tap", 3) == 0 || + strncmp(devname, "vmnet", 5) == 0) pci_vtnet_tap_setup(sc, devname); free(devname); From 84eacaf728a102612d83861d73c3aaa353ca3dc2 Mon Sep 17 00:00:00 2001 From: Andrew Turner Date: Sun, 10 Jan 2016 23:41:31 +0000 Subject: [PATCH 55/67] Use -mlong-calls to build crt1.o and gcrt1.o. This tells the compiler to generate code to branch based on an address in a register. This allows us to have binaries larger than the 32MiB limit of a branch instruction. The main use of this is with clang. Clang 3.8.0 has been shown to be larger than the above limit. --- lib/csu/arm/Makefile | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/lib/csu/arm/Makefile b/lib/csu/arm/Makefile index 9747619c233..1b3a6f84213 100644 --- a/lib/csu/arm/Makefile +++ b/lib/csu/arm/Makefile @@ -7,6 +7,7 @@ OBJS= ${SRCS:N*.h:R:S/$/.o/g} OBJS+= Scrt1.o gcrt1.o CFLAGS+= -I${.CURDIR}/../common \ -I${.CURDIR}/../../libc/include +STATIC_CFLAGS+= -mlong-calls FILES= ${OBJS} FILESMODE= ${LIBMODE} @@ -23,14 +24,14 @@ CLEANFILES+= crt1.s gcrt1.s Scrt1.s # directly compiled to .o files. 
crt1.s: crt1.c - ${CC} ${CFLAGS} -S -o ${.TARGET} ${.CURDIR}/crt1.c + ${CC} ${CFLAGS} ${STATIC_CFLAGS} -S -o ${.TARGET} ${.CURDIR}/crt1.c sed ${SED_FIX_NOTE} ${.TARGET} crt1.o: crt1.s ${CC} ${ACFLAGS} -c -o ${.TARGET} crt1.s gcrt1.s: crt1.c - ${CC} ${CFLAGS} -DGCRT -S -o ${.TARGET} ${.CURDIR}/crt1.c + ${CC} ${CFLAGS} ${STATIC_CFLAGS} -DGCRT -S -o ${.TARGET} ${.CURDIR}/crt1.c sed ${SED_FIX_NOTE} ${.TARGET} gcrt1.o: gcrt1.s From 868a59a7e296bed53dd7b986e79964eed8a59f71 Mon Sep 17 00:00:00 2001 From: Sepherosa Ziehau Date: Mon, 11 Jan 2016 03:30:16 +0000 Subject: [PATCH 56/67] hyperv/kvp_daemon: Make poll(2) block indefinitely Submitted by: Jun Su Reviewed by: Dexuan Cui , me, adrain Approved by: adrian Sponsored by: Microsoft OSTC Differential Revision: https://reviews.freebsd.org/D4762 --- contrib/hyperv/tools/hv_kvp_daemon.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/contrib/hyperv/tools/hv_kvp_daemon.c b/contrib/hyperv/tools/hv_kvp_daemon.c index 50ae3ed7455..9b9e3a6feb9 100644 --- a/contrib/hyperv/tools/hv_kvp_daemon.c +++ b/contrib/hyperv/tools/hv_kvp_daemon.c @@ -1437,7 +1437,7 @@ main(int argc, char *argv[]) for (;;) { - r = poll (hv_kvp_poll_fd, 1, 100); + r = poll (hv_kvp_poll_fd, 1, INFTIM); KVP_LOG(LOG_DEBUG, "poll returned r = %d, revent = 0x%x\n", r, hv_kvp_poll_fd[0].revents); From e5f3746abd5d28b3ba8b9cd3421e7e8558fedaef Mon Sep 17 00:00:00 2001 From: "Alexander V. Chernikov" Date: Mon, 11 Jan 2016 08:00:13 +0000 Subject: [PATCH 57/67] Do not rewrite all ro_flags. --- sys/net/route.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sys/net/route.h b/sys/net/route.h index 97709f17247..f30a72fb31e 100644 --- a/sys/net/route.h +++ b/sys/net/route.h @@ -228,7 +228,7 @@ rt_update_ro_flags(struct route *ro) ro->ro_flags &= ~ (RT_REJECT|RT_BLACKHOLE|RT_HAS_GW); - ro->ro_flags = (rt_flags & RTF_REJECT) ? RT_REJECT : 0; + ro->ro_flags |= (rt_flags & RTF_REJECT) ? RT_REJECT : 0; ro->ro_flags |= (rt_flags & RTF_BLACKHOLE) ? 
RT_BLACKHOLE : 0; ro->ro_flags |= (rt_flags & RTF_GATEWAY) ? RT_HAS_GW : 0; } From 59747033cd59f8af459e736172e78e111972f9f5 Mon Sep 17 00:00:00 2001 From: "Alexander V. Chernikov" Date: Mon, 11 Jan 2016 08:45:28 +0000 Subject: [PATCH 58/67] Bring RADIX_MPATH support to new routing KPI to ease migration. Move actual rte selection process from rtalloc_mpath_fib() to the rt_mpath_selectrte() function. Add public rt_mpath_select() to use in fibX_lookup_ functions. --- sys/net/radix_mpath.c | 61 +++++++++++++++++++++++++++--------------- sys/net/radix_mpath.h | 1 + sys/netinet/in_fib.c | 7 +++++ sys/netinet6/in6_fib.c | 7 +++++ 4 files changed, 55 insertions(+), 21 deletions(-) diff --git a/sys/net/radix_mpath.c b/sys/net/radix_mpath.c index 82c0adda0d7..56574002a97 100644 --- a/sys/net/radix_mpath.c +++ b/sys/net/radix_mpath.c @@ -197,14 +197,49 @@ rt_mpath_conflict(struct radix_node_head *rnh, struct rtentry *rt, return (0); } -void -rtalloc_mpath_fib(struct route *ro, uint32_t hash, u_int fibnum) +static struct rtentry * +rt_mpath_selectrte(struct rtentry *rte, uint32_t hash) { struct radix_node *rn0, *rn; u_int32_t n; struct rtentry *rt; int64_t weight; + /* beyond here, we use rn as the master copy */ + rn0 = rn = (struct radix_node *)rte; + n = rn_mpath_count(rn0); + + /* gw selection by Modulo-N Hash (RFC2991) XXX need improvement?
*/ + hash += hashjitter; + hash %= n; + for (weight = abs((int32_t)hash), rt = rte; + weight >= rt->rt_weight && rn; + weight -= rt->rt_weight) { + + /* stay within the multipath routes */ + if (rn->rn_dupedkey && rn->rn_mask != rn->rn_dupedkey->rn_mask) + break; + rn = rn->rn_dupedkey; + rt = (struct rtentry *)rn; + } + + return (rt); +} + +struct rtentry * +rt_mpath_select(struct rtentry *rte, uint32_t hash) +{ + if (rn_mpath_next((struct radix_node *)rte) == NULL) + return (rte); + + return (rt_mpath_selectrte(rte, hash)); +} + +void +rtalloc_mpath_fib(struct route *ro, uint32_t hash, u_int fibnum) +{ + struct rtentry *rt; + /* * XXX we don't attempt to lookup cached route again; what should * be done for sendto(3) case? @@ -222,34 +257,18 @@ rtalloc_mpath_fib(struct route *ro, uint32_t hash, u_int fibnum) return; } - /* beyond here, we use rn as the master copy */ - rn0 = rn = (struct radix_node *)ro->ro_rt; - n = rn_mpath_count(rn0); - - /* gw selection by Modulo-N Hash (RFC2991) XXX need improvement? 
*/ - hash += hashjitter; - hash %= n; - for (weight = abs((int32_t)hash), rt = ro->ro_rt; - weight >= rt->rt_weight && rn; - weight -= rt->rt_weight) { - - /* stay within the multipath routes */ - if (rn->rn_dupedkey && rn->rn_mask != rn->rn_dupedkey->rn_mask) - break; - rn = rn->rn_dupedkey; - rt = (struct rtentry *)rn; - } + rt = rt_mpath_selectrte(ro->ro_rt, hash); /* XXX try filling rt_gwroute and avoid unreachable gw */ /* gw selection has failed - there must be only zero weight routes */ - if (!rn) { + if (!rt) { RT_UNLOCK(ro->ro_rt); ro->ro_rt = NULL; return; } if (ro->ro_rt != rt) { RTFREE_LOCKED(ro->ro_rt); - ro->ro_rt = (struct rtentry *)rn; + ro->ro_rt = rt; RT_LOCK(ro->ro_rt); RT_ADDREF(ro->ro_rt); diff --git a/sys/net/radix_mpath.h b/sys/net/radix_mpath.h index bcb210e37cf..fc6f7775a5f 100644 --- a/sys/net/radix_mpath.h +++ b/sys/net/radix_mpath.h @@ -52,6 +52,7 @@ int rt_mpath_conflict(struct radix_node_head *, struct rtentry *, struct sockaddr *); void rtalloc_mpath_fib(struct route *, u_int32_t, u_int); #define rtalloc_mpath(_route, _hash) rtalloc_mpath_fib((_route), (_hash), 0) +struct rtentry *rt_mpath_select(struct rtentry *, uint32_t); struct radix_node *rn_mpath_lookup(void *, void *, struct radix_node_head *); int rt_mpath_deldup(struct rtentry *, struct rtentry *); diff --git a/sys/netinet/in_fib.c b/sys/netinet/in_fib.c index 451b37474e3..352c6d00efd 100644 --- a/sys/netinet/in_fib.c +++ b/sys/netinet/in_fib.c @@ -200,6 +200,13 @@ fib4_lookup_nh_ext(uint32_t fibnum, struct in_addr dst, uint32_t flags, rn = rh->rnh_matchaddr((void *)&sin, rh); if (rn != NULL && ((rn->rn_flags & RNF_ROOT) == 0)) { rte = RNTORT(rn); +#ifdef RADIX_MPATH + rte = rt_mpath_select(rte, flowid); + if (rte == NULL) { + RADIX_NODE_HEAD_RUNLOCK(rh); + return (ENOENT); + } +#endif /* Ensure route & ifp is UP */ if (RT_LINK_IS_UP(rte->rt_ifp)) { fib4_rte_to_nh_extended(rte, dst, flags, pnh4); diff --git a/sys/netinet6/in6_fib.c b/sys/netinet6/in6_fib.c index 
eba5b7c9cb0..96acfbbf679 100644 --- a/sys/netinet6/in6_fib.c +++ b/sys/netinet6/in6_fib.c @@ -241,6 +241,13 @@ fib6_lookup_nh_ext(uint32_t fibnum, const struct in6_addr *dst,uint32_t scopeid, rn = rh->rnh_matchaddr((void *)&sin6, rh); if (rn != NULL && ((rn->rn_flags & RNF_ROOT) == 0)) { rte = RNTORT(rn); +#ifdef RADIX_MPATH + rte = rt_mpath_select(rte, flowid); + if (rte == NULL) { + RADIX_NODE_HEAD_RUNLOCK(rh); + return (ENOENT); + } +#endif /* Ensure route & ifp is UP */ if (RT_LINK_IS_UP(rte->rt_ifp)) { fib6_rte_to_nh_extended(rte, &sin6.sin6_addr, flags, From b53f4a640f0ec002d4302d9f0aacb871a6302d1d Mon Sep 17 00:00:00 2001 From: Andrew Rybchenko Date: Mon, 11 Jan 2016 09:15:25 +0000 Subject: [PATCH 59/67] sfxge: add Medford build option disabled by default Submitted by: Mark Spender Sponsored by: Solarflare Communications, Inc. MFC after: 2 days --- sys/dev/sfxge/common/efsys.h | 1 + 1 file changed, 1 insertion(+) diff --git a/sys/dev/sfxge/common/efsys.h b/sys/dev/sfxge/common/efsys.h index 10dc04ded67..a3980ff191d 100644 --- a/sys/dev/sfxge/common/efsys.h +++ b/sys/dev/sfxge/common/efsys.h @@ -238,6 +238,7 @@ sfxge_map_mbuf_fast(bus_dma_tag_t tag, bus_dmamap_t map, #define EFSYS_OPT_FALCON_NIC_CFG_OVERRIDE 0 #define EFSYS_OPT_SIENA 1 #define EFSYS_OPT_HUNTINGTON 1 +#define EFSYS_OPT_MEDFORD 0 #ifdef DEBUG #define EFSYS_OPT_CHECK_REG 1 #else From 481b36c66aa200e9f802d948b5c4ca63abc45266 Mon Sep 17 00:00:00 2001 From: Steven Hartland Date: Mon, 11 Jan 2016 10:24:30 +0000 Subject: [PATCH 60/67] Close iSCSI sessions on shutdown Ensure that all iSCSI sessions are correctly terminated during shutdown. * Enhances the changes done by r286226 (D3052). * Add shutdown post sync event to run after filesystem shutdown (SHUTDOWN_PRI_FIRST) but before CAM shutdown (SHUTDOWN_PRI_DEFAULT). * Change iscsi_maintenance_thread to process terminate in preference to reconnect.
Reviewed by: trasz MFC after: 2 weeks Sponsored by: Multiplay Differential Revision: https://reviews.freebsd.org/D4429 --- sys/dev/iscsi/iscsi.c | 99 +++++++++++++++++++++++++++++++------------ sys/dev/iscsi/iscsi.h | 3 +- 2 files changed, 73 insertions(+), 29 deletions(-) diff --git a/sys/dev/iscsi/iscsi.c b/sys/dev/iscsi/iscsi.c index d792746091d..a9bdaf79dd8 100644 --- a/sys/dev/iscsi/iscsi.c +++ b/sys/dev/iscsi/iscsi.c @@ -98,6 +98,9 @@ SYSCTL_INT(_kern_iscsi, OID_AUTO, maxtags, CTLFLAG_RWTUN, &maxtags, static int fail_on_disconnection = 0; SYSCTL_INT(_kern_iscsi, OID_AUTO, fail_on_disconnection, CTLFLAG_RWTUN, &fail_on_disconnection, 0, "Destroy CAM SIM on connection failure"); +static int fail_on_shutdown = 1; +SYSCTL_INT(_kern_iscsi, OID_AUTO, fail_on_shutdown, CTLFLAG_RWTUN, + &fail_on_shutdown, 0, "Fail disconnected sessions on shutdown"); static MALLOC_DEFINE(M_ISCSI, "iSCSI", "iSCSI initiator"); static uma_zone_t iscsi_outstanding_zone; @@ -417,8 +420,6 @@ iscsi_maintenance_thread_terminate(struct iscsi_session *is) sc = is->is_softc; sx_xlock(&sc->sc_lock); - TAILQ_REMOVE(&sc->sc_sessions, is, is_next); - sx_xunlock(&sc->sc_lock); icl_conn_close(is->is_conn); callout_drain(&is->is_callout); @@ -450,6 +451,9 @@ iscsi_maintenance_thread_terminate(struct iscsi_session *is) #ifdef ICL_KERNEL_PROXY cv_destroy(&is->is_login_cv); #endif + TAILQ_REMOVE(&sc->sc_sessions, is, is_next); + sx_xunlock(&sc->sc_lock); + ISCSI_SESSION_DEBUG(is, "terminated"); free(is, M_ISCSI); @@ -473,12 +477,7 @@ iscsi_maintenance_thread(void *arg) STAILQ_EMPTY(&is->is_postponed)) cv_wait(&is->is_maintenance_cv, &is->is_lock); - if (is->is_reconnecting) { - ISCSI_SESSION_UNLOCK(is); - iscsi_maintenance_thread_reconnect(is); - continue; - } - + /* Terminate supersedes reconnect. 
*/ if (is->is_terminating) { ISCSI_SESSION_UNLOCK(is); iscsi_maintenance_thread_terminate(is); @@ -486,6 +485,12 @@ iscsi_maintenance_thread(void *arg) return; } + if (is->is_reconnecting) { + ISCSI_SESSION_UNLOCK(is); + iscsi_maintenance_thread_reconnect(is); + continue; + } + iscsi_session_send_postponed(is); ISCSI_SESSION_UNLOCK(is); } @@ -605,6 +610,11 @@ iscsi_callout(void *context) return; out: + if (is->is_terminating) { + ISCSI_SESSION_UNLOCK(is); + return; + } + ISCSI_SESSION_UNLOCK(is); if (reconnect_needed) @@ -2326,30 +2336,62 @@ iscsi_poll(struct cam_sim *sim) } static void -iscsi_shutdown(struct iscsi_softc *sc) +iscsi_terminate_sessions(struct iscsi_softc *sc) { struct iscsi_session *is; - /* - * Trying to reconnect during system shutdown would lead to hang. - */ - fail_on_disconnection = 1; + sx_slock(&sc->sc_lock); + TAILQ_FOREACH(is, &sc->sc_sessions, is_next) + iscsi_session_terminate(is); + while(!TAILQ_EMPTY(&sc->sc_sessions)) { + ISCSI_DEBUG("waiting for sessions to terminate"); + cv_wait(&sc->sc_cv, &sc->sc_lock); + } + ISCSI_DEBUG("all sessions terminated"); + sx_sunlock(&sc->sc_lock); +} + +static void +iscsi_shutdown_pre(struct iscsi_softc *sc) +{ + struct iscsi_session *is; + + if (!fail_on_shutdown) + return; /* * If we have any sessions waiting for reconnection, request * maintenance thread to fail them immediately instead of waiting * for reconnect timeout. + * + * This prevents LUNs with mounted filesystems that are supported + * by disconnected iSCSI sessions from hanging, however it will + * fail all queued BIOs. 
*/ + ISCSI_DEBUG("forcing failing all disconnected sessions due to shutdown"); + + fail_on_disconnection = 1; + sx_slock(&sc->sc_lock); TAILQ_FOREACH(is, &sc->sc_sessions, is_next) { ISCSI_SESSION_LOCK(is); - if (is->is_waiting_for_iscsid) + if (!is->is_connected) { + ISCSI_SESSION_DEBUG(is, "force failing disconnected session early"); iscsi_session_reconnect(is); + } ISCSI_SESSION_UNLOCK(is); } sx_sunlock(&sc->sc_lock); } +static void +iscsi_shutdown_post(struct iscsi_softc *sc) +{ + + ISCSI_DEBUG("removing all sessions due to shutdown"); + iscsi_terminate_sessions(sc); +} + static int iscsi_load(void) { @@ -2372,8 +2414,16 @@ iscsi_load(void) } sc->sc_cdev->si_drv1 = sc; - sc->sc_shutdown_eh = EVENTHANDLER_REGISTER(shutdown_pre_sync, - iscsi_shutdown, sc, SHUTDOWN_PRI_DEFAULT-1); + sc->sc_shutdown_pre_eh = EVENTHANDLER_REGISTER(shutdown_pre_sync, + iscsi_shutdown_pre, sc, SHUTDOWN_PRI_FIRST); + /* + * shutdown_post_sync needs to run after filesystem shutdown and before + * CAM shutdown - otherwise when rebooting with an iSCSI session that is + * disconnected but has outstanding requests, dashutdown() will hang on + * cam_periph_runccb(). 
+ */ + sc->sc_shutdown_post_eh = EVENTHANDLER_REGISTER(shutdown_post_sync, + iscsi_shutdown_post, sc, SHUTDOWN_PRI_DEFAULT - 1); return (0); } @@ -2381,7 +2431,6 @@ iscsi_load(void) static int iscsi_unload(void) { - struct iscsi_session *is, *tmp; if (sc->sc_cdev != NULL) { ISCSI_DEBUG("removing device node"); @@ -2389,18 +2438,12 @@ iscsi_unload(void) ISCSI_DEBUG("device node removed"); } - if (sc->sc_shutdown_eh != NULL) - EVENTHANDLER_DEREGISTER(shutdown_pre_sync, sc->sc_shutdown_eh); + if (sc->sc_shutdown_pre_eh != NULL) + EVENTHANDLER_DEREGISTER(shutdown_pre_sync, sc->sc_shutdown_pre_eh); + if (sc->sc_shutdown_post_eh != NULL) + EVENTHANDLER_DEREGISTER(shutdown_post_sync, sc->sc_shutdown_post_eh); - sx_slock(&sc->sc_lock); - TAILQ_FOREACH_SAFE(is, &sc->sc_sessions, is_next, tmp) - iscsi_session_terminate(is); - while(!TAILQ_EMPTY(&sc->sc_sessions)) { - ISCSI_DEBUG("waiting for sessions to terminate"); - cv_wait(&sc->sc_cv, &sc->sc_lock); - } - ISCSI_DEBUG("all sessions terminated"); - sx_sunlock(&sc->sc_lock); + iscsi_terminate_sessions(sc); uma_zdestroy(iscsi_outstanding_zone); sx_destroy(&sc->sc_lock); diff --git a/sys/dev/iscsi/iscsi.h b/sys/dev/iscsi/iscsi.h index fd52fa87a95..a2475f4ac8f 100644 --- a/sys/dev/iscsi/iscsi.h +++ b/sys/dev/iscsi/iscsi.h @@ -131,7 +131,8 @@ struct iscsi_softc { TAILQ_HEAD(, iscsi_session) sc_sessions; struct cv sc_cv; unsigned int sc_last_session_id; - eventhandler_tag sc_shutdown_eh; + eventhandler_tag sc_shutdown_pre_eh; + eventhandler_tag sc_shutdown_post_eh; }; #endif /* !ISCSI_H */ From 076b61309121dfb307e41d37116a5142c9efbd34 Mon Sep 17 00:00:00 2001 From: Allan Jude Date: Mon, 11 Jan 2016 15:35:29 +0000 Subject: [PATCH 61/67] DIOCGSECTORSIZE expects to write to a u_int, but struct zfs_probe_args member secsz was a uint16_t sys/boot/zfs/zfs.c has a probe args structure member, secsz, that is a uint16_t for media sector size; it is used as an argument for ioctl() at line 484. 
However, this ioctl writes 32 bits of data (u_int *) and therefore this ioctl will overwrite and corrupt 16 bits of memory. Other use cases seem to use the correct u_int type for secsz. PR: 204358 Submitted by: Toomas Soome Reviewed by: asomers, delphij, smh MFC after: 5 days Differential Revision: https://reviews.freebsd.org/D4811 --- sys/boot/zfs/zfs.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sys/boot/zfs/zfs.c b/sys/boot/zfs/zfs.c index c339b2d7beb..c8b1284bc8c 100644 --- a/sys/boot/zfs/zfs.c +++ b/sys/boot/zfs/zfs.c @@ -413,7 +413,7 @@ struct zfs_probe_args { int fd; const char *devname; uint64_t *pool_guid; - uint16_t secsz; + u_int secsz; }; static int From 5725f0e49002271ce479393ee510db7fa9d71bfb Mon Sep 17 00:00:00 2001 From: Navdeep Parhar Date: Mon, 11 Jan 2016 17:52:42 +0000 Subject: [PATCH 62/67] cxgbe: bind the ithreads that handle NIC rx to the correct CPU if the kernel is built with option RSS. --- sys/dev/cxgbe/t4_main.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/sys/dev/cxgbe/t4_main.c b/sys/dev/cxgbe/t4_main.c index 0d00a774de4..21d0cb8c3ec 100644 --- a/sys/dev/cxgbe/t4_main.c +++ b/sys/dev/cxgbe/t4_main.c @@ -3642,6 +3642,9 @@ setup_intr_handlers(struct adapter *sc) #ifdef DEV_NETMAP struct sge_nm_rxq *nm_rxq; #endif +#ifdef RSS + int nbuckets = rss_getnumbuckets(); +#endif /* * Setup interrupts. @@ -3700,6 +3703,10 @@ setup_intr_handlers(struct adapter *sc) t4_intr, rxq, s); if (rc != 0) return (rc); +#ifdef RSS + bus_bind_intr(sc->dev, irq->res, + rss_getcpu(q % nbuckets)); +#endif irq++; rid++; vi->nintr++; From 4e7787a9e9d42ff00326e33c9e3d9c3355d689c1 Mon Sep 17 00:00:00 2001 From: Alan Somers Date: Mon, 11 Jan 2016 17:57:26 +0000 Subject: [PATCH 63/67] Record physical path information in ZFS Vdevs sys/cddl/contrib/opensolaris/uts/common/fs/zfs/vdev_geom.c: If available, record the physical path of a vdev in ZFS meta-data.
Do this both when opening the vdev, and when receiving an attribute change notification from GEOM. Make vdev_geom_close() synchronous instead of deferring its work to a GEOM event handler. There is no benefit to deferring the work and this prevents a future open call from referencing a consumer that is scheduled for destruction. The close followed by an immediate open will occur during a vdev reprobe triggered by any type of I/O error. Consolidate vdev_geom_close() and vdev_geom_detach() into vdev_geom_close() and vdev_geom_close_locked(). This also moves the cross linking operations between vdev and GEOM consumer into a single place (linking in vdev_geom_attach() and unlinking in vdev_geom_close_locked()). Submitted by: gibbs, asomers MFC after: 4 weeks Sponsored by: Spectra Logic Corp Differential Revision: https://reviews.freebsd.org/D4524 --- .../opensolaris/uts/common/fs/zfs/vdev_geom.c | 130 +++++++++++++----- 1 file changed, 98 insertions(+), 32 deletions(-) diff --git a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/vdev_geom.c b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/vdev_geom.c index 5491442e9b3..5e52759a05f 100644 --- a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/vdev_geom.c +++ b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/vdev_geom.c @@ -78,6 +78,9 @@ static void vdev_geom_attrchanged(struct g_consumer *cp, const char *attr) { vdev_t *vd; + spa_t *spa; + char *physpath; + int error, physpath_len; vd = cp->private; if (vd == NULL) @@ -87,6 +90,47 @@ vdev_geom_attrchanged(struct g_consumer *cp, const char *attr) vdev_geom_set_rotation_rate(vd, cp); return; } + + if (strcmp(attr, "GEOM::physpath") != 0) + return; + + if (g_access(cp, 1, 0, 0) != 0) + return; + + /* + * Record/Update physical path information for this device. 
+ */ + spa = vd->vdev_spa; + physpath_len = MAXPATHLEN; + physpath = g_malloc(physpath_len, M_WAITOK|M_ZERO); + error = g_io_getattr("GEOM::physpath", cp, &physpath_len, physpath); + g_access(cp, -1, 0, 0); + if (error == 0) { + char *old_physpath; + + old_physpath = vd->vdev_physpath; + vd->vdev_physpath = spa_strdup(physpath); + spa_async_request(spa, SPA_ASYNC_CONFIG_UPDATE); + + if (old_physpath != NULL) { + int held_lock; + + held_lock = spa_config_held(spa, SCL_STATE, RW_WRITER); + if (held_lock == 0) { + g_topology_unlock(); + spa_config_enter(spa, SCL_STATE, FTAG, + RW_WRITER); + } + + spa_strfree(old_physpath); + + if (held_lock == 0) { + spa_config_exit(spa, SCL_STATE, FTAG); + g_topology_lock(); + } + } + } + g_free(physpath); } static void @@ -97,8 +141,10 @@ vdev_geom_orphan(struct g_consumer *cp) g_topology_assert(); vd = cp->private; - if (vd == NULL) + if (vd == NULL) { + /* Vdev close in progress. Ignore the event. */ return; + } /* * Orphan callbacks occur from the GEOM event thread. @@ -120,7 +166,7 @@ vdev_geom_orphan(struct g_consumer *cp) } static struct g_consumer * -vdev_geom_attach(struct g_provider *pp) +vdev_geom_attach(struct g_provider *pp, vdev_t *vd) { struct g_geom *gp; struct g_consumer *cp; @@ -139,6 +185,7 @@ vdev_geom_attach(struct g_provider *pp) if (gp == NULL) { gp = g_new_geomf(&zfs_vdev_class, "zfs::vdev"); gp->orphan = vdev_geom_orphan; + gp->attrchanged = vdev_geom_attrchanged; cp = g_new_consumer(gp); if (g_attach(cp, pp) != 0) { g_wither_geom(gp, ENXIO); @@ -175,28 +222,56 @@ vdev_geom_attach(struct g_provider *pp) ZFS_LOG(1, "Used existing consumer for %s.", pp->name); } } + + /* + * BUG: cp may already belong to a vdev. This could happen if: + * 1) That vdev is a shared spare, or + * 2) We are trying to reopen a missing vdev and we are scanning by + * guid. In that case, we'll ultimately fail to open this consumer, + * but not until after setting the private field. 
+ * The solution is to: + * 1) Don't set the private field until after the open succeeds, and + * 2) Set it to a linked list of vdevs, not just a single vdev + */ + cp->private = vd; + vd->vdev_tsd = cp; + + /* Fetch initial physical path information for this device. */ + vdev_geom_attrchanged(cp, "GEOM::physpath"); + cp->flags |= G_CF_DIRECT_SEND | G_CF_DIRECT_RECEIVE; return (cp); } static void -vdev_geom_detach(void *arg, int flag __unused) +vdev_geom_close_locked(vdev_t *vd) { struct g_geom *gp; struct g_consumer *cp; g_topology_assert(); - cp = arg; - gp = cp->geom; + + cp = vd->vdev_tsd; + if (cp == NULL) + return; ZFS_LOG(1, "Closing access to %s.", cp->provider->name); + KASSERT(vd->vdev_tsd == cp, ("%s: vdev_tsd is not cp", __func__)); + vd->vdev_tsd = NULL; + vd->vdev_delayed_close = B_FALSE; + cp->private = NULL; + + gp = cp->geom; g_access(cp, -1, 0, -1); /* Destroy consumer on last close. */ if (cp->acr == 0 && cp->ace == 0) { - ZFS_LOG(1, "Destroyed consumer to %s.", cp->provider->name); if (cp->acw > 0) g_access(cp, 0, -cp->acw, 0); - g_detach(cp); + if (cp->provider != NULL) { + ZFS_LOG(1, "Destroyed consumer to %s.", + cp->provider->name); + g_detach(cp); + } g_destroy_consumer(cp); } /* Destroy geom if there are no consumers left. 
*/ @@ -490,7 +565,7 @@ vdev_geom_read_guids(struct g_consumer *cp, uint64_t *pguid, uint64_t *vguid) } static struct g_consumer * -vdev_geom_attach_by_guids(uint64_t pool_guid, uint64_t vdev_guid) +vdev_geom_attach_by_guids(vdev_t *vd) { struct g_class *mp; struct g_geom *gp, *zgp; @@ -519,9 +594,10 @@ vdev_geom_attach_by_guids(uint64_t pool_guid, uint64_t vdev_guid) vdev_geom_read_guids(zcp, &pguid, &vguid); g_topology_lock(); vdev_geom_detach_taster(zcp); - if (pguid != pool_guid || vguid != vdev_guid) + if (pguid != spa_guid(vd->vdev_spa) || + vguid != vd->vdev_guid) continue; - cp = vdev_geom_attach(pp); + cp = vdev_geom_attach(pp, vd); if (cp == NULL) { printf("ZFS WARNING: Unable to " "attach to %s.\n", pp->name); @@ -551,7 +627,7 @@ vdev_geom_open_by_guids(vdev_t *vd) g_topology_assert(); ZFS_LOG(1, "Searching by guid [%ju].", (uintmax_t)vd->vdev_guid); - cp = vdev_geom_attach_by_guids(spa_guid(vd->vdev_spa), vd->vdev_guid); + cp = vdev_geom_attach_by_guids(vd); if (cp != NULL) { len = strlen(cp->provider->name) + strlen("/dev/") + 1; buf = kmem_alloc(len, KM_SLEEP); @@ -585,7 +661,7 @@ vdev_geom_open_by_path(vdev_t *vd, int check_guid) pp = g_provider_by_name(vd->vdev_path + sizeof("/dev/") - 1); if (pp != NULL) { ZFS_LOG(1, "Found provider by name %s.", vd->vdev_path); - cp = vdev_geom_attach(pp); + cp = vdev_geom_attach(pp, vd); if (cp != NULL && check_guid && ISP2(pp->sectorsize) && pp->sectorsize <= VDEV_PAD_SIZE) { g_topology_unlock(); @@ -593,7 +669,7 @@ vdev_geom_open_by_path(vdev_t *vd, int check_guid) g_topology_lock(); if (pguid != spa_guid(vd->vdev_spa) || vguid != vd->vdev_guid) { - vdev_geom_detach(cp, 0); + vdev_geom_close_locked(vd); cp = NULL; ZFS_LOG(1, "guid mismatch for provider %s: " "%ju:%ju != %ju:%ju.", vd->vdev_path, @@ -675,7 +751,8 @@ vdev_geom_open(vdev_t *vd, uint64_t *psize, uint64_t *max_psize, !ISP2(cp->provider->sectorsize)) { ZFS_LOG(1, "Provider %s has unsupported sectorsize.", vd->vdev_path); - vdev_geom_detach(cp, 0); + + 
vdev_geom_close_locked(vd); error = EINVAL; cp = NULL; } else if (cp->acw == 0 && (spa_mode(vd->vdev_spa) & FWRITE) != 0) { @@ -692,19 +769,17 @@ vdev_geom_open(vdev_t *vd, uint64_t *psize, uint64_t *max_psize, if (error != 0) { printf("ZFS WARNING: Unable to open %s for writing (error=%d).\n", vd->vdev_path, error); - vdev_geom_detach(cp, 0); + vdev_geom_close_locked(vd); cp = NULL; } } + g_topology_unlock(); PICKUP_GIANT(); if (cp == NULL) { vd->vdev_stat.vs_aux = VDEV_AUX_OPEN_FAILED; return (error); } - - cp->private = vd; - vd->vdev_tsd = cp; pp = cp->provider; /* @@ -727,12 +802,6 @@ vdev_geom_open(vdev_t *vd, uint64_t *psize, uint64_t *max_psize, */ vd->vdev_nowritecache = B_FALSE; - if (vd->vdev_physpath != NULL) - spa_strfree(vd->vdev_physpath); - bufsize = sizeof("/dev/") + strlen(pp->name); - vd->vdev_physpath = kmem_alloc(bufsize, KM_SLEEP); - snprintf(vd->vdev_physpath, bufsize, "/dev/%s", pp->name); - /* * Determine the device's rotation rate. */ @@ -744,15 +813,12 @@ vdev_geom_open(vdev_t *vd, uint64_t *psize, uint64_t *max_psize, static void vdev_geom_close(vdev_t *vd) { - struct g_consumer *cp; - cp = vd->vdev_tsd; - if (cp == NULL) - return; - vd->vdev_tsd = NULL; - vd->vdev_delayed_close = B_FALSE; - cp->private = NULL; /* XXX locking */ - g_post_event(vdev_geom_detach, cp, M_WAITOK, NULL); + DROP_GIANT(); + g_topology_lock(); + vdev_geom_close_locked(vd); + g_topology_unlock(); + PICKUP_GIANT(); } static void From adcd1d80b830faee527e42ec580d4631994364a0 Mon Sep 17 00:00:00 2001 From: Jim Harris Date: Mon, 11 Jan 2016 17:57:49 +0000 Subject: [PATCH 64/67] Update ismt(4) man page to reflect inclusion in upcoming 10.3 release. 
MFC after: 3 days Sponsored by: Intel --- share/man/man4/ismt.4 | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/share/man/man4/ismt.4 b/share/man/man4/ismt.4 index 63a39527880..b027f6f9592 100644 --- a/share/man/man4/ismt.4 +++ b/share/man/man4/ismt.4 @@ -33,7 +33,7 @@ .\" .\" $FreeBSD$ .\" -.Dd May 9, 2014 +.Dd January 11, 2016 .Dt ISMT 4 .Os .Sh NAME @@ -54,6 +54,6 @@ in the Intel Atom S1200 and C2000 CPUs. The .Nm driver first appeared in -.Fx 11.0 . +.Fx 10.3 . .Sh AUTHORS .An Jim Harris Aq Mt jimharris@FreeBSD.org From c829016e854b39d6d4603745c656bb257a22e81e Mon Sep 17 00:00:00 2001 From: "Andrey V. Elsukov" Date: Mon, 11 Jan 2016 18:11:06 +0000 Subject: [PATCH 65/67] Change the type of newsize argument in the smbfs_smb_setfsize() function from int to int64. MSDN says that SMB_SET_FILE_END_OF_FILE_INFO uses a signed 64-bit integer to specify the offset, but since smbfs_smb_setfsize() used a plain int, the value was truncated when the offset was larger than 2G. https://msdn.microsoft.com/en-us/library/ff469975.aspx In particular, now `truncate -s 10G` will work correctly on the mounted SMB share.
Reported and tested by: Eugene Grosbein MFC after: 1 week --- sys/fs/smbfs/smbfs_smb.c | 8 ++++---- sys/fs/smbfs/smbfs_subr.h | 3 ++- sys/fs/smbfs/smbfs_vnops.c | 3 ++- 3 files changed, 8 insertions(+), 6 deletions(-) diff --git a/sys/fs/smbfs/smbfs_smb.c b/sys/fs/smbfs/smbfs_smb.c index ada84c2ee97..c5b8a682915 100644 --- a/sys/fs/smbfs/smbfs_smb.c +++ b/sys/fs/smbfs/smbfs_smb.c @@ -333,18 +333,18 @@ smbfs_smb_flush(struct smbnode *np, struct smb_cred *scred) } int -smbfs_smb_setfsize(struct smbnode *np, int newsize, struct smb_cred *scred) +smbfs_smb_setfsize(struct smbnode *np, int64_t newsize, struct smb_cred *scred) { struct smb_share *ssp = np->n_mount->sm_share; struct smb_rq *rqp; struct mbchain *mbp; int error; - if (!smbfs_smb_seteof(np, (int64_t) newsize, scred)) { + if (!smbfs_smb_seteof(np, newsize, scred)) { np->n_flag |= NFLUSHWIRE; return (0); } - + /* XXX: We should use SMB_COM_WRITE_ANDX to support large offsets */ error = smb_rq_alloc(SSTOCP(ssp), SMB_COM_WRITE, scred, &rqp); if (error) return (error); @@ -352,7 +352,7 @@ smbfs_smb_setfsize(struct smbnode *np, int newsize, struct smb_cred *scred) smb_rq_wstart(rqp); mb_put_mem(mbp, (caddr_t)&np->n_fid, 2, MB_MSYSTEM); mb_put_uint16le(mbp, 0); - mb_put_uint32le(mbp, newsize); + mb_put_uint32le(mbp, (uint32_t)newsize); mb_put_uint16le(mbp, 0); smb_rq_wend(rqp); smb_rq_bstart(rqp); diff --git a/sys/fs/smbfs/smbfs_subr.h b/sys/fs/smbfs/smbfs_subr.h index d22df083462..4ee143c3d1c 100644 --- a/sys/fs/smbfs/smbfs_subr.h +++ b/sys/fs/smbfs/smbfs_subr.h @@ -128,7 +128,8 @@ int smbfs_smb_lock(struct smbnode *np, int op, caddr_t id, off_t start, off_t end, struct smb_cred *scred); int smbfs_smb_statfs(struct smb_share *ssp, struct statfs *sbp, struct smb_cred *scred); -int smbfs_smb_setfsize(struct smbnode *np, int newsize, struct smb_cred *scred); +int smbfs_smb_setfsize(struct smbnode *np, int64_t newsize, + struct smb_cred *scred); int smbfs_smb_query_info(struct smbnode *np, const char *name, int len, 
struct smbfattr *fap, struct smb_cred *scred); diff --git a/sys/fs/smbfs/smbfs_vnops.c b/sys/fs/smbfs/smbfs_vnops.c index 8ea11989944..c78ceb21206 100644 --- a/sys/fs/smbfs/smbfs_vnops.c +++ b/sys/fs/smbfs/smbfs_vnops.c @@ -358,7 +358,8 @@ smbfs_setattr(ap) doclose = 1; } if (error == 0) - error = smbfs_smb_setfsize(np, vap->va_size, scred); + error = smbfs_smb_setfsize(np, + (int64_t)vap->va_size, scred); if (doclose) smbfs_smb_close(ssp, np->n_fid, NULL, scred); if (error) { From e813d9d7fa641d4b7ec43227293d0c8adb7c1270 Mon Sep 17 00:00:00 2001 From: "Pedro F. Giffuni" Date: Mon, 11 Jan 2016 19:14:55 +0000 Subject: [PATCH 66/67] ext4: add support for reading sparse files Add support for sparse files in ext4. Also implement read-ahead, which greatly increases the performance when transferring files from ext4. Both features implemented by Damjan Jovanovic. PR: 205816 MFC after: 1 week --- sys/fs/ext2fs/ext2_bmap.c | 29 ++++++++++++-------- sys/fs/ext2fs/ext2_extents.c | 26 ++++++++++++++++-- sys/fs/ext2fs/ext2_extents.h | 6 +++- sys/fs/ext2fs/ext2_vnops.c | 53 +++++++++++++++++++++++------------- 4 files changed, 80 insertions(+), 34 deletions(-) diff --git a/sys/fs/ext2fs/ext2_bmap.c b/sys/fs/ext2fs/ext2_bmap.c index 7d4a88007a8..8656e59ed8f 100644 --- a/sys/fs/ext2fs/ext2_bmap.c +++ b/sys/fs/ext2fs/ext2_bmap.c @@ -102,9 +102,6 @@ ext4_bmapext(struct vnode *vp, int32_t bn, int64_t *bnp, int *runp, int *runb) fs = ip->i_e2fs; lbn = bn; - /* - * TODO: need to implement read ahead to improve the performance. 
- */ if (runp != NULL) *runp = 0; @@ -112,15 +109,25 @@ ext4_bmapext(struct vnode *vp, int32_t bn, int64_t *bnp, int *runp, int *runb) *runb = 0; ext4_ext_find_extent(fs, ip, lbn, &path); - ep = path.ep_ext; - if (ep == NULL) - ret = EIO; - else { - *bnp = fsbtodb(fs, lbn - ep->e_blk + - (ep->e_start_lo | (daddr_t)ep->e_start_hi << 32)); + if (path.ep_is_sparse) { + *bnp = -1; + if (runp != NULL) + *runp = path.ep_sparse_ext.e_len - + (lbn - path.ep_sparse_ext.e_blk) - 1; + } else { + ep = path.ep_ext; + if (ep == NULL) + ret = EIO; + else { + *bnp = fsbtodb(fs, lbn - ep->e_blk + + (ep->e_start_lo | (daddr_t)ep->e_start_hi << 32)); - if (*bnp == 0) - *bnp = -1; + if (*bnp == 0) + *bnp = -1; + + if (runp != NULL) + *runp = ep->e_len - (lbn - ep->e_blk) - 1; + } } if (path.ep_bp != NULL) { diff --git a/sys/fs/ext2fs/ext2_extents.c b/sys/fs/ext2fs/ext2_extents.c index 68704bb8850..1317fdcca2c 100644 --- a/sys/fs/ext2fs/ext2_extents.c +++ b/sys/fs/ext2fs/ext2_extents.c @@ -66,13 +66,14 @@ static void ext4_ext_binsearch(struct inode *ip, struct ext4_extent_path *path, daddr_t lbn) { struct ext4_extent_header *ehp = path->ep_header; - struct ext4_extent *l, *r, *m; + struct ext4_extent *first, *l, *r, *m; if (ehp->eh_ecount == 0) return; - l = (struct ext4_extent *)(char *)(ehp + 1); - r = (struct ext4_extent *)(char *)(ehp + 1) + ehp->eh_ecount - 1; + first = (struct ext4_extent *)(char *)(ehp + 1); + l = first; + r = first + ehp->eh_ecount - 1; while (l <= r) { m = l + (r - l) / 2; if (lbn < m->e_blk) @@ -81,7 +82,25 @@ ext4_ext_binsearch(struct inode *ip, struct ext4_extent_path *path, daddr_t lbn) l = m + 1; } + if (l == first) { + path->ep_sparse_ext.e_blk = lbn; + path->ep_sparse_ext.e_len = first->e_blk - lbn; + path->ep_sparse_ext.e_start_hi = 0; + path->ep_sparse_ext.e_start_lo = 0; + path->ep_is_sparse = 1; + return; + } path->ep_ext = l - 1; + if (path->ep_ext->e_blk + path->ep_ext->e_len <= lbn) { + path->ep_sparse_ext.e_blk = lbn; + if (l <= (first + 
ehp->eh_ecount - 1)) + path->ep_sparse_ext.e_len = l->e_blk - lbn; + else // XXX: where does it end? + path->ep_sparse_ext.e_len = 1; + path->ep_sparse_ext.e_start_hi = 0; + path->ep_sparse_ext.e_start_lo = 0; + path->ep_is_sparse = 1; + } } /* @@ -169,6 +188,7 @@ ext4_ext_find_extent(struct m_ext2fs *fs, struct inode *ip, path->ep_depth = i; path->ep_ext = NULL; path->ep_index = NULL; + path->ep_is_sparse = 0; ext4_ext_binsearch(ip, path, lbn); return (path); diff --git a/sys/fs/ext2fs/ext2_extents.h b/sys/fs/ext2fs/ext2_extents.h index 94ded83b1eb..4ce16f3e69b 100644 --- a/sys/fs/ext2fs/ext2_extents.h +++ b/sys/fs/ext2fs/ext2_extents.h @@ -84,7 +84,11 @@ struct ext4_extent_cache { struct ext4_extent_path { uint16_t ep_depth; struct buf *ep_bp; - struct ext4_extent *ep_ext; + int ep_is_sparse; + union { + struct ext4_extent ep_sparse_ext; + struct ext4_extent *ep_ext; + }; struct ext4_extent_index *ep_index; struct ext4_extent_header *ep_header; }; diff --git a/sys/fs/ext2fs/ext2_vnops.c b/sys/fs/ext2fs/ext2_vnops.c index 5b8990ee84c..bc239b8f163 100644 --- a/sys/fs/ext2fs/ext2_vnops.c +++ b/sys/fs/ext2fs/ext2_vnops.c @@ -1787,6 +1787,7 @@ ext2_ioctl(struct vop_ioctl_args *ap) static int ext4_ext_read(struct vop_read_args *ap) { + static unsigned char zeroes[EXT2_MAX_BLOCK_SIZE]; struct vnode *vp; struct inode *ip; struct uio *uio; @@ -1831,11 +1832,15 @@ ext4_ext_read(struct vop_read_args *ap) switch (cache_type) { case EXT4_EXT_CACHE_NO: ext4_ext_find_extent(fs, ip, lbn, &path); - ep = path.ep_ext; + if (path.ep_is_sparse) + ep = &path.ep_sparse_ext; + else + ep = path.ep_ext; if (ep == NULL) return (EIO); - ext4_ext_put_cache(ip, ep, EXT4_EXT_CACHE_IN); + ext4_ext_put_cache(ip, ep, + path.ep_is_sparse ? 
EXT4_EXT_CACHE_GAP : EXT4_EXT_CACHE_IN); newblk = lbn - ep->e_blk + (ep->e_start_lo | (daddr_t)ep->e_start_hi << 32); @@ -1848,7 +1853,7 @@ ext4_ext_read(struct vop_read_args *ap) case EXT4_EXT_CACHE_GAP: /* block has not been allocated yet */ - return (0); + break; case EXT4_EXT_CACHE_IN: newblk = lbn - nex.e_blk + (nex.e_start_lo | @@ -1859,24 +1864,34 @@ ext4_ext_read(struct vop_read_args *ap) panic("%s: invalid cache type", __func__); } - error = bread(ip->i_devvp, fsbtodb(fs, newblk), size, NOCRED, &bp); - if (error) { - brelse(bp); - return (error); - } - - size -= bp->b_resid; - if (size < xfersize) { - if (size == 0) { - bqrelse(bp); - break; + if (cache_type == EXT4_EXT_CACHE_GAP || + (cache_type == EXT4_EXT_CACHE_NO && path.ep_is_sparse)) { + if (xfersize > sizeof(zeroes)) + xfersize = sizeof(zeroes); + error = uiomove(zeroes, xfersize, uio); + if (error) + return (error); + } else { + error = bread(ip->i_devvp, fsbtodb(fs, newblk), size, + NOCRED, &bp); + if (error) { + brelse(bp); + return (error); } - xfersize = size; + + size -= bp->b_resid; + if (size < xfersize) { + if (size == 0) { + bqrelse(bp); + break; + } + xfersize = size; + } + error = uiomove(bp->b_data + blkoffset, xfersize, uio); + bqrelse(bp); + if (error) + return (error); } - error = uiomove(bp->b_data + blkoffset, (int)xfersize, uio); - bqrelse(bp); - if (error) - return (error); } return (0); From daf884fa9f466831ff8ed9c4004a8d7b4bb021b4 Mon Sep 17 00:00:00 2001 From: "Pedro F. Giffuni" Date: Mon, 11 Jan 2016 19:25:43 +0000 Subject: [PATCH 67/67] ext4: mount panic from freeing invalid pointers Initialize the struct with those fields to zeroes on allocation, preventing the panic. Patch by: Damjan Jovanovic. 
PR: 206056 MFC after: 3 days --- sys/fs/ext2fs/ext2_vfsops.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sys/fs/ext2fs/ext2_vfsops.c b/sys/fs/ext2fs/ext2_vfsops.c index ce943ea047b..5339aa2ea57 100644 --- a/sys/fs/ext2fs/ext2_vfsops.c +++ b/sys/fs/ext2fs/ext2_vfsops.c @@ -590,7 +590,7 @@ ext2_mountfs(struct vnode *devvp, struct mount *mp) * while Linux keeps the super block in a locked buffer. */ ump->um_e2fs = malloc(sizeof(struct m_ext2fs), - M_EXT2MNT, M_WAITOK); + M_EXT2MNT, M_WAITOK | M_ZERO); ump->um_e2fs->e2fs = malloc(sizeof(struct ext2fs), M_EXT2MNT, M_WAITOK); mtx_init(EXT2_MTX(ump), "EXT2FS", "EXT2FS Lock", MTX_DEF);